From b20a35035f983f4ac7e29c4a68f30e43510007e0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 22 Mar 2006 00:09:12 -0800 Subject: [PATCH] page migration reorg Centralize the page migration functions in anticipation of additional tinkering. Creates a new file mm/migrate.c 1. Extract buffer_migrate_page() from fs/buffer.c 2. Extract central migration code from vmscan.c 3. Extract some components from mempolicy.c 4. Export pageout() and remove_from_swap() from vmscan.c 5. Make it possible to configure NUMA systems without page migration and non-NUMA systems with page migration. I had to do some #ifdeffing in mempolicy.c that may need a cleanup. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 62 ------------------------------------------------------------- 1 file changed, 62 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index a9b39940200..1d3683d496f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3050,68 +3050,6 @@ asmlinkage long sys_bdflush(int func, long data) return 0; } -/* - * Migration function for pages with buffers. This function can only be used - * if the underlying filesystem guarantees that no other references to "page" - * exist. 
- */ -#ifdef CONFIG_MIGRATION -int buffer_migrate_page(struct page *newpage, struct page *page) -{ - struct address_space *mapping = page->mapping; - struct buffer_head *bh, *head; - int rc; - - if (!mapping) - return -EAGAIN; - - if (!page_has_buffers(page)) - return migrate_page(newpage, page); - - head = page_buffers(page); - - rc = migrate_page_remove_references(newpage, page, 3); - if (rc) - return rc; - - bh = head; - do { - get_bh(bh); - lock_buffer(bh); - bh = bh->b_this_page; - - } while (bh != head); - - ClearPagePrivate(page); - set_page_private(newpage, page_private(page)); - set_page_private(page, 0); - put_page(page); - get_page(newpage); - - bh = head; - do { - set_bh_page(bh, newpage, bh_offset(bh)); - bh = bh->b_this_page; - - } while (bh != head); - - SetPagePrivate(newpage); - - migrate_page_copy(newpage, page); - - bh = head; - do { - unlock_buffer(bh); - put_bh(bh); - bh = bh->b_this_page; - - } while (bh != head); - - return 0; -} -EXPORT_SYMBOL(buffer_migrate_page); -#endif - /* * Buffer-head allocation */ -- cgit v1.2.3-70-g09d2 From c039e3134ae62863bbc8e8429b29e3c43cf21b2a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 23 Mar 2006 03:00:28 -0800 Subject: [PATCH] sem2mutex: blockdev #2 Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ioctl.c | 22 +++++++++++----------- drivers/block/rd.c | 4 ++-- drivers/s390/block/dasd_ioctl.c | 8 ++++---- fs/block_dev.c | 28 ++++++++++++++-------------- fs/buffer.c | 6 +++--- fs/super.c | 4 ++-- include/linux/fs.h | 4 ++-- 7 files changed, 38 insertions(+), 38 deletions(-) (limited to 'fs/buffer.c') diff --git a/block/ioctl.c b/block/ioctl.c index e1109491c23..35fdb7dc651 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -42,9 +42,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user return -EINVAL; } /* partition number in use? */ - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); if (disk->part[part - 1]) { - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return -EBUSY; } /* overlap? */ @@ -55,13 +55,13 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user continue; if (!(start+length <= s->start_sect || start >= s->start_sect + s->nr_sects)) { - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return -EBUSY; } } /* all seems OK */ add_partition(disk, part, start, length); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return 0; case BLKPG_DEL_PARTITION: if (!disk->part[part-1]) @@ -71,9 +71,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user bdevp = bdget_disk(disk, part); if (!bdevp) return -ENOMEM; - down(&bdevp->bd_sem); + mutex_lock(&bdevp->bd_mutex); if (bdevp->bd_openers) { - up(&bdevp->bd_sem); + mutex_unlock(&bdevp->bd_mutex); bdput(bdevp); return -EBUSY; } @@ -81,10 +81,10 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user fsync_bdev(bdevp); invalidate_bdev(bdevp, 0); - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); delete_partition(disk, part); - up(&bdev->bd_sem); - up(&bdevp->bd_sem); + mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdevp->bd_mutex); 
bdput(bdevp); return 0; @@ -102,10 +102,10 @@ static int blkdev_reread_part(struct block_device *bdev) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (down_trylock(&bdev->bd_sem)) + if (!mutex_trylock(&bdev->bd_mutex)) return -EBUSY; res = rescan_partitions(disk, bdev); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return res; } diff --git a/drivers/block/rd.c b/drivers/block/rd.c index ffd6abd6d5a..1c54f46d3f7 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -310,12 +310,12 @@ static int rd_ioctl(struct inode *inode, struct file *file, * cache */ error = -EBUSY; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); if (bdev->bd_openers <= 2) { truncate_inode_pages(bdev->bd_inode->i_mapping, 0); error = 0; } - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return error; } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index fafeeae5267..f9930552ab5 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -151,9 +151,9 @@ dasd_ioctl_enable(struct block_device *bdev, int no, long args) return -ENODEV; dasd_enable_device(device); /* Formatting the dasd device can change the capacity. */ - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); i_size_write(bdev->bd_inode, (loff_t)get_capacity(device->gdp) << 9); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return 0; } @@ -184,9 +184,9 @@ dasd_ioctl_disable(struct block_device *bdev, int no, long args) * Set i_size to zero, since read, write, etc. check against this * value. 
*/ - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); i_size_write(bdev->bd_inode, 0); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 6e50346fb1e..44d05e6e34d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -265,8 +265,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) SLAB_CTOR_CONSTRUCTOR) { memset(bdev, 0, sizeof(*bdev)); - sema_init(&bdev->bd_sem, 1); - sema_init(&bdev->bd_mount_sem, 1); + mutex_init(&bdev->bd_mutex); + mutex_init(&bdev->bd_mount_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); inode_init_once(&ei->vfs_inode); @@ -574,7 +574,7 @@ static int do_open(struct block_device *bdev, struct file *file) } owner = disk->fops->owner; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -605,21 +605,21 @@ static int do_open(struct block_device *bdev, struct file *file) if (ret) goto out_first; bdev->bd_contains = whole; - down(&whole->bd_sem); + mutex_lock(&whole->bd_mutex); whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { whole->bd_part_count--; - up(&whole->bd_sem); + mutex_unlock(&whole->bd_mutex); ret = -ENXIO; goto out_first; } kobject_get(&p->kobj); bdev->bd_part = p; bd_set_size(bdev, (loff_t) p->nr_sects << 9); - up(&whole->bd_sem); + mutex_unlock(&whole->bd_mutex); } } else { put_disk(disk); @@ -633,13 +633,13 @@ static int do_open(struct block_device *bdev, struct file *file) if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } else { - down(&bdev->bd_contains->bd_sem); + mutex_lock(&bdev->bd_contains->bd_mutex); bdev->bd_contains->bd_part_count++; - up(&bdev->bd_contains->bd_sem); + mutex_unlock(&bdev->bd_contains->bd_mutex); } } bdev->bd_openers++; - up(&bdev->bd_sem); + 
mutex_unlock(&bdev->bd_mutex); unlock_kernel(); return 0; @@ -652,7 +652,7 @@ out_first: put_disk(disk); module_put(owner); out: - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); unlock_kernel(); if (ret) bdput(bdev); @@ -714,7 +714,7 @@ int blkdev_put(struct block_device *bdev) struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; - down(&bdev->bd_sem); + mutex_lock(&bdev->bd_mutex); lock_kernel(); if (!--bdev->bd_openers) { sync_blockdev(bdev); @@ -724,9 +724,9 @@ int blkdev_put(struct block_device *bdev) if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); } else { - down(&bdev->bd_contains->bd_sem); + mutex_lock(&bdev->bd_contains->bd_mutex); bdev->bd_contains->bd_part_count--; - up(&bdev->bd_contains->bd_sem); + mutex_unlock(&bdev->bd_contains->bd_mutex); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -746,7 +746,7 @@ int blkdev_put(struct block_device *bdev) bdev->bd_contains = NULL; } unlock_kernel(); - up(&bdev->bd_sem); + mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 1d3683d496f..0d6ca7bac6c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -201,7 +201,7 @@ int fsync_bdev(struct block_device *bdev) * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * - * This takes the block device bd_mount_sem to make sure no new mounts + * This takes the block device bd_mount_mutex to make sure no new mounts * happen on bdev until thaw_bdev() is called. * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. 
@@ -210,7 +210,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) { struct super_block *sb; - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_mount_mutex); sb = get_super(bdev); if (sb && !(sb->s_flags & MS_RDONLY)) { sb->s_frozen = SB_FREEZE_WRITE; @@ -264,7 +264,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) drop_super(sb); } - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_mount_mutex); } EXPORT_SYMBOL(thaw_bdev); diff --git a/fs/super.c b/fs/super.c index e20b5580afd..8f9c9b3af70 100644 --- a/fs/super.c +++ b/fs/super.c @@ -693,9 +693,9 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_mount_mutex); s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_mount_mutex); if (IS_ERR(s)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 128d0082522..009ac96053f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -397,8 +397,8 @@ struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ struct inode * bd_inode; /* will die */ int bd_openers; - struct semaphore bd_sem; /* open/close mutex */ - struct semaphore bd_mount_sem; /* mount mutex */ + struct mutex bd_mutex; /* open/close mutex */ + struct mutex bd_mount_mutex; /* mount mutex */ struct list_head bd_inodes; void * bd_holder; int bd_holders; -- cgit v1.2.3-70-g09d2 From b0196009d8c3ecf6ea6ec080c63d2ccc146e7ad9 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Fri, 24 Mar 2006 03:16:09 -0800 Subject: [PATCH] cpuset memory spread slab cache hooks Change the kmem_cache_create calls for certain slab caches to support cpuset memory spreading. See the previous patches, cpuset_mem_spread, for an explanation of cpuset memory spreading, and cpuset_mem_spread_slab_cache for the slab cache support for memory spreading. 
The slab caches marked for now are: dentry_cache, inode_cache, some xfs slab caches, and buffer_head. This list may change over time. In particular, other file system types that are used extensively on large NUMA systems may want to allow for spreading their directory and inode slab cache entries. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 7 +++++-- fs/dcache.c | 3 ++- fs/inode.c | 9 +++++++-- fs/xfs/linux-2.6/kmem.h | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 0d6ca7bac6c..36c7253bea7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3143,8 +3143,11 @@ void __init buffer_init(void) int nrpages; bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL); + sizeof(struct buffer_head), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_buffer_head, + NULL); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/dcache.c b/fs/dcache.c index 11dc83092d4..653f64ce98e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1682,7 +1682,8 @@ static void __init dcache_init(unsigned long mempages) dentry_cache = kmem_cache_create("dentry_cache", sizeof(struct dentry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), NULL, NULL); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); diff --git a/fs/inode.c b/fs/inode.c index 25967b67903..a51c671c54c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1375,8 +1375,13 @@ void __init inode_init(unsigned long mempages) int loop; /* inode slab cache */ - inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), - 0, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_once, NULL); + inode_cachep = kmem_cache_create("inode_cache", + sizeof(struct inode), + 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_once, + NULL); set_shrinker(DEFAULT_SEEKS, 
shrink_icache_memory); /* Hash may have been set up in inode_init_early */ diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index f0268a84e6f..2cfd33d4d8a 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -100,7 +100,7 @@ extern void kmem_free(void *, size_t); #define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN #define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT -#define KM_ZONE_SPREAD 0 +#define KM_ZONE_SPREAD SLAB_MEM_SPREAD #define kmem_zone kmem_cache #define kmem_zone_t struct kmem_cache -- cgit v1.2.3-70-g09d2 From 8a14342683b1e3adcf5f78660a42fcbd95b44a35 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Mar 2006 03:18:10 -0800 Subject: [PATCH] HOTPLUG_CPU: avoid hitting too many cachelines in recalc_bh_state() Instead of using for_each_cpu(i), we can use for_each_online_cpu(i). When a CPU goes offline (ie removed from online map), it might have a non null bh_accounting.nr, so this patch adds a transfer of this counter to an online CPU counter. We already have a hotcpu_notifier, (function buffer_cpu_notify()), where we can do this bh_accounting. 
Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 36c7253bea7..11ca6eb46a3 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3078,7 +3078,7 @@ static void recalc_bh_state(void) if (__get_cpu_var(bh_accounting).ratelimit++ < 4096) return; __get_cpu_var(bh_accounting).ratelimit = 0; - for_each_cpu(i) + for_each_online_cpu(i) tot += per_cpu(bh_accounting, i).nr; buffer_heads_over_limit = (tot > max_buffer_heads); } @@ -3127,6 +3127,9 @@ static void buffer_exit_cpu(int cpu) brelse(b->bhs[i]); b->bhs[i] = NULL; } + get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr; + per_cpu(bh_accounting, cpu).nr = 0; + put_cpu_var(bh_accounting); } static int buffer_cpu_notify(struct notifier_block *self, -- cgit v1.2.3-70-g09d2 From 4741c9fd36b3bcadd37238321c469049da94a4b9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Mar 2006 03:18:11 -0800 Subject: [PATCH] set_page_dirty() return value fixes We need set_page_dirty() to return true if it actually transitioned the page from a clean to dirty state. This wasn't right in a couple of places. Do a kernel-wide audit, fix things up. This leaves open the possibility of returning a negative errno from set_page_dirty() sometime in the future. But we don't do that at present. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- drivers/block/rd.c | 3 ++- fs/buffer.c | 2 +- include/linux/fs.h | 2 +- mm/page-writeback.c | 11 ++++++----- 5 files changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/buffer.c') diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 501fa52d8d3..c59ee28a35f 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2944,7 +2944,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig int spdl_err; /* Mark output pages dirty. */ spdl_err = set_page_dirty_lock(outpages[i]); - DEBUG(if (spdl_err)printk("cryptocop_ioctl_process: set_page_dirty_lock returned %d\n", spdl_err)); + DEBUG(if (spdl_err < 0)printk("cryptocop_ioctl_process: set_page_dirty_lock returned %d\n", spdl_err)); } for (i = 0; i < nooutpages; i++){ put_page(outpages[i]); diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 1c54f46d3f7..940bfd7951e 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -186,7 +186,8 @@ static int ramdisk_writepages(struct address_space *mapping, */ static int ramdisk_set_page_dirty(struct page *page) { - SetPageDirty(page); + if (!TestSetPageDirty(page)) + return 1; return 0; } diff --git a/fs/buffer.c b/fs/buffer.c index 11ca6eb46a3..24262ea8cc5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -865,8 +865,8 @@ int __set_page_dirty_buffers(struct page *page) } write_unlock_irq(&mapping->tree_lock); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return 1; } - return 0; } EXPORT_SYMBOL(__set_page_dirty_buffers); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ad70c1e5e5..092cfaee0cd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -350,7 +350,7 @@ struct address_space_operations { /* Write back some dirty pages from this mapping. 
*/ int (*writepages)(struct address_space *, struct writeback_control *); - /* Set a page dirty */ + /* Set a page dirty. Return true if this dirtied it */ int (*set_page_dirty)(struct page *page); int (*readpages)(struct file *filp, struct address_space *mapping, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c67ddc46472..893d7677579 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -628,8 +628,6 @@ EXPORT_SYMBOL(write_one_page); */ int __set_page_dirty_nobuffers(struct page *page) { - int ret = 0; - if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; @@ -651,8 +649,9 @@ int __set_page_dirty_nobuffers(struct page *page) I_DIRTY_PAGES); } } + return 1; } - return ret; + return 0; } EXPORT_SYMBOL(__set_page_dirty_nobuffers); @@ -682,8 +681,10 @@ int fastcall set_page_dirty(struct page *page) return (*spd)(page); return __set_page_dirty_buffers(page); } - if (!PageDirty(page)) - SetPageDirty(page); + if (!PageDirty(page)) { + if (!TestSetPageDirty(page)) + return 1; + } return 0; } EXPORT_SYMBOL(set_page_dirty); -- cgit v1.2.3-70-g09d2 From 18e79b40ed9c5223b88771f805c69f5993fc131b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Mar 2006 03:18:14 -0800 Subject: [PATCH] fsync: extract internal code Pull the guts out of do_fsync() - we can use it elsewhere. 
Cc: Hugh Dickins Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 43 +++++++++++++++++++++++-------------------- include/linux/fs.h | 1 + 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 24262ea8cc5..6d77ce9f54e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -327,31 +327,24 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) return ret; } -static long do_fsync(unsigned int fd, int datasync) +long do_fsync(struct file *file, int datasync) { - struct file * file; - struct address_space *mapping; - int ret, err; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; + int ret; + int err; + struct address_space *mapping = file->f_mapping; - ret = -EINVAL; if (!file->f_op || !file->f_op->fsync) { /* Why? We can still call filemap_fdatawrite */ - goto out_putf; + ret = -EINVAL; + goto out; } - mapping = file->f_mapping; - current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(mapping); /* - * We need to protect against concurrent writers, - * which could cause livelocks in fsync_buffers_list + * We need to protect against concurrent writers, which could cause + * livelocks in fsync_buffers_list(). 
*/ mutex_lock(&mapping->host->i_mutex); err = file->f_op->fsync(file, file->f_dentry, datasync); @@ -362,21 +355,31 @@ static long do_fsync(unsigned int fd, int datasync) if (!ret) ret = err; current->flags &= ~PF_SYNCWRITE; - -out_putf: - fput(file); out: return ret; } +static long __do_fsync(unsigned int fd, int datasync) +{ + struct file *file; + int ret = -EBADF; + + file = fget(fd); + if (file) { + ret = do_fsync(file, datasync); + fput(file); + } + return ret; +} + asmlinkage long sys_fsync(unsigned int fd) { - return do_fsync(fd, 0); + return __do_fsync(fd, 0); } asmlinkage long sys_fdatasync(unsigned int fd) { - return do_fsync(fd, 1); + return __do_fsync(fd, 1); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 092cfaee0cd..215696a0f16 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1478,6 +1478,7 @@ extern int wait_on_page_writeback_range(struct address_space *mapping, extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); extern void emergency_sync(void); -- cgit v1.2.3-70-g09d2 From d25b9a1ff0741e71a46f37f45263b5ddcbc948c4 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sat, 25 Mar 2006 03:07:44 -0800 Subject: [PATCH] freeze_bdev() cleanup freeze_bdev() uses a fsync_super() without sync_blockdev(). This patch makes __fsync_super() and shares it. 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6d77ce9f54e..3b3ab528192 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -160,12 +160,7 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) +static void __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); DQUOT_SYNC(sb); @@ -177,7 +172,16 @@ int fsync_super(struct super_block *sb) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); sync_inodes_sb(sb, 1); +} +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. 
+ */ +int fsync_super(struct super_block *sb) +{ + __fsync_super(sb); return sync_blockdev(sb->s_bdev); } @@ -216,19 +220,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb->s_frozen = SB_FREEZE_WRITE; smp_wmb(); - sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); - - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); + __fsync_super(sb); sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); -- cgit v1.2.3-70-g09d2 From e827f92355e1eeec2d227d3bd3350d04042a011e Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 26 Mar 2006 18:24:46 +0200 Subject: BUG_ON() Conversion in fs/buffer.c this changes if() BUG(); constructs to BUG_ON() which is cleaner and can be better optimized away Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- fs/buffer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 3b3ab528192..4342ab0ad99 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -796,8 +796,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) if (!mapping->assoc_mapping) { mapping->assoc_mapping = buffer_mapping; } else { - if (mapping->assoc_mapping != buffer_mapping) - BUG(); + BUG_ON(mapping->assoc_mapping != buffer_mapping); } if (list_empty(&bh->b_assoc_buffers)) { spin_lock(&buffer_mapping->private_lock); @@ -1114,8 +1113,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, if (!page) return NULL; - if (!PageLocked(page)) - BUG(); + BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); @@ -1522,8 +1520,7 @@ void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) { bh->b_page = page; - if (offset >= PAGE_SIZE) - BUG(); + BUG_ON(offset >= PAGE_SIZE); if (PageHighMem(page)) /* * This catches illegal uses and preserves the offset: -- cgit 
v1.2.3-70-g09d2 From 3978d7179d3849848df8a37dd0a5acc20bcb8750 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 26 Mar 2006 01:37:17 -0800 Subject: [PATCH] Make address_space_operations->sync_page return void The only user ignores the return value, and the only instance (block_sync_page) always returns 0... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 3 +-- fs/cifs/file.c | 6 ++++-- include/linux/buffer_head.h | 2 +- include/linux/fs.h | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 3b3ab528192..0b9456fd074 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3007,7 +3007,7 @@ out: } EXPORT_SYMBOL(try_to_free_buffers); -int block_sync_page(struct page *page) +void block_sync_page(struct page *page) { struct address_space *mapping; @@ -3015,7 +3015,6 @@ int block_sync_page(struct page *page) mapping = page_mapping(page); if (mapping) blk_run_backing_dev(mapping->backing_dev_info, page); - return 0; } /* diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 165d6742638..fb49aef1f2e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1339,7 +1339,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) return rc; } -/* static int cifs_sync_page(struct page *page) +/* static void cifs_sync_page(struct page *page) { struct address_space *mapping; struct inode *inode; @@ -1353,16 +1353,18 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) return 0; inode = mapping->host; if (!inode) - return 0; */ + return; */ /* fill in rpages then result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ /* cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index)); +#if 0 if (rc < 0) return rc; return 0; +#endif } */ /* diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9f159baf153..27dd97b3fce 100644 --- a/include/linux/buffer_head.h +++ 
b/include/linux/buffer_head.h @@ -200,7 +200,7 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_sync_page(struct page *); +void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5adf32b90f3..972435d4df5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -350,7 +350,7 @@ struct writeback_control; struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - int (*sync_page)(struct page *); + void (*sync_page)(struct page *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); -- cgit v1.2.3-70-g09d2 From 2ff28e22bdb8727fbc7d7889807bc5a73aae56c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 26 Mar 2006 01:37:18 -0800 Subject: [PATCH] Make address_space_operations->invalidatepage return void The return value of this function is never used, so let's be honest and declare it as void. Some places where invalidatepage returned 0, I have inserted comments suggesting a BUG_ON. 
[akpm@osdl.org: JBD BUG fix] [akpm@osdl.org: rework for git-nfs] [akpm@osdl.org: don't go BUG in block_invalidate_page()] Signed-off-by: Neil Brown Acked-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/file.c | 6 +++--- fs/buffer.c | 18 ++++++++---------- fs/ext3/inode.c | 4 ++-- fs/jbd/transaction.c | 13 +++++-------- fs/jfs/jfs_metapage.c | 7 +++---- fs/nfs/file.c | 3 +-- fs/reiserfs/inode.c | 8 +++++--- fs/xfs/linux-2.6/xfs_aops.c | 4 ++-- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- include/linux/jbd.h | 2 +- 11 files changed, 33 insertions(+), 38 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/afs/file.c b/fs/afs/file.c index 150b1922792..7bb716887e2 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -28,7 +28,7 @@ static int afs_file_release(struct inode *inode, struct file *file); #endif static int afs_file_readpage(struct file *file, struct page *page); -static int afs_file_invalidatepage(struct page *page, unsigned long offset); +static void afs_file_invalidatepage(struct page *page, unsigned long offset); static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); struct inode_operations afs_file_inode_operations = { @@ -212,7 +212,7 @@ int afs_cache_get_page_cookie(struct page *page, /* * invalidate part or all of a page */ -static int afs_file_invalidatepage(struct page *page, unsigned long offset) +static void afs_file_invalidatepage(struct page *page, unsigned long offset) { int ret = 1; @@ -238,11 +238,11 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset) if (!PageWriteback(page)) ret = page->mapping->a_ops->releasepage(page, 0); + /* possibly should BUG_ON(!ret); - neilb */ } } _leave(" = %d", ret); - return ret; } /* end afs_file_invalidatepage() */ /*****************************************************************************/ diff --git a/fs/buffer.c b/fs/buffer.c index 0b9456fd074..f25f5809642 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ 
-1593,11 +1593,10 @@ EXPORT_SYMBOL(try_to_release_page); * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ -int block_invalidatepage(struct page *page, unsigned long offset) +void block_invalidatepage(struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; unsigned int curr_off = 0; - int ret = 1; BUG_ON(!PageLocked(page)); if (!page_has_buffers(page)) @@ -1624,19 +1623,18 @@ int block_invalidatepage(struct page *page, unsigned long offset) * so real IO is not possible anymore. */ if (offset == 0) - ret = try_to_release_page(page, 0); + try_to_release_page(page, 0); out: - return ret; + return; } EXPORT_SYMBOL(block_invalidatepage); -int do_invalidatepage(struct page *page, unsigned long offset) +void do_invalidatepage(struct page *page, unsigned long offset) { - int (*invalidatepage)(struct page *, unsigned long); - invalidatepage = page->mapping->a_ops->invalidatepage; - if (invalidatepage == NULL) - invalidatepage = block_invalidatepage; - return (*invalidatepage)(page, offset); + void (*invalidatepage)(struct page *, unsigned long); + invalidatepage = page->mapping->a_ops->invalidatepage ? 
: + block_invalidatepage; + (*invalidatepage)(page, offset); } /* diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2c361377e0a..76e22c9c9c6 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1430,7 +1430,7 @@ ext3_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); } -static int ext3_invalidatepage(struct page *page, unsigned long offset) +static void ext3_invalidatepage(struct page *page, unsigned long offset) { journal_t *journal = EXT3_JOURNAL(page->mapping->host); @@ -1440,7 +1440,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset) if (offset == 0) ClearPageChecked(page); - return journal_invalidatepage(journal, page, offset); + journal_invalidatepage(journal, page, offset); } static int ext3_releasepage(struct page *page, gfp_t wait) diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index ada31fa272e..c609f5034fc 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1873,16 +1873,15 @@ zap_buffer_unlocked: } /** - * int journal_invalidatepage() + * void journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush * @offset: length of page to invalidate. * * Reap page buffers containing data after offset in page. * - * Return non-zero if the page's buffers were successfully reaped. 
*/ -int journal_invalidatepage(journal_t *journal, +void journal_invalidatepage(journal_t *journal, struct page *page, unsigned long offset) { @@ -1893,7 +1892,7 @@ int journal_invalidatepage(journal_t *journal, if (!PageLocked(page)) BUG(); if (!page_has_buffers(page)) - return 1; + return; /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be @@ -1916,11 +1915,9 @@ int journal_invalidatepage(journal_t *journal, } while (bh != head); if (!offset) { - if (!may_free || !try_to_free_buffers(page)) - return 0; - J_ASSERT(!page_has_buffers(page)); + if (may_free && try_to_free_buffers(page)) + J_ASSERT(!page_has_buffers(page)); } - return 1; } /* diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 5fbaeaadccd..8508043849f 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -578,14 +578,13 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) return 0; } -static int metapage_invalidatepage(struct page *page, unsigned long offset) +static void metapage_invalidatepage(struct page *page, unsigned long offset) { BUG_ON(offset); - if (PageWriteback(page)) - return 0; + BUG_ON(PageWriteback(page)); - return metapage_releasepage(page, 0); + metapage_releasepage(page, 0); } struct address_space_operations jfs_metapage_aops = { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5263b2864a4..dee49a0cb99 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -318,10 +318,9 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse return status; } -static int nfs_invalidate_page(struct page *page, unsigned long offset) +static void nfs_invalidate_page(struct page *page, unsigned long offset) { /* FIXME: we really should cancel any unstarted writes on this page */ - return 1; } static int nfs_release_page(struct page *page, gfp_t gfp) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d60f6238c66..62e18c19b44 100644 --- a/fs/reiserfs/inode.c +++ 
b/fs/reiserfs/inode.c @@ -2793,7 +2793,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) } /* clm -- taken from fs/buffer.c:block_invalidate_page */ -static int reiserfs_invalidatepage(struct page *page, unsigned long offset) +static void reiserfs_invalidatepage(struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; struct inode *inode = page->mapping->host; @@ -2832,10 +2832,12 @@ static int reiserfs_invalidatepage(struct page *page, unsigned long offset) * The get_block cached value has been unconditionally invalidated, * so real IO is not possible anymore. */ - if (!offset && ret) + if (!offset && ret) { ret = try_to_release_page(page, 0); + /* maybe should BUG_ON(!ret); - neilb */ + } out: - return ret; + return; } static int reiserfs_set_page_dirty(struct page *page) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 97fc056130e..4f2476f188b 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1442,14 +1442,14 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_block); } -STATIC int +STATIC void xfs_vm_invalidatepage( struct page *page, unsigned long offset) { xfs_page_trace(XFS_INVALIDPAGE_ENTER, page->mapping->host, page, offset); - return block_invalidatepage(page, offset); + block_invalidatepage(page, offset); } struct address_space_operations xfs_address_space_operations = { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 27dd97b3fce..da917ed096a 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -189,8 +189,8 @@ extern int buffer_heads_over_limit; * address_spaces. 
*/ int try_to_release_page(struct page * page, gfp_t gfp_mask); -int block_invalidatepage(struct page *page, unsigned long offset); -int do_invalidatepage(struct page *page, unsigned long offset); +void block_invalidatepage(struct page *page, unsigned long offset); +void do_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); diff --git a/include/linux/fs.h b/include/linux/fs.h index 972435d4df5..9674679525f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -369,7 +369,7 @@ struct address_space_operations { int (*commit_write)(struct file *, struct page *, unsigned, unsigned); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); - int (*invalidatepage) (struct page *, unsigned long); + void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 4fc7dffd66e..6a425e370cb 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -895,7 +895,7 @@ extern int journal_dirty_metadata (handle_t *, struct buffer_head *); extern void journal_release_buffer (handle_t *, struct buffer_head *); extern int journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern int journal_invalidatepage(journal_t *, +extern void journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int journal_stop(handle_t *); -- cgit v1.2.3-70-g09d2 From 205f87f6b342444f722e4559d33318686f7df2ca Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:00 -0800 Subject: [PATCH] change buffer_head.b_size to 
size_t Increase the size of the buffer_head b_size field (only) for 64 bit platforms. Update some old and moldy comments in and around the structure as well. The b_size increase allows us to perform larger mappings and allocations for large I/O requests from userspace, which tie in with other changes allowing the get_block_t() interface to map multiple blocks at once. Signed-off-by: Nathan Scott Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 6 ++++-- fs/ocfs2/journal.c | 2 +- fs/reiserfs/prints.c | 2 +- include/linux/buffer_head.h | 19 +++++++++++-------- 4 files changed, 17 insertions(+), 12 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index f25f5809642..e7a1461f438 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -426,8 +426,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (all_mapped) { printk("__find_get_block_slow() failed. " "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size); + (unsigned long long)block, + (unsigned long long)bh->b_blocknr); + printk("b_state=0x%08lx, b_size=%zu\n", + bh->b_state, bh->b_size); printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); } out_unlock: diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 90641929a43..6a610ae5358 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -377,7 +377,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle, BUG_ON(!bh); BUG_ON(!(handle->flags & OCFS2_HANDLE_STARTED)); - mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %hu\n", + mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", (unsigned long long)bh->b_blocknr, type, (type == OCFS2_JOURNAL_ACCESS_CREATE) ? 
"OCFS2_JOURNAL_ACCESS_CREATE" : diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 78b40621b88..27bd3a1df2a 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -143,7 +143,7 @@ static void sprintf_buffer_head(char *buf, struct buffer_head *bh) char b[BDEVNAME_SIZE]; sprintf(buf, - "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + "dev %s, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", bdevname(bh->b_bdev, b), bh->b_size, (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), bh->b_state, bh->b_page, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index da917ed096a..464f068f8b1 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -46,25 +46,28 @@ struct address_space; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* - * Keep related fields in common cachelines. The most commonly accessed - * field (b_state) goes at the start so the compiler does not generate - * indexed addressing for it. + * Historically, a buffer_head was used to map a single block + * within a page, and of course as the unit of I/O through the + * filesystem and block layers. Nowadays the basic I/O unit + * is the bio, and buffer_heads are used for extracting block + * mappings (via a get_block_t call), for tracking state within + * a page (via a page_mapping) and for wrapping bio submission + * for backward compatibility reasons (e.g. submit_bh). 
*/ struct buffer_head { - /* First cache line: */ unsigned long b_state; /* buffer state bitmap (see above) */ struct buffer_head *b_this_page;/* circular list of page's buffers */ struct page *b_page; /* the page this bh is mapped to */ - atomic_t b_count; /* users using this block */ - u32 b_size; /* block size */ - sector_t b_blocknr; /* block number */ - char *b_data; /* pointer to data block */ + sector_t b_blocknr; /* start block number */ + size_t b_size; /* size of mapping */ + char *b_data; /* pointer to data within the page */ struct block_device *b_bdev; bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ + atomic_t b_count; /* users using this buffer_head */ }; /* -- cgit v1.2.3-70-g09d2 From b0cf2321c6599138f860517745503691556d8453 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:00 -0800 Subject: [PATCH] pass b_size to ->get_block() Pass the amount of disk that needs to be mapped to get_block(). This way one can modify the fs ->get_block() functions to map multiple blocks at the same time. 
[akpm@osdl.org: performance tweak] [akpm@osdl.org: remove unneeded assignments] Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 9 ++++++++- fs/mpage.c | 2 ++ include/linux/buffer_head.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index e7a1461f438..a507b58550f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1738,6 +1738,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, sector_t block; sector_t last_block; struct buffer_head *bh, *head; + const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; BUG_ON(!PageLocked(page)); @@ -1745,7 +1746,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; if (!page_has_buffers(page)) { - create_empty_buffers(page, 1 << inode->i_blkbits, + create_empty_buffers(page, blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); } @@ -1780,6 +1781,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, clear_buffer_dirty(bh); set_buffer_uptodate(bh); } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, block, bh, 1); if (err) goto recover; @@ -1933,6 +1935,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (buffer_new(bh)) clear_buffer_new(bh); if (!buffer_mapped(bh)) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, block, bh, 1); if (err) break; @@ -2088,6 +2091,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) fully_mapped = 0; if (iblock < lblock) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) SetPageError(page); @@ -2409,6 +2413,7 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, create = 1; if (block_start >= to) create = 0; + map_bh.b_size = blocksize; ret = get_block(inode, 
block_in_file + block_in_page, &map_bh, create); if (ret) @@ -2669,6 +2674,7 @@ int block_truncate_page(struct address_space *mapping, err = 0; if (!buffer_mapped(bh)) { + WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) goto unlock; @@ -2755,6 +2761,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, struct inode *inode = mapping->host; tmp.b_state = 0; tmp.b_blocknr = 0; + tmp.b_size = 1 << inode->i_blkbits; get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } diff --git a/fs/mpage.c b/fs/mpage.c index e431cb3878d..7903b740cc1 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -192,6 +192,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, page_block++, block_in_file++) { bh.b_state = 0; if (block_in_file < last_block) { + bh.b_size = blocksize; if (get_block(inode, block_in_file, &bh, 0)) goto confused; } @@ -472,6 +473,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, for (page_block = 0; page_block < blocks_per_page; ) { map_bh.b_state = 0; + map_bh.b_size = 1 << blkbits; if (get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 464f068f8b1..fb7e9b7ccbe 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -280,6 +280,7 @@ map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) set_buffer_mapped(bh); bh->b_bdev = sb->s_bdev; bh->b_blocknr = block; + bh->b_size = sb->s_blocksize; } /* -- cgit v1.2.3-70-g09d2 From ec936fc563715a9e2b2e363eb060655b49529325 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:59 -0800 Subject: [PATCH] for_each_online_pgdat: renaming for_each_pgdat Replace for_each_pgdat() with for_each_online_pgdat(). 
Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pgtable.c | 2 +- arch/ia64/mm/discontig.c | 4 ++-- arch/ia64/mm/init.c | 2 +- arch/m32r/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 4 ++-- arch/x86_64/mm/init.c | 2 +- fs/buffer.c | 2 +- mm/page_alloc.c | 6 +++--- mm/vmscan.c | 6 +++--- 9 files changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/buffer.c') diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 9db3242103b..2889567e21a 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -36,7 +36,7 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 2f5e44862e9..384f1d7dce9 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -386,7 +386,7 @@ static void __init pgdat_insert(pg_data_t *pgdat) { pg_data_t *prev = NULL, *next; - for_each_pgdat(next) + for_each_online_pgdat(next) if (pgdat->node_id < next->node_id) break; else @@ -560,7 +560,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ff4f31fcd33..2ef1151cde9 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -600,7 +600,7 @@ mem_init (void) kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); kclist_add(&kcore_kernel, _stext, _end - _stext); - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) if (pgdat->bdata->node_bootmem_map) totalram_pages += 
free_all_bootmem_node(pgdat); diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index c9e7dad860b..2e0fe199ce3 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -47,7 +47,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index badac10d700..5e435a9c343 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -195,7 +195,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { @@ -351,7 +351,7 @@ void __init mem_init(void) max_mapnr = max_pfn; totalram_pages += free_all_bootmem(); #endif - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { if (!pfn_valid(pgdat->node_start_pfn + i)) continue; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b0441562544..e5f7f1c3446 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -72,7 +72,7 @@ void show_mem(void) show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; diff --git a/fs/buffer.c b/fs/buffer.c index d597758dd12..23f1f3a6807 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -493,7 +493,7 @@ static void free_more_memory(void) wakeup_pdflush(1024); yield(); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; if (*zones) 
try_to_free_pages(zones, GFP_NOFS); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8dc8f2735d2..ccc3713dd40 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1201,7 +1201,7 @@ unsigned int nr_free_highpages (void) pg_data_t *pgdat; unsigned int pages = 0; - for_each_pgdat(pgdat) + for_each_online_pgdat(pgdat) pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; return pages; @@ -1343,7 +1343,7 @@ void get_zone_counts(unsigned long *active, *active = 0; *inactive = 0; *free = 0; - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long l, m, n; __get_zone_counts(&l, &m, &n, pgdat); *active += l; @@ -2482,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void) struct pglist_data *pgdat; int j, idx; - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long present_pages = zone->present_pages; diff --git a/mm/vmscan.c b/mm/vmscan.c index 78865c849f8..acdf001d694 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1305,7 +1305,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) current->reclaim_state = &reclaim_state; repeat: - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long freed; freed = balance_pgdat(pgdat, nr_to_free, 0); @@ -1335,7 +1335,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, cpumask_t mask; if (action == CPU_ONLINE) { - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { mask = node_to_cpumask(pgdat->node_id); if (any_online_cpu(mask) != NR_CPUS) /* One of our CPUs online: restore mask */ @@ -1351,7 +1351,7 @@ static int __init kswapd_init(void) pg_data_t *pgdat; swap_setup(); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { pid_t pid; pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); -- cgit v1.2.3-70-g09d2