From 93b270f76e7ef3b81001576860c2701931cdc78b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Feb 2011 17:25:47 +1100 Subject: Fix over-zealous flush_disk when changing device size. There are two cases when we call flush_disk. In one, the device has disappeared (check_disk_change) so any data will hold becomes irrelevant. In the oter, the device has changed size (check_disk_size_change) so data we hold may be irrelevant. In both cases it makes sense to discard any 'clean' buffers, so they will be read back from the device if needed. In the former case it makes sense to discard 'dirty' buffers as there will never be anywhere safe to write the data. In the second case it *does*not* make sense to discard dirty buffers as that will lead to file system corruption when you simply enlarge the containing devices. flush_disk calls __invalidate_devices. __invalidate_device calls both invalidate_inodes and invalidate_bdev. invalidate_inodes *does* discard I_DIRTY inodes and this does lead to fs corruption. invalidate_bev *does*not* discard dirty pages, but I don't really care about that at present. So this patch adds a flag to __invalidate_device (calling it __invalidate_device2) to indicate whether dirty buffers should be killed, and this is passed to invalidate_inodes which can choose to skip dirty inodes. flusk_disk then passes true from check_disk_change and false from check_disk_size_change. dm avoids tripping over this problem by calling i_size_write directly rathher than using check_disk_size_change. md does use check_disk_size_change and so is affected. This regression was introduced by commit 608aeef17a which causes check_disk_size_change to call flush_disk, so it is suitable for any kernel since 2.6.27. Cc: stable@kernel.org Acked-by: Jeff Moyer Cc: Andrew Patterson Cc: Jens Axboe Signed-off-by: NeilBrown --- fs/block_dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 333a7bb4cb9..5e23152d04a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -927,9 +927,9 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); * when a disk has been changed -- either by a media change or online * resize. */ -static void flush_disk(struct block_device *bdev) +static void flush_disk(struct block_device *bdev, bool kill_dirty) { - if (__invalidate_device(bdev)) { + if (__invalidate_device(bdev, kill_dirty)) { char name[BDEVNAME_SIZE] = ""; if (bdev->bd_disk) @@ -966,7 +966,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) "%s: detected capacity change from %lld to %lld\n", name, bdev_size, disk_size); i_size_write(bdev->bd_inode, disk_size); - flush_disk(bdev); + flush_disk(bdev, false); } } EXPORT_SYMBOL(check_disk_size_change); @@ -1019,7 +1019,7 @@ int check_disk_change(struct block_device *bdev) if (!(events & DISK_EVENT_MEDIA_CHANGE)) return 0; - flush_disk(bdev); + flush_disk(bdev, true); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); return 1; @@ -1601,7 +1601,7 @@ fail: } EXPORT_SYMBOL(lookup_bdev); -int __invalidate_device(struct block_device *bdev) +int __invalidate_device(struct block_device *bdev, bool kill_dirty) { struct super_block *sb = get_super(bdev); int res = 0; @@ -1614,7 +1614,7 @@ int __invalidate_device(struct block_device *bdev) * hold). */ shrink_dcache_sb(sb); - res = invalidate_inodes(sb); + res = invalidate_inodes(sb, kill_dirty); drop_super(sb); } invalidate_bdev(bdev); -- cgit v1.2.3-70-g09d2 From e7407d1619713f4b1fdff3a485e1bd8e77bd480d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Feb 2011 09:56:32 +0100 Subject: block: bd_link_disk_holder() should hold on to holder_dir The new implementation of bd_link_disk_holder() added by 49731baa41d (block: restore multiple bd_link_disk_holder() support) didn't get an extra reference for the holder_dir kobject of the slave bdev; however, bdev kills holder_dir on removal, not release, so if the slave bdev is removed while there are holder links, the holder_dir will be destroyed while there still are holder links, which leads to oops later when bd_unlink_disk_order() tries to remove those links. Make bd_link_disk_holder() grab an extra reference for the slave's holder_dir and put it in bd_unlink_disk_holder(). Signed-off-by: Tejun Heo Reported-by: "Hawrylewicz Czarnowski, Przemyslaw" Tested-by: "Hawrylewicz Czarnowski, Przemyslaw" Cc: Neil Brown Cc: Jens Axboe Signed-off-by: Linus Torvalds --- fs/block_dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 4fb8a343153..94d41db6200 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); if (ret) goto out_del; + /* + * bdev could be deleted beneath us which would implicitly destroy + * the holder directory. Hold on to it. + */ + kobject_get(bdev->bd_part->holder_dir); list_add(&holder->list, &bdev->bd_holder_disks); goto out_unlock; @@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + kobject_put(bdev->bd_part->holder_dir); list_del_init(&holder->list); kfree(holder); } -- cgit v1.2.3-70-g09d2 From e6eb5ce1b202ac9cdcfda5be559c9b9d8ec7542c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Feb 2011 10:54:00 -0800 Subject: fs/block_dev.c: fix new kernel-doc warning Fix new kernel-doc warning in fs/block_dev.c: Warning(fs/block_dev.c:937): No description found for parameter 'kill_dirty' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index f05bf16cd97..88928701959 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -928,6 +928,7 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); * flush_disk - invalidates all buffer-cache entries on a disk * * @bdev: struct block device to be flushed + * @kill_dirty: flag to guide handling of dirty inodes * * Invalidates all buffer-cache entries on a disk. It should be called * when a disk has been changed -- either by a media change or online -- cgit v1.2.3-70-g09d2