From 8c9c2bf7a3c4f7e9d158c0be9c49f372fb943ad2 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Sat, 25 Feb 2012 09:09:30 +0100 Subject: btrfs: fix race in reada When inserting into the radix tree returns EEXIST, get the existing entry without giving up the spinlock in between. There was a race for both the zones trees and the extent tree. Signed-off-by: Arne Jansen --- fs/btrfs/reada.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/reada.c') diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dc5d33146fd..8dec650099c 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, struct btrfs_bio *bbio) { int ret; - int looped = 0; struct reada_zone *zone; struct btrfs_block_group_cache *cache = NULL; u64 start; u64 end; int i; -again: zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, @@ -274,9 +272,6 @@ again: spin_unlock(&fs_info->reada_lock); } - if (looped) - return NULL; - cache = btrfs_lookup_block_group(fs_info, logical); if (!cache) return NULL; @@ -307,13 +302,15 @@ again: ret = radix_tree_insert(&dev->reada_zones, (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), zone); - spin_unlock(&fs_info->reada_lock); - if (ret) { + if (ret == -EEXIST) { kfree(zone); - looped = 1; - goto again; + ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, + logical >> PAGE_CACHE_SHIFT, 1); + if (ret == 1) + kref_get(&zone->refcnt); } + spin_unlock(&fs_info->reada_lock); return zone; } @@ -323,8 +320,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct btrfs_key *top, int level) { int ret; - int looped = 0; struct reada_extent *re = NULL; + struct reada_extent *re_exist = NULL; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct btrfs_bio *bbio = NULL; @@ -335,14 +332,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, int i; unsigned long index = logical >> PAGE_CACHE_SHIFT; -again: spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) kref_get(&re->refcnt); spin_unlock(&fs_info->reada_lock); - if (re || looped) + if (re) return re; re = kzalloc(sizeof(*re), GFP_NOFS); @@ -398,12 +394,15 @@ again: /* insert extent in reada_tree + all per-device trees, all or nothing */ spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&fs_info->reada_tree, index, re); + if (ret == -EEXIST) { + re_exist = radix_tree_lookup(&fs_info->reada_tree, index); + BUG_ON(!re_exist); + kref_get(&re_exist->refcnt); + spin_unlock(&fs_info->reada_lock); + goto error; + } if (ret) { spin_unlock(&fs_info->reada_lock); - if (ret != -ENOMEM) { - /* someone inserted the extent in the meantime */ - looped = 1; - } goto error; } for (i = 0; i < nzones; ++i) { @@ -450,9 +449,7 @@ error: } kfree(bbio); kfree(re); - if (looped) - goto again; - return NULL; + return re_exist; } static void reada_kref_dummy(struct kref *kr) -- cgit v1.2.3-70-g09d2 From 207a232ccac0a8cb79d304bd17298dbc96e2e082 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Sat, 25 Feb 2012 09:09:47 +0100 Subject: btrfs: don't add both copies of DUP to reada extent tree Normally when there are 2 copies of a block, we add both to the reada extent tree and prefetch only the one that is easier to reach. This way we can better utilize multiple devices. In case of DUP this makes no sense as both copies reside on the same device. Signed-off-by: Arne Jansen --- fs/btrfs/reada.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/btrfs/reada.c') diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 8dec650099c..ac5d0108588 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -326,6 +326,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct btrfs_bio *bbio = NULL; struct btrfs_device *dev; + struct btrfs_device *prev_dev; u32 blocksize; u64 length; int nzones = 0; @@ -405,8 +406,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, spin_unlock(&fs_info->reada_lock); goto error; } + prev_dev = NULL; for (i = 0; i < nzones; ++i) { dev = bbio->stripes[i].dev; + if (dev == prev_dev) { + /* + * in case of DUP, just add the first zone. As both + * are on the same device, there's nothing to gain + * from adding both. + * Also, it wouldn't work, as the tree is per device + * and adding would fail with EEXIST + */ + continue; + } + prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { while (--i >= 0) { -- cgit v1.2.3-70-g09d2