diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 245 |
1 files changed, 149 insertions, 96 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd13eb81ee4..c7367ae5a3e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,17 +33,6 @@ #include "volumes.h" #include "async-thread.h" -struct map_lookup { - u64 type; - int io_align; - int io_width; - int stripe_len; - int sector_size; - int num_stripes; - int sub_stripes; - struct btrfs_bio_stripe stripes[]; -}; - static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); @@ -162,22 +151,25 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) struct bio *cur; int again = 0; unsigned long num_run; - unsigned long num_sync_run; unsigned long batch_run = 0; unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; + struct blk_plug plug; + + /* + * this function runs all the bios we've collected for + * a particular device. We don't want to wander off to + * another device without first sending all of these down. + * So, setup a plug here and finish it off before we return + */ + blk_start_plug(&plug); bdi = blk_get_backing_dev_info(device->bdev); fs_info = device->dev_root->fs_info; limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; - /* we want to make sure that every time we switch from the sync - * list to the normal list, we unplug - */ - num_sync_run = 0; - loop: spin_lock(&device->io_lock); @@ -223,15 +215,6 @@ loop_lock: spin_unlock(&device->io_lock); - /* - * if we're doing the regular priority list, make sure we unplug - * for any high prio bios we've sent down - */ - if (pending_bios == &device->pending_bios && num_sync_run > 0) { - num_sync_run = 0; - blk_run_backing_dev(bdi, NULL); - } - while (pending) { rmb(); @@ -259,19 +242,11 @@ loop_lock: BUG_ON(atomic_read(&cur->bi_cnt) == 0); - if (cur->bi_rw & REQ_SYNC) - num_sync_run++; - submit_bio(cur->bi_rw, cur); num_run++; batch_run++; - if (need_resched()) { - if (num_sync_run) { - blk_run_backing_dev(bdi, NULL); - num_sync_run = 0; - } + if (need_resched()) cond_resched(); - } /* * we made progress, there is more work to do and the bdi @@ -304,13 +279,8 @@ loop_lock: * against it before looping */ last_waited = ioc->last_waited; - if (need_resched()) { - if (num_sync_run) { - blk_run_backing_dev(bdi, NULL); - num_sync_run = 0; - } + if (need_resched()) cond_resched(); - } continue; } spin_lock(&device->io_lock); @@ -323,22 +293,6 @@ loop_lock: } } - if (num_sync_run) { - num_sync_run = 0; - blk_run_backing_dev(bdi, NULL); - } - /* - * IO has already been through a long path to get here. Checksumming, - * async helper threads, perhaps compression. We've done a pretty - * good job of collecting a batch of IO and should just unplug - * the device right away. - * - * This will help anyone who is waiting on the IO, they might have - * already unplugged, but managed to do so before the bio they - * cared about found its way down here. - */ - blk_run_backing_dev(bdi, NULL); - cond_resched(); if (again) goto loop; @@ -349,6 +303,7 @@ loop_lock: spin_unlock(&device->io_lock); done: + blk_finish_plug(&plug); return 0; } @@ -1923,6 +1878,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, BUG_ON(ret); + trace_btrfs_chunk_free(root, map, chunk_offset, em->len); + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); BUG_ON(ret); @@ -2650,6 +2607,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, *num_bytes = chunk_bytes_by_type(type, calc_size, map->num_stripes, sub_stripes); + trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); + em = alloc_extent_map(GFP_NOFS); if (!em) { ret = -ENOMEM; @@ -2758,6 +2717,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, item_size); BUG_ON(ret); } + kfree(chunk); return 0; } @@ -2955,14 +2915,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, - int mirror_num, struct page *unplug_page) + int mirror_num) { struct extent_map *em; struct map_lookup *map; struct extent_map_tree *em_tree = &map_tree->map_tree; u64 offset; u64 stripe_offset; + u64 stripe_end_offset; u64 stripe_nr; + u64 stripe_nr_orig; + u64 stripe_nr_end; int stripes_allocated = 8; int stripes_required = 1; int stripe_index; @@ -2971,7 +2934,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int max_errors = 0; struct btrfs_multi_bio *multi = NULL; - if (multi_ret && !(rw & REQ_WRITE)) + if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) stripes_allocated = 1; again: if (multi_ret) { @@ -2987,11 +2950,6 @@ again: em = lookup_extent_mapping(em_tree, logical, *length); read_unlock(&em_tree->lock); - if (!em && unplug_page) { - kfree(multi); - return 0; - } - if (!em) { printk(KERN_CRIT "unable to find logical %llu len %llu\n", (unsigned long long)logical, @@ -3017,7 +2975,15 @@ again: max_errors = 1; } } - if (multi_ret && (rw & REQ_WRITE) && + if (rw & REQ_DISCARD) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID10)) { + stripes_required = map->num_stripes; + } + } + if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); @@ -3037,23 +3003,37 @@ again: /* stripe_offset is the offset of this block in its stripe*/ stripe_offset = offset - stripe_offset; - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_DUP)) { + if (rw & REQ_DISCARD) + *length = min_t(u64, em->len - offset, *length); + else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, - map->stripe_len - stripe_offset); + map->stripe_len - stripe_offset); } else { *length = em->len - offset; } - if (!multi_ret && !unplug_page) + if (!multi_ret) goto out; num_stripes = 1; stripe_index = 0; - if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (unplug_page || (rw & REQ_WRITE)) + stripe_nr_orig = stripe_nr; + stripe_nr_end = (offset + *length + map->stripe_len - 1) & + (~(map->stripe_len - 1)); + do_div(stripe_nr_end, map->stripe_len); + stripe_end_offset = stripe_nr_end * map->stripe_len - + (offset + *length); + if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + if (rw & REQ_DISCARD) + num_stripes = min_t(u64, map->num_stripes, + stripe_nr_end - stripe_nr_orig); + stripe_index = do_div(stripe_nr, map->num_stripes); + } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + if (rw & (REQ_WRITE | REQ_DISCARD)) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -3064,7 +3044,7 @@ again: } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (rw & REQ_WRITE) + if (rw & (REQ_WRITE | REQ_DISCARD)) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -3075,8 +3055,12 @@ again: stripe_index = do_div(stripe_nr, factor); stripe_index *= map->sub_stripes; - if (unplug_page || (rw & REQ_WRITE)) + if (rw & REQ_WRITE) num_stripes = map->sub_stripes; + else if (rw & REQ_DISCARD) + num_stripes = min_t(u64, map->sub_stripes * + (stripe_nr_end - stripe_nr_orig), + map->num_stripes); else if (mirror_num) stripe_index += mirror_num - 1; else { @@ -3094,24 +3078,101 @@ again: } BUG_ON(stripe_index >= map->num_stripes); - for (i = 0; i < num_stripes; i++) { - if (unplug_page) { - struct btrfs_device *device; - struct backing_dev_info *bdi; - - device = map->stripes[stripe_index].dev; - if (device->bdev) { - bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) - bdi->unplug_io_fn(bdi, unplug_page); - } - } else { + if (rw & REQ_DISCARD) { + for (i = 0; i < num_stripes; i++) { multi->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; multi->stripes[i].dev = map->stripes[stripe_index].dev; + + if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + u64 stripes; + u32 last_stripe = 0; + int j; + + div_u64_rem(stripe_nr_end - 1, + map->num_stripes, + &last_stripe); + + for (j = 0; j < map->num_stripes; j++) { + u32 test; + + div_u64_rem(stripe_nr_end - 1 - j, + map->num_stripes, &test); + if (test == stripe_index) + break; + } + stripes = stripe_nr_end - 1 - j; + do_div(stripes, map->num_stripes); + multi->stripes[i].length = map->stripe_len * + (stripes - stripe_nr + 1); + + if (i == 0) { + multi->stripes[i].length -= + stripe_offset; + stripe_offset = 0; + } + if (stripe_index == last_stripe) + multi->stripes[i].length -= + stripe_end_offset; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + u64 stripes; + int j; + int factor = map->num_stripes / + map->sub_stripes; + u32 last_stripe = 0; + + div_u64_rem(stripe_nr_end - 1, + factor, &last_stripe); + last_stripe *= map->sub_stripes; + + for (j = 0; j < factor; j++) { + u32 test; + + div_u64_rem(stripe_nr_end - 1 - j, + factor, &test); + + if (test == + stripe_index / map->sub_stripes) + break; + } + stripes = stripe_nr_end - 1 - j; + do_div(stripes, factor); + multi->stripes[i].length = map->stripe_len * + (stripes - stripe_nr + 1); + + if (i < map->sub_stripes) { + multi->stripes[i].length -= + stripe_offset; + if (i == map->sub_stripes - 1) + stripe_offset = 0; + } + if (stripe_index >= last_stripe && + stripe_index <= (last_stripe + + map->sub_stripes - 1)) { + multi->stripes[i].length -= + stripe_end_offset; + } + } else + multi->stripes[i].length = *length; + + stripe_index++; + if (stripe_index == map->num_stripes) { + /* This could only happen for RAID0/10 */ + stripe_index = 0; + stripe_nr++; + } + } + } else { + for (i = 0; i < num_stripes; i++) { + multi->stripes[i].physical = + map->stripes[stripe_index].physical + + stripe_offset + + stripe_nr * map->stripe_len; + multi->stripes[i].dev = + map->stripes[stripe_index].dev; + stripe_index++; } - stripe_index++; } if (multi_ret) { *multi_ret = multi; @@ -3128,7 +3189,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, struct btrfs_multi_bio **multi_ret, int mirror_num) { return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, - mirror_num, NULL); + mirror_num); } int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, @@ -3196,14 +3257,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } -int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, - u64 logical, struct page *page) -{ - u64 length = PAGE_CACHE_SIZE; - return __btrfs_map_block(map_tree, READ, logical, &length, - NULL, 0, page); -} - static void end_bio_multi_stripe(struct bio *bio, int err) { struct btrfs_multi_bio *multi = bio->bi_private; |