summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- fs/btrfs/volumes.c 245
1 files changed, 149 insertions, 96 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee4..c7367ae5a3e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -33,17 +33,6 @@
#include "volumes.h"
#include "async-thread.h"
-struct map_lookup {
- u64 type;
- int io_align;
- int io_width;
- int stripe_len;
- int sector_size;
- int num_stripes;
- int sub_stripes;
- struct btrfs_bio_stripe stripes[];
-};
-
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
@@ -162,22 +151,25 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
struct bio *cur;
int again = 0;
unsigned long num_run;
- unsigned long num_sync_run;
unsigned long batch_run = 0;
unsigned long limit;
unsigned long last_waited = 0;
int force_reg = 0;
+ struct blk_plug plug;
+
+ /*
+ * This function runs all the bios we've collected for
+ * a particular device. We don't want to wander off to
+ * another device without first sending all of these down.
+ * So, set up a plug here and finish it off before we return.
+ */
+ blk_start_plug(&plug);
bdi = blk_get_backing_dev_info(device->bdev);
fs_info = device->dev_root->fs_info;
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
- /* we want to make sure that every time we switch from the sync
- * list to the normal list, we unplug
- */
- num_sync_run = 0;
-
loop:
spin_lock(&device->io_lock);
@@ -223,15 +215,6 @@ loop_lock:
spin_unlock(&device->io_lock);
- /*
- * if we're doing the regular priority list, make sure we unplug
- * for any high prio bios we've sent down
- */
- if (pending_bios == &device->pending_bios && num_sync_run > 0) {
- num_sync_run = 0;
- blk_run_backing_dev(bdi, NULL);
- }
-
while (pending) {
rmb();
@@ -259,19 +242,11 @@ loop_lock:
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- if (cur->bi_rw & REQ_SYNC)
- num_sync_run++;
-
submit_bio(cur->bi_rw, cur);
num_run++;
batch_run++;
- if (need_resched()) {
- if (num_sync_run) {
- blk_run_backing_dev(bdi, NULL);
- num_sync_run = 0;
- }
+ if (need_resched())
cond_resched();
- }
/*
* we made progress, there is more work to do and the bdi
@@ -304,13 +279,8 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
- if (need_resched()) {
- if (num_sync_run) {
- blk_run_backing_dev(bdi, NULL);
- num_sync_run = 0;
- }
+ if (need_resched())
cond_resched();
- }
continue;
}
spin_lock(&device->io_lock);
@@ -323,22 +293,6 @@ loop_lock:
}
}
- if (num_sync_run) {
- num_sync_run = 0;
- blk_run_backing_dev(bdi, NULL);
- }
- /*
- * IO has already been through a long path to get here. Checksumming,
- * async helper threads, perhaps compression. We've done a pretty
- * good job of collecting a batch of IO and should just unplug
- * the device right away.
- *
- * This will help anyone who is waiting on the IO, they might have
- * already unplugged, but managed to do so before the bio they
- * cared about found its way down here.
- */
- blk_run_backing_dev(bdi, NULL);
-
cond_resched();
if (again)
goto loop;
@@ -349,6 +303,7 @@ loop_lock:
spin_unlock(&device->io_lock);
done:
+ blk_finish_plug(&plug);
return 0;
}
@@ -1923,6 +1878,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
BUG_ON(ret);
+ trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
+
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
BUG_ON(ret);
@@ -2650,6 +2607,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
*num_bytes = chunk_bytes_by_type(type, calc_size,
map->num_stripes, sub_stripes);
+ trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+
em = alloc_extent_map(GFP_NOFS);
if (!em) {
ret = -ENOMEM;
@@ -2758,6 +2717,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
item_size);
BUG_ON(ret);
}
+
kfree(chunk);
return 0;
}
@@ -2955,14 +2915,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 logical, u64 *length,
struct btrfs_multi_bio **multi_ret,
- int mirror_num, struct page *unplug_page)
+ int mirror_num)
{
struct extent_map *em;
struct map_lookup *map;
struct extent_map_tree *em_tree = &map_tree->map_tree;
u64 offset;
u64 stripe_offset;
+ u64 stripe_end_offset;
u64 stripe_nr;
+ u64 stripe_nr_orig;
+ u64 stripe_nr_end;
int stripes_allocated = 8;
int stripes_required = 1;
int stripe_index;
@@ -2971,7 +2934,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
int max_errors = 0;
struct btrfs_multi_bio *multi = NULL;
- if (multi_ret && !(rw & REQ_WRITE))
+ if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
stripes_allocated = 1;
again:
if (multi_ret) {
@@ -2987,11 +2950,6 @@ again:
em = lookup_extent_mapping(em_tree, logical, *length);
read_unlock(&em_tree->lock);
- if (!em && unplug_page) {
- kfree(multi);
- return 0;
- }
-
if (!em) {
printk(KERN_CRIT "unable to find logical %llu len %llu\n",
(unsigned long long)logical,
@@ -3017,7 +2975,15 @@ again:
max_errors = 1;
}
}
- if (multi_ret && (rw & REQ_WRITE) &&
+ if (rw & REQ_DISCARD) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+ stripes_required = map->num_stripes;
+ }
+ }
+ if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -3037,23 +3003,37 @@ again:
/* stripe_offset is the offset of this block in its stripe*/
stripe_offset = offset - stripe_offset;
- if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP)) {
+ if (rw & REQ_DISCARD)
+ *length = min_t(u64, em->len - offset, *length);
+ else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP)) {
/* we limit the length of each bio to what fits in a stripe */
*length = min_t(u64, em->len - offset,
- map->stripe_len - stripe_offset);
+ map->stripe_len - stripe_offset);
} else {
*length = em->len - offset;
}
- if (!multi_ret && !unplug_page)
+ if (!multi_ret)
goto out;
num_stripes = 1;
stripe_index = 0;
- if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (unplug_page || (rw & REQ_WRITE))
+ stripe_nr_orig = stripe_nr;
+ stripe_nr_end = (offset + *length + map->stripe_len - 1) &
+ (~(map->stripe_len - 1));
+ do_div(stripe_nr_end, map->stripe_len);
+ stripe_end_offset = stripe_nr_end * map->stripe_len -
+ (offset + *length);
+ if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+ if (rw & REQ_DISCARD)
+ num_stripes = min_t(u64, map->num_stripes,
+ stripe_nr_end - stripe_nr_orig);
+ stripe_index = do_div(stripe_nr, map->num_stripes);
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ if (rw & (REQ_WRITE | REQ_DISCARD))
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -3064,7 +3044,7 @@ again:
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (rw & REQ_WRITE)
+ if (rw & (REQ_WRITE | REQ_DISCARD))
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -3075,8 +3055,12 @@ again:
stripe_index = do_div(stripe_nr, factor);
stripe_index *= map->sub_stripes;
- if (unplug_page || (rw & REQ_WRITE))
+ if (rw & REQ_WRITE)
num_stripes = map->sub_stripes;
+ else if (rw & REQ_DISCARD)
+ num_stripes = min_t(u64, map->sub_stripes *
+ (stripe_nr_end - stripe_nr_orig),
+ map->num_stripes);
else if (mirror_num)
stripe_index += mirror_num - 1;
else {
@@ -3094,24 +3078,101 @@ again:
}
BUG_ON(stripe_index >= map->num_stripes);
- for (i = 0; i < num_stripes; i++) {
- if (unplug_page) {
- struct btrfs_device *device;
- struct backing_dev_info *bdi;
-
- device = map->stripes[stripe_index].dev;
- if (device->bdev) {
- bdi = blk_get_backing_dev_info(device->bdev);
- if (bdi->unplug_io_fn)
- bdi->unplug_io_fn(bdi, unplug_page);
- }
- } else {
+ if (rw & REQ_DISCARD) {
+ for (i = 0; i < num_stripes; i++) {
multi->stripes[i].physical =
map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
multi->stripes[i].dev = map->stripes[stripe_index].dev;
+
+ if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+ u64 stripes;
+ u32 last_stripe = 0;
+ int j;
+
+ div_u64_rem(stripe_nr_end - 1,
+ map->num_stripes,
+ &last_stripe);
+
+ for (j = 0; j < map->num_stripes; j++) {
+ u32 test;
+
+ div_u64_rem(stripe_nr_end - 1 - j,
+ map->num_stripes, &test);
+ if (test == stripe_index)
+ break;
+ }
+ stripes = stripe_nr_end - 1 - j;
+ do_div(stripes, map->num_stripes);
+ multi->stripes[i].length = map->stripe_len *
+ (stripes - stripe_nr + 1);
+
+ if (i == 0) {
+ multi->stripes[i].length -=
+ stripe_offset;
+ stripe_offset = 0;
+ }
+ if (stripe_index == last_stripe)
+ multi->stripes[i].length -=
+ stripe_end_offset;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ u64 stripes;
+ int j;
+ int factor = map->num_stripes /
+ map->sub_stripes;
+ u32 last_stripe = 0;
+
+ div_u64_rem(stripe_nr_end - 1,
+ factor, &last_stripe);
+ last_stripe *= map->sub_stripes;
+
+ for (j = 0; j < factor; j++) {
+ u32 test;
+
+ div_u64_rem(stripe_nr_end - 1 - j,
+ factor, &test);
+
+ if (test ==
+ stripe_index / map->sub_stripes)
+ break;
+ }
+ stripes = stripe_nr_end - 1 - j;
+ do_div(stripes, factor);
+ multi->stripes[i].length = map->stripe_len *
+ (stripes - stripe_nr + 1);
+
+ if (i < map->sub_stripes) {
+ multi->stripes[i].length -=
+ stripe_offset;
+ if (i == map->sub_stripes - 1)
+ stripe_offset = 0;
+ }
+ if (stripe_index >= last_stripe &&
+ stripe_index <= (last_stripe +
+ map->sub_stripes - 1)) {
+ multi->stripes[i].length -=
+ stripe_end_offset;
+ }
+ } else
+ multi->stripes[i].length = *length;
+
+ stripe_index++;
+ if (stripe_index == map->num_stripes) {
+ /* This can only happen for RAID0/10 */
+ stripe_index = 0;
+ stripe_nr++;
+ }
+ }
+ } else {
+ for (i = 0; i < num_stripes; i++) {
+ multi->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset +
+ stripe_nr * map->stripe_len;
+ multi->stripes[i].dev =
+ map->stripes[stripe_index].dev;
+ stripe_index++;
}
- stripe_index++;
}
if (multi_ret) {
*multi_ret = multi;
@@ -3128,7 +3189,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
struct btrfs_multi_bio **multi_ret, int mirror_num)
{
return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
- mirror_num, NULL);
+ mirror_num);
}
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3257,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
- u64 logical, struct page *page)
-{
- u64 length = PAGE_CACHE_SIZE;
- return __btrfs_map_block(map_tree, READ, logical, &length,
- NULL, 0, page);
-}
-
static void end_bio_multi_stripe(struct bio *bio, int err)
{
struct btrfs_multi_bio *multi = bio->bi_private;