diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 215 |
1 files changed, 174 insertions, 41 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0913e46972..3ab80e9cd76 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) return NULL; } +static void requeue_list(struct btrfs_pending_bios *pending_bios, + struct bio *head, struct bio *tail) +{ + + struct bio *old_head; + + old_head = pending_bios->head; + pending_bios->head = head; + if (pending_bios->tail) + tail->bi_next = old_head; + else + pending_bios->tail = tail; +} + /* * we try to collect pending bios for a device so we don't get a large * number of procs sending bios down to the same device. This greatly @@ -141,32 +155,49 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) struct bio *pending; struct backing_dev_info *bdi; struct btrfs_fs_info *fs_info; + struct btrfs_pending_bios *pending_bios; struct bio *tail; struct bio *cur; int again = 0; - unsigned long num_run = 0; + unsigned long num_run; + unsigned long num_sync_run; + unsigned long batch_run = 0; unsigned long limit; unsigned long last_waited = 0; + int force_reg = 0; bdi = blk_get_backing_dev_info(device->bdev); fs_info = device->dev_root->fs_info; limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; + /* we want to make sure that every time we switch from the sync + * list to the normal list, we unplug + */ + num_sync_run = 0; + loop: spin_lock(&device->io_lock); loop_lock: + num_run = 0; + /* take all the bios off the list at once and process them * later on (without the lock held). But, remember the * tail and other pointers so the bios can be properly reinserted * into the list if we hit congestion */ - pending = device->pending_bios; - tail = device->pending_bio_tail; + if (!force_reg && device->pending_sync_bios.head) { + pending_bios = &device->pending_sync_bios; + force_reg = 1; + } else { + pending_bios = &device->pending_bios; + force_reg = 0; + } + + pending = pending_bios->head; + tail = pending_bios->tail; WARN_ON(pending && !tail); - device->pending_bios = NULL; - device->pending_bio_tail = NULL; /* * if pending was null this time around, no bios need processing @@ -176,16 +207,45 @@ loop_lock: * device->running_pending is used to synchronize with the * schedule_bio code. */ - if (pending) { - again = 1; - device->running_pending = 1; - } else { + if (device->pending_sync_bios.head == NULL && + device->pending_bios.head == NULL) { again = 0; device->running_pending = 0; + } else { + again = 1; + device->running_pending = 1; } + + pending_bios->head = NULL; + pending_bios->tail = NULL; + spin_unlock(&device->io_lock); + /* + * if we're doing the regular priority list, make sure we unplug + * for any high prio bios we've sent down + */ + if (pending_bios == &device->pending_bios && num_sync_run > 0) { + num_sync_run = 0; + blk_run_backing_dev(bdi, NULL); + } + while (pending) { + + rmb(); + /* we want to work on both lists, but do more bios on the + * sync list than the regular list + */ + if ((num_run > 32 && + pending_bios != &device->pending_sync_bios && + device->pending_sync_bios.head) || + (num_run > 64 && pending_bios == &device->pending_sync_bios && + device->pending_bios.head)) { + spin_lock(&device->io_lock); + requeue_list(pending_bios, pending, tail); + goto loop_lock; + } + cur = pending; pending = pending->bi_next; cur->bi_next = NULL; @@ -196,19 +256,28 @@ loop_lock: wake_up(&fs_info->async_submit_wait); BUG_ON(atomic_read(&cur->bi_cnt) == 0); - bio_get(cur); submit_bio(cur->bi_rw, cur); - bio_put(cur); num_run++; + batch_run++; + + if (bio_sync(cur)) + num_sync_run++; + + if (need_resched()) { + if (num_sync_run) { + blk_run_backing_dev(bdi, NULL); + num_sync_run = 0; + } + cond_resched(); + } /* * we made progress, there is more work to do and the bdi * is now congested. Back off and let other work structs * run instead */ - if (pending && bdi_write_congested(bdi) && num_run > 16 && + if (pending && bdi_write_congested(bdi) && batch_run > 32 && fs_info->fs_devices->open_devices > 1) { - struct bio *old_head; struct io_context *ioc; ioc = current->io_context; @@ -233,17 +302,17 @@ loop_lock: * against it before looping */ last_waited = ioc->last_waited; + if (need_resched()) { + if (num_sync_run) { + blk_run_backing_dev(bdi, NULL); + num_sync_run = 0; + } + cond_resched(); + } continue; } spin_lock(&device->io_lock); - - old_head = device->pending_bios; - device->pending_bios = pending; - if (device->pending_bio_tail) - tail->bi_next = old_head; - else - device->pending_bio_tail = tail; - + requeue_list(pending_bios, pending, tail); device->running_pending = 1; spin_unlock(&device->io_lock); @@ -251,11 +320,18 @@ loop_lock: goto done; } } + + if (num_sync_run) { + num_sync_run = 0; + blk_run_backing_dev(bdi, NULL); + } + + cond_resched(); if (again) goto loop; spin_lock(&device->io_lock); - if (device->pending_bios) + if (device->pending_bios.head || device->pending_sync_bios.head) goto loop_lock; spin_unlock(&device->io_lock); @@ -301,6 +377,7 @@ static noinline int device_list_add(const char *path, memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; + mutex_init(&fs_devices->device_list_mutex); device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -327,7 +404,11 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } INIT_LIST_HEAD(&device->dev_alloc_list); + + mutex_lock(&fs_devices->device_list_mutex); list_add(&device->dev_list, &fs_devices->devices); + mutex_unlock(&fs_devices->device_list_mutex); + device->fs_devices = fs_devices; fs_devices->num_devices++; } @@ -353,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) INIT_LIST_HEAD(&fs_devices->devices); INIT_LIST_HEAD(&fs_devices->alloc_list); INIT_LIST_HEAD(&fs_devices->list); + mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); + mutex_lock(&orig->device_list_mutex); list_for_each_entry(orig_dev, &orig->devices, dev_list) { device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) @@ -378,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) device->fs_devices = fs_devices; fs_devices->num_devices++; } + mutex_unlock(&orig->device_list_mutex); return fs_devices; error: + mutex_unlock(&orig->device_list_mutex); free_fs_devices(fs_devices); return ERR_PTR(-ENOMEM); } @@ -390,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); again: + mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->in_fs_metadata) continue; @@ -409,6 +495,7 @@ again: kfree(device->name); kfree(device); } + mutex_unlock(&fs_devices->device_list_mutex); if (fs_devices->seed) { fs_devices = fs_devices->seed; @@ -529,6 +616,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, device->in_fs_metadata = 0; device->mode = flags; + if (!blk_queue_nonrot(bdev_get_queue(bdev))) + fs_devices->rotating = 1; + fs_devices->open_devices++; if (device->writeable) { fs_devices->rw_devices++; @@ -1056,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device = NULL; devices = &root->fs_info->fs_devices->devices; + mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_for_each_entry(tmp, devices, dev_list) { if (tmp->in_fs_metadata && !tmp->bdev) { device = tmp; break; } } + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); bdev = NULL; bh = NULL; disk_super = NULL; @@ -1116,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto error_brelse; device->in_fs_metadata = 0; + + /* + * the device list mutex makes sure that we don't change + * the device list while someone else is writing out all + * the device supers. + */ + mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_del_init(&device->dev_list); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + device->fs_devices->num_devices--; next_device = list_entry(root->fs_info->fs_devices->devices.next, @@ -1210,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, seed_devices->opened = 1; INIT_LIST_HEAD(&seed_devices->devices); INIT_LIST_HEAD(&seed_devices->alloc_list); + mutex_init(&seed_devices->device_list_mutex); list_splice_init(&fs_devices->devices, &seed_devices->devices); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); list_for_each_entry(device, &seed_devices->devices, dev_list) { @@ -1335,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_lock(&root->fs_info->volume_mutex); devices = &root->fs_info->fs_devices->devices; + /* + * we have the volume lock, so we don't need the extra + * device list mutex while reading the list here. + */ list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { ret = -EEXIST; @@ -1375,6 +1481,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->io_align = root->sectorsize; device->sector_size = root->sectorsize; device->total_bytes = i_size_read(bdev->bd_inode); + device->disk_total_bytes = device->total_bytes; device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; @@ -1388,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } device->fs_devices = root->fs_info->fs_devices; + + /* + * we don't want write_supers to jump in here with our device + * half setup + */ + mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_add(&device->dev_list, &root->fs_info->fs_devices->devices); list_add(&device->dev_alloc_list, &root->fs_info->fs_devices->alloc_list); @@ -1396,6 +1509,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->rw_devices++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; + if (!blk_queue_nonrot(bdev_get_queue(bdev))) + root->fs_info->fs_devices->rotating = 1; + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); btrfs_set_super_total_bytes(&root->fs_info->super_copy, total_bytes + device->total_bytes); @@ -1403,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); btrfs_set_super_num_devices(&root->fs_info->super_copy, total_bytes + 1); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { ret = init_first_rw_device(trans, root, device); @@ -1478,7 +1595,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_mark_buffer_dirty(leaf); @@ -1605,8 +1722,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, int ret; int i; - printk(KERN_INFO "btrfs relocating chunk %llu\n", - (unsigned long long)chunk_offset); root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; @@ -1875,14 +1990,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) device->total_bytes = new_size; if (device->writeable) device->fs_devices->total_rw_bytes -= diff; - ret = btrfs_update_device(trans, device); - if (ret) { - unlock_chunks(root); - btrfs_end_transaction(trans, root); - goto done; - } - WARN_ON(diff > old_total); - btrfs_set_super_total_bytes(super_copy, old_total - diff); unlock_chunks(root); btrfs_end_transaction(trans, root); @@ -1914,7 +2021,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) - goto done; + break; chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); @@ -1927,6 +2034,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) goto done; } + /* Shrinking succeeded, else we would be at "done". */ + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto done; + } + lock_chunks(root); + + device->disk_total_bytes = new_size; + /* Now btrfs_update_device() will change the on-disk size. */ + ret = btrfs_update_device(trans, device); + if (ret) { + unlock_chunks(root); + btrfs_end_transaction(trans, root); + goto done; + } + WARN_ON(diff > old_total); + btrfs_set_super_total_bytes(super_copy, old_total - diff); + unlock_chunks(root); + btrfs_end_transaction(trans, root); done: btrfs_free_path(path); return ret; @@ -2497,7 +2624,7 @@ again: max_errors = 1; } } - if (multi_ret && rw == WRITE && + if (multi_ret && (rw & (1 << BIO_RW)) && stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); @@ -2762,6 +2889,7 @@ static noinline int schedule_bio(struct btrfs_root *root, int rw, struct bio *bio) { int should_queue = 1; + struct btrfs_pending_bios *pending_bios; /* don't bother with additional async steps for reads, right now */ if (!(rw & (1 << BIO_RW))) { @@ -2783,13 +2911,17 @@ static noinline int schedule_bio(struct btrfs_root *root, bio->bi_rw |= rw; spin_lock(&device->io_lock); + if (bio_sync(bio)) + pending_bios = &device->pending_sync_bios; + else + pending_bios = &device->pending_bios; - if (device->pending_bio_tail) - device->pending_bio_tail->bi_next = bio; + if (pending_bios->tail) + pending_bios->tail->bi_next = bio; - device->pending_bio_tail = bio; - if (!device->pending_bios) - device->pending_bios = bio; + pending_bios->tail = bio; + if (!pending_bios->head) + pending_bios->head = bio; if (device->running_pending) should_queue = 0; @@ -3006,7 +3138,8 @@ static int fill_device_from_item(struct extent_buffer *leaf, unsigned long ptr; device->devid = btrfs_device_id(leaf, dev_item); - device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->total_bytes = device->disk_total_bytes; device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); device->type = btrfs_device_type(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item); |