diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 8 | ||||
-rw-r--r-- | block/blk-exec.c | 1 | ||||
-rw-r--r-- | block/blk-merge.c | 17 | ||||
-rw-r--r-- | block/blk-mq-sysfs.c | 6 | ||||
-rw-r--r-- | block/blk-mq.c | 124 | ||||
-rw-r--r-- | block/blk-sysfs.c | 17 | ||||
-rw-r--r-- | block/genhd.c | 28 | ||||
-rw-r--r-- | block/partition-generic.c | 2 |
8 files changed, 145 insertions, 58 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index bf930f481d4..9c888bd22b0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -240,7 +240,7 @@ EXPORT_SYMBOL(blk_stop_queue); * this function. * * This function does not cancel any asynchronous activity arising - * out of elevator or throttling code. That would require elevaotor_exit() + * out of elevator or throttling code. That would require elevator_exit() * and blkcg_exit_queue() to be called with queue lock initialized. * */ @@ -933,7 +933,7 @@ static struct io_context *rq_ioc(struct bio *bio) * Get a free request from @q. This function may fail under memory * pressure or if @q is dead. * - * Must be callled with @q->queue_lock held and, + * Must be called with @q->queue_lock held and, * Returns %NULL on failure, with @q->queue_lock held. * Returns !%NULL on success, with @q->queue_lock *not held*. */ @@ -1110,7 +1110,7 @@ rq_starved: * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this * function keeps retrying under memory pressure and fails iff @q is dead. * - * Must be callled with @q->queue_lock held and, + * Must be called with @q->queue_lock held and, * Returns %NULL on failure, with @q->queue_lock held. * Returns !%NULL on success, with @q->queue_lock *not held*. */ @@ -1241,7 +1241,7 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, EXPORT_SYMBOL(blk_make_request); /** - * blk_rq_set_block_pc - initialize a requeest to type BLOCK_PC + * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized * */ diff --git a/block/blk-exec.c b/block/blk-exec.c index f4d27b12c90..9924725fa50 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -56,6 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, bool is_pm_resume; WARN_ON(irqs_disabled()); + WARN_ON(rq->cmd_type == REQ_TYPE_FS); rq->rq_disk = bd_disk; rq->end_io = done; diff --git a/block/blk-merge.c b/block/blk-merge.c index 54535831f1e..77881798f79 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -10,10 +10,11 @@ #include "blk.h" static unsigned int __blk_recalc_rq_segments(struct request_queue *q, - struct bio *bio) + struct bio *bio, + bool no_sg_merge) { struct bio_vec bv, bvprv = { NULL }; - int cluster, high, highprv = 1, no_sg_merge; + int cluster, high, highprv = 1; unsigned int seg_size, nr_phys_segs; struct bio *fbio, *bbio; struct bvec_iter iter; @@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, cluster = blk_queue_cluster(q); seg_size = 0; nr_phys_segs = 0; - no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); high = 0; for_each_bio(bio) { bio_for_each_segment(bv, bio, iter) { @@ -88,18 +88,23 @@ new_segment: void blk_recalc_rq_segments(struct request *rq) { - rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio); + bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, + &rq->q->queue_flags); + + rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio, + no_sg_merge); } void blk_recount_segments(struct request_queue *q, struct bio *bio) { - if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags)) + if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && + bio->bi_vcnt < queue_max_segments(q)) bio->bi_phys_segments = bio->bi_vcnt; else { struct bio *nxt = bio->bi_next; bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); bio->bi_next = nxt; } diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ed521786755..371d8800b48 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -402,6 +402,12 @@ static void blk_mq_sysfs_init(struct request_queue *q) } } +/* see blk_register_queue() */ +void blk_mq_finish_init(struct request_queue *q) +{ + percpu_ref_switch_to_percpu(&q->mq_usage_counter); +} + int blk_mq_register_disk(struct gendisk *disk) { struct device *dev = disk_to_dev(disk); diff --git a/block/blk-mq.c b/block/blk-mq.c index 4aac82615a4..38f4a165640 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -203,7 +203,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) if (tag != BLK_MQ_TAG_FAIL) { rq = data->hctx->tags->rqs[tag]; - rq->cmd_flags = 0; if (blk_mq_tag_busy(data->hctx)) { rq->cmd_flags = REQ_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); @@ -258,6 +257,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, if (rq->cmd_flags & REQ_MQ_INFLIGHT) atomic_dec(&hctx->nr_active); + rq->cmd_flags = 0; clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); blk_mq_put_tag(hctx, tag, &ctx->last_tag); @@ -393,6 +393,12 @@ static void blk_mq_start_request(struct request *rq, bool last) blk_add_timer(rq); /* + * Ensure that ->deadline is visible before set the started + * flag and clear the completed flag. + */ + smp_mb__before_atomic(); + + /* * Mark us as started and clear complete. Complete might have been * set if requeue raced with timeout, which then marked it as * complete. So be sure to clear complete again when we start @@ -473,7 +479,11 @@ static void blk_mq_requeue_work(struct work_struct *work) blk_mq_insert_request(rq, false, false, false); } - blk_mq_run_queues(q, false); + /* + * Use the start variant of queue running here, so that running + * the requeue work will kick stopped queues. + */ + blk_mq_start_hw_queues(q); } void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) @@ -957,14 +967,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) && - !(rq->cmd_flags & (REQ_FLUSH_SEQ))) { - blk_insert_flush(rq); - } else { - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, at_head); - spin_unlock(&ctx->lock); - } + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, at_head); + spin_unlock(&ctx->lock); if (run_queue) blk_mq_run_hw_queue(hctx, async); @@ -1321,6 +1326,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, continue; set->ops->exit_request(set->driver_data, tags->rqs[i], hctx_idx, i); + tags->rqs[i] = NULL; } } @@ -1354,8 +1360,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, INIT_LIST_HEAD(&tags->page_list); - tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), - GFP_KERNEL, set->numa_node); + tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + set->numa_node); if (!tags->rqs) { blk_mq_free_tags(tags); return NULL; @@ -1379,8 +1386,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, this_order--; do { - page = alloc_pages_node(set->numa_node, GFP_KERNEL, - this_order); + page = alloc_pages_node(set->numa_node, + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + this_order); if (page) break; if (!this_order--) @@ -1401,11 +1409,15 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, left -= to_do * rq_size; for (j = 0; j < to_do; j++) { tags->rqs[i] = p; + tags->rqs[i]->atomic_flags = 0; + tags->rqs[i]->cmd_flags = 0; if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, tags->rqs[i], hctx_idx, i, - set->numa_node)) + set->numa_node)) { + tags->rqs[i] = NULL; goto fail; + } } p += rq_size; @@ -1416,7 +1428,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, return tags; fail: - pr_warn("%s: failed to allocate requests\n", __func__); blk_mq_free_rq_map(set, tags, hctx_idx); return NULL; } @@ -1784,7 +1795,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) + /* + * Init percpu_ref in atomic mode so that it's faster to shutdown. + * See blk_register_queue() for details. + */ + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, + PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); @@ -1936,6 +1952,60 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, return NOTIFY_OK; } +static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) +{ + int i; + + for (i = 0; i < set->nr_hw_queues; i++) { + set->tags[i] = blk_mq_init_rq_map(set, i); + if (!set->tags[i]) + goto out_unwind; + } + + return 0; + +out_unwind: + while (--i >= 0) + blk_mq_free_rq_map(set, set->tags[i], i); + + return -ENOMEM; +} + +/* + * Allocate the request maps associated with this tag_set. Note that this + * may reduce the depth asked for, if memory is tight. set->queue_depth + * will be updated to reflect the allocated depth. + */ +static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) +{ + unsigned int depth; + int err; + + depth = set->queue_depth; + do { + err = __blk_mq_alloc_rq_maps(set); + if (!err) + break; + + set->queue_depth >>= 1; + if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { + err = -ENOMEM; + break; + } + } while (set->queue_depth); + + if (!set->queue_depth || err) { + pr_err("blk-mq: failed to allocate request map\n"); + return -ENOMEM; + } + + if (depth != set->queue_depth) + pr_info("blk-mq: reduced tag depth (%u -> %u)\n", + depth, set->queue_depth); + + return 0; +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -1944,8 +2014,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, */ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) { - int i; - if (!set->nr_hw_queues) return -EINVAL; if (!set->queue_depth) @@ -1966,23 +2034,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!set->tags) - goto out; + return -ENOMEM; - for (i = 0; i < set->nr_hw_queues; i++) { - set->tags[i] = blk_mq_init_rq_map(set, i); - if (!set->tags[i]) - goto out_unwind; - } + if (blk_mq_alloc_rq_maps(set)) + goto enomem; mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); return 0; - -out_unwind: - while (--i >= 0) - blk_mq_free_rq_map(set, set->tags[i], i); -out: +enomem: + kfree(set->tags); + set->tags = NULL; return -ENOMEM; } EXPORT_SYMBOL(blk_mq_alloc_tag_set); @@ -1997,6 +2060,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) } kfree(set->tags); + set->tags = NULL; } EXPORT_SYMBOL(blk_mq_free_tag_set); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4db5abf96b9..521ae9089c5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -551,11 +551,20 @@ int blk_register_queue(struct gendisk *disk) return -ENXIO; /* - * Initialization must be complete by now. Finish the initial - * bypass from queue allocation. + * SCSI probing may synchronously create and destroy a lot of + * request_queues for non-existent devices. Shutting down a fully + * functional queue takes measureable wallclock time as RCU grace + * periods are involved. To avoid excessive latency in these + * cases, a request_queue starts out in a degraded mode which is + * faster to shut down and is made fully functional here as + * request_queues for non-existent devices never get registered. */ - queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); - blk_queue_bypass_end(q); + if (!blk_queue_init_done(q)) { + queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + blk_queue_bypass_end(q); + if (q->mq_ops) + blk_mq_finish_init(q); + } ret = blk_trace_init_sysfs(dev); if (ret) diff --git a/block/genhd.c b/block/genhd.c index 791f4194313..bd3060684ab 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -28,10 +28,10 @@ struct kobject *block_depr; /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) -/* For extended devt allocation. ext_devt_mutex prevents look up +/* For extended devt allocation. ext_devt_lock prevents look up * results from going away underneath its user. */ -static DEFINE_MUTEX(ext_devt_mutex); +static DEFINE_SPINLOCK(ext_devt_lock); static DEFINE_IDR(ext_devt_idr); static struct device_type disk_type; @@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) } /* allocate ext devt */ - mutex_lock(&ext_devt_mutex); - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL); - mutex_unlock(&ext_devt_mutex); + idr_preload(GFP_KERNEL); + + spin_lock(&ext_devt_lock); + idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); + spin_unlock(&ext_devt_lock); + + idr_preload_end(); if (idx < 0) return idx == -ENOSPC ? -EBUSY : idx; @@ -441,15 +445,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) */ void blk_free_devt(dev_t devt) { - might_sleep(); - if (devt == MKDEV(0, 0)) return; if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - mutex_lock(&ext_devt_mutex); + spin_lock(&ext_devt_lock); idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - mutex_unlock(&ext_devt_mutex); + spin_unlock(&ext_devt_lock); } } @@ -665,7 +667,6 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); - blk_free_devt(disk_to_dev(disk)->devt); } EXPORT_SYMBOL(del_gendisk); @@ -690,13 +691,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } else { struct hd_struct *part; - mutex_lock(&ext_devt_mutex); + spin_lock(&ext_devt_lock); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); if (part && get_disk(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); } - mutex_unlock(&ext_devt_mutex); + spin_unlock(&ext_devt_lock); } return disk; @@ -1098,6 +1099,7 @@ static void disk_release(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); + blk_free_devt(dev->devt); disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); @@ -1543,7 +1545,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) /** * disk_clear_events - synchronously check, clear and return pending events * @disk: disk to fetch and clear events from - * @mask: mask of events to be fetched and clearted + * @mask: mask of events to be fetched and cleared * * Disk events are synchronously checked and pending events in @mask * are cleared and returned. This ignores the block count. diff --git a/block/partition-generic.c b/block/partition-generic.c index 789cdea0589..0d9e5f97f0a 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + blk_free_devt(dev->devt); free_part_stats(p); free_part_info(p); kfree(p); @@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno) rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); - blk_free_devt(part_devt(part)); hd_struct_put(part); } |