summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c8
-rw-r--r--block/blk-exec.c1
-rw-r--r--block/blk-merge.c17
-rw-r--r--block/blk-mq-sysfs.c6
-rw-r--r--block/blk-mq.c124
-rw-r--r--block/blk-sysfs.c17
-rw-r--r--block/genhd.c28
-rw-r--r--block/partition-generic.c2
8 files changed, 145 insertions, 58 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index bf930f481d4..9c888bd22b0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -240,7 +240,7 @@ EXPORT_SYMBOL(blk_stop_queue);
* this function.
*
* This function does not cancel any asynchronous activity arising
- * out of elevator or throttling code. That would require elevaotor_exit()
+ * out of elevator or throttling code. That would require elevator_exit()
* and blkcg_exit_queue() to be called with queue lock initialized.
*
*/
@@ -933,7 +933,7 @@ static struct io_context *rq_ioc(struct bio *bio)
* Get a free request from @q. This function may fail under memory
* pressure or if @q is dead.
*
- * Must be callled with @q->queue_lock held and,
+ * Must be called with @q->queue_lock held and,
* Returns %NULL on failure, with @q->queue_lock held.
* Returns !%NULL on success, with @q->queue_lock *not held*.
*/
@@ -1110,7 +1110,7 @@ rq_starved:
* Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this
* function keeps retrying under memory pressure and fails iff @q is dead.
*
- * Must be callled with @q->queue_lock held and,
+ * Must be called with @q->queue_lock held and,
* Returns %NULL on failure, with @q->queue_lock held.
* Returns !%NULL on success, with @q->queue_lock *not held*.
*/
@@ -1241,7 +1241,7 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
EXPORT_SYMBOL(blk_make_request);
/**
- * blk_rq_set_block_pc - initialize a requeest to type BLOCK_PC
+ * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
* @rq: request to be initialized
*
*/
diff --git a/block/blk-exec.c b/block/blk-exec.c
index f4d27b12c90..9924725fa50 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,6 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
bool is_pm_resume;
WARN_ON(irqs_disabled());
+ WARN_ON(rq->cmd_type == REQ_TYPE_FS);
rq->rq_disk = bd_disk;
rq->end_io = done;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 54535831f1e..77881798f79 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -10,10 +10,11 @@
#include "blk.h"
static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
- struct bio *bio)
+ struct bio *bio,
+ bool no_sg_merge)
{
struct bio_vec bv, bvprv = { NULL };
- int cluster, high, highprv = 1, no_sg_merge;
+ int cluster, high, highprv = 1;
unsigned int seg_size, nr_phys_segs;
struct bio *fbio, *bbio;
struct bvec_iter iter;
@@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
cluster = blk_queue_cluster(q);
seg_size = 0;
nr_phys_segs = 0;
- no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
high = 0;
for_each_bio(bio) {
bio_for_each_segment(bv, bio, iter) {
@@ -88,18 +88,23 @@ new_segment:
void blk_recalc_rq_segments(struct request *rq)
{
- rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
+ bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
+ &rq->q->queue_flags);
+
+ rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
+ no_sg_merge);
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
- if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags))
+ if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
+ bio->bi_vcnt < queue_max_segments(q))
bio->bi_phys_segments = bio->bi_vcnt;
else {
struct bio *nxt = bio->bi_next;
bio->bi_next = NULL;
- bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+ bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
bio->bi_next = nxt;
}
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ed521786755..371d8800b48 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -402,6 +402,12 @@ static void blk_mq_sysfs_init(struct request_queue *q)
}
}
+/* see blk_register_queue() */
+void blk_mq_finish_init(struct request_queue *q)
+{
+ percpu_ref_switch_to_percpu(&q->mq_usage_counter);
+}
+
int blk_mq_register_disk(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4aac82615a4..38f4a165640 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -203,7 +203,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
if (tag != BLK_MQ_TAG_FAIL) {
rq = data->hctx->tags->rqs[tag];
- rq->cmd_flags = 0;
if (blk_mq_tag_busy(data->hctx)) {
rq->cmd_flags = REQ_MQ_INFLIGHT;
atomic_inc(&data->hctx->nr_active);
@@ -258,6 +257,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
if (rq->cmd_flags & REQ_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);
+ rq->cmd_flags = 0;
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
blk_mq_put_tag(hctx, tag, &ctx->last_tag);
@@ -393,6 +393,12 @@ static void blk_mq_start_request(struct request *rq, bool last)
blk_add_timer(rq);
/*
+ * Ensure that ->deadline is visible before set the started
+ * flag and clear the completed flag.
+ */
+ smp_mb__before_atomic();
+
+ /*
* Mark us as started and clear complete. Complete might have been
* set if requeue raced with timeout, which then marked it as
* complete. So be sure to clear complete again when we start
@@ -473,7 +479,11 @@ static void blk_mq_requeue_work(struct work_struct *work)
blk_mq_insert_request(rq, false, false, false);
}
- blk_mq_run_queues(q, false);
+ /*
+ * Use the start variant of queue running here, so that running
+ * the requeue work will kick stopped queues.
+ */
+ blk_mq_start_hw_queues(q);
}
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
@@ -957,14 +967,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
- !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
- blk_insert_flush(rq);
- } else {
- spin_lock(&ctx->lock);
- __blk_mq_insert_request(hctx, rq, at_head);
- spin_unlock(&ctx->lock);
- }
+ spin_lock(&ctx->lock);
+ __blk_mq_insert_request(hctx, rq, at_head);
+ spin_unlock(&ctx->lock);
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
@@ -1321,6 +1326,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
continue;
set->ops->exit_request(set->driver_data, tags->rqs[i],
hctx_idx, i);
+ tags->rqs[i] = NULL;
}
}
@@ -1354,8 +1360,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&tags->page_list);
- tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
- GFP_KERNEL, set->numa_node);
+ tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+ set->numa_node);
if (!tags->rqs) {
blk_mq_free_tags(tags);
return NULL;
@@ -1379,8 +1386,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
this_order--;
do {
- page = alloc_pages_node(set->numa_node, GFP_KERNEL,
- this_order);
+ page = alloc_pages_node(set->numa_node,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+ this_order);
if (page)
break;
if (!this_order--)
@@ -1401,11 +1409,15 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
left -= to_do * rq_size;
for (j = 0; j < to_do; j++) {
tags->rqs[i] = p;
+ tags->rqs[i]->atomic_flags = 0;
+ tags->rqs[i]->cmd_flags = 0;
if (set->ops->init_request) {
if (set->ops->init_request(set->driver_data,
tags->rqs[i], hctx_idx, i,
- set->numa_node))
+ set->numa_node)) {
+ tags->rqs[i] = NULL;
goto fail;
+ }
}
p += rq_size;
@@ -1416,7 +1428,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
return tags;
fail:
- pr_warn("%s: failed to allocate requests\n", __func__);
blk_mq_free_rq_map(set, tags, hctx_idx);
return NULL;
}
@@ -1784,7 +1795,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
if (!q)
goto err_hctxs;
- if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
+ /*
+ * Init percpu_ref in atomic mode so that it's faster to shutdown.
+ * See blk_register_queue() for details.
+ */
+ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
+ PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto err_map;
setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
@@ -1936,6 +1952,60 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
return NOTIFY_OK;
}
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+ int i;
+
+ for (i = 0; i < set->nr_hw_queues; i++) {
+ set->tags[i] = blk_mq_init_rq_map(set, i);
+ if (!set->tags[i])
+ goto out_unwind;
+ }
+
+ return 0;
+
+out_unwind:
+ while (--i >= 0)
+ blk_mq_free_rq_map(set, set->tags[i], i);
+
+ return -ENOMEM;
+}
+
+/*
+ * Allocate the request maps associated with this tag_set. Note that this
+ * may reduce the depth asked for, if memory is tight. set->queue_depth
+ * will be updated to reflect the allocated depth.
+ */
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+ unsigned int depth;
+ int err;
+
+ depth = set->queue_depth;
+ do {
+ err = __blk_mq_alloc_rq_maps(set);
+ if (!err)
+ break;
+
+ set->queue_depth >>= 1;
+ if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
+ err = -ENOMEM;
+ break;
+ }
+ } while (set->queue_depth);
+
+ if (!set->queue_depth || err) {
+ pr_err("blk-mq: failed to allocate request map\n");
+ return -ENOMEM;
+ }
+
+ if (depth != set->queue_depth)
+ pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
+ depth, set->queue_depth);
+
+ return 0;
+}
+
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
@@ -1944,8 +2014,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
*/
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
- int i;
-
if (!set->nr_hw_queues)
return -EINVAL;
if (!set->queue_depth)
@@ -1966,23 +2034,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
- goto out;
+ return -ENOMEM;
- for (i = 0; i < set->nr_hw_queues; i++) {
- set->tags[i] = blk_mq_init_rq_map(set, i);
- if (!set->tags[i])
- goto out_unwind;
- }
+ if (blk_mq_alloc_rq_maps(set))
+ goto enomem;
mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list);
return 0;
-
-out_unwind:
- while (--i >= 0)
- blk_mq_free_rq_map(set, set->tags[i], i);
-out:
+enomem:
+ kfree(set->tags);
+ set->tags = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -1997,6 +2060,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
}
kfree(set->tags);
+ set->tags = NULL;
}
EXPORT_SYMBOL(blk_mq_free_tag_set);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4db5abf96b9..521ae9089c5 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -551,11 +551,20 @@ int blk_register_queue(struct gendisk *disk)
return -ENXIO;
/*
- * Initialization must be complete by now. Finish the initial
- * bypass from queue allocation.
+ * SCSI probing may synchronously create and destroy a lot of
+ * request_queues for non-existent devices. Shutting down a fully
+ * functional queue takes measureable wallclock time as RCU grace
+ * periods are involved. To avoid excessive latency in these
+ * cases, a request_queue starts out in a degraded mode which is
+ * faster to shut down and is made fully functional here as
+ * request_queues for non-existent devices never get registered.
*/
- queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
- blk_queue_bypass_end(q);
+ if (!blk_queue_init_done(q)) {
+ queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
+ blk_queue_bypass_end(q);
+ if (q->mq_ops)
+ blk_mq_finish_init(q);
+ }
ret = blk_trace_init_sysfs(dev);
if (ret)
diff --git a/block/genhd.c b/block/genhd.c
index 791f4194313..bd3060684ab 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -28,10 +28,10 @@ struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
-/* For extended devt allocation. ext_devt_mutex prevents look up
+/* For extended devt allocation. ext_devt_lock prevents look up
* results from going away underneath its user.
*/
-static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_SPINLOCK(ext_devt_lock);
static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type;
@@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
}
/* allocate ext devt */
- mutex_lock(&ext_devt_mutex);
- idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL);
- mutex_unlock(&ext_devt_mutex);
+ idr_preload(GFP_KERNEL);
+
+ spin_lock(&ext_devt_lock);
+ idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
+ spin_unlock(&ext_devt_lock);
+
+ idr_preload_end();
if (idx < 0)
return idx == -ENOSPC ? -EBUSY : idx;
@@ -441,15 +445,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
*/
void blk_free_devt(dev_t devt)
{
- might_sleep();
-
if (devt == MKDEV(0, 0))
return;
if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
- mutex_lock(&ext_devt_mutex);
+ spin_lock(&ext_devt_lock);
idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
- mutex_unlock(&ext_devt_mutex);
+ spin_unlock(&ext_devt_lock);
}
}
@@ -665,7 +667,6 @@ void del_gendisk(struct gendisk *disk)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk));
- blk_free_devt(disk_to_dev(disk)->devt);
}
EXPORT_SYMBOL(del_gendisk);
@@ -690,13 +691,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
} else {
struct hd_struct *part;
- mutex_lock(&ext_devt_mutex);
+ spin_lock(&ext_devt_lock);
part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
if (part && get_disk(part_to_disk(part))) {
*partno = part->partno;
disk = part_to_disk(part);
}
- mutex_unlock(&ext_devt_mutex);
+ spin_unlock(&ext_devt_lock);
}
return disk;
@@ -1098,6 +1099,7 @@ static void disk_release(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
+ blk_free_devt(dev->devt);
disk_release_events(disk);
kfree(disk->random);
disk_replace_part_tbl(disk, NULL);
@@ -1543,7 +1545,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
/**
* disk_clear_events - synchronously check, clear and return pending events
* @disk: disk to fetch and clear events from
- * @mask: mask of events to be fetched and clearted
+ * @mask: mask of events to be fetched and cleared
*
* Disk events are synchronously checked and pending events in @mask
* are cleared and returned. This ignores the block count.
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 789cdea0589..0d9e5f97f0a 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
struct hd_struct *p = dev_to_part(dev);
+ blk_free_devt(dev->devt);
free_part_stats(p);
free_part_info(p);
kfree(p);
@@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno)
rcu_assign_pointer(ptbl->last_lookup, NULL);
kobject_put(part->holder_dir);
device_del(part_to_dev(part));
- blk_free_devt(part_devt(part));
hd_struct_put(part);
}