summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <jens.axboe@oracle.com>2008-09-13 20:26:01 +0200
committerJens Axboe <jens.axboe@oracle.com>2008-10-09 08:56:09 +0200
commitc7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 (patch)
treeecc3d2517b3471ccc35d4cb4e3b48d4b57205061 /block
parent18887ad910e56066233a07fd3cfb2fa11338b782 (diff)
block: add support for IO CPU affinity
This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted it. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on that specific CPU. In testing, this has been show to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c46
-rw-r--r--block/blk-settings.c2
-rw-r--r--block/blk-softirq.c126
-rw-r--r--block/blk-sysfs.c31
-rw-r--r--block/blk.h12
5 files changed, 162 insertions, 55 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 9c6f818d0c3..5484838f46e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -110,7 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
memset(rq, 0, sizeof(*rq));
INIT_LIST_HEAD(&rq->queuelist);
- INIT_LIST_HEAD(&rq->donelist);
+ rq->cpu = -1;
rq->q = q;
rq->sector = rq->hard_sector = (sector_t) -1;
INIT_HLIST_NODE(&rq->hash);
@@ -322,6 +322,21 @@ void blk_unplug(struct request_queue *q)
}
EXPORT_SYMBOL(blk_unplug);
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+ /*
+ * one level of recursion is ok and is much faster than kicking
+ * the unplug handling
+ */
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+ q->request_fn(q);
+ queue_flag_clear(QUEUE_FLAG_REENTER, q);
+ } else {
+ queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+ kblockd_schedule_work(q, &q->unplug_work);
+ }
+}
+
/**
* blk_start_queue - restart a previously stopped queue
* @q: The &struct request_queue in question
@@ -336,18 +351,7 @@ void blk_start_queue(struct request_queue *q)
WARN_ON(!irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
- /*
- * one level of recursion is ok and is much faster than kicking
- * the unplug handling
- */
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else {
- blk_plug_device(q);
- kblockd_schedule_work(q, &q->unplug_work);
- }
+ blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(blk_start_queue);
@@ -405,15 +409,8 @@ void __blk_run_queue(struct request_queue *q)
* Only recurse once to avoid overrunning the stack, let the unplug
* handling reinvoke the handler shortly if we already got there.
*/
- if (!elv_queue_empty(q)) {
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else {
- blk_plug_device(q);
- kblockd_schedule_work(q, &q->unplug_work);
- }
- }
+ if (!elv_queue_empty(q))
+ blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);
@@ -1056,6 +1053,7 @@ EXPORT_SYMBOL(blk_put_request);
void init_request_from_bio(struct request *req, struct bio *bio)
{
+ req->cpu = bio->bi_comp_cpu;
req->cmd_type = REQ_TYPE_FS;
/*
@@ -1198,13 +1196,15 @@ get_rq:
init_request_from_bio(req, bio);
spin_lock_irq(q->queue_lock);
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+ bio_flagged(bio, BIO_CPU_AFFINE))
+ req->cpu = blk_cpu_to_group(smp_processor_id());
if (elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
if (sync)
__generic_unplug_device(q);
-
spin_unlock_irq(q->queue_lock);
return 0;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d70692badcd..a60e959a12c 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);
-static int __init blk_settings_init(void)
+int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
blk_max_pfn = max_pfn - 1;
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 9e1c43bff66..3a1af551191 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -13,6 +13,70 @@
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+ struct list_head *cpu_list, local_list;
+
+ local_irq_disable();
+ cpu_list = &__get_cpu_var(blk_cpu_done);
+ list_replace_init(cpu_list, &local_list);
+ local_irq_enable();
+
+ while (!list_empty(&local_list)) {
+ struct request *rq;
+
+ rq = list_entry(local_list.next, struct request, csd.list);
+ list_del_init(&rq->csd.list);
+ rq->q->softirq_done_fn(rq);
+ }
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+static void trigger_softirq(void *data)
+{
+ struct request *rq = data;
+ unsigned long flags;
+ struct list_head *list;
+
+ local_irq_save(flags);
+ list = &__get_cpu_var(blk_cpu_done);
+ list_add_tail(&rq->csd.list, list);
+
+ if (list->next == &rq->csd.list)
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Setup and invoke a run of 'trigger_softirq' on the given cpu.
+ */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+ if (cpu_online(cpu)) {
+ struct call_single_data *data = &rq->csd;
+
+ data->func = trigger_softirq;
+ data->info = rq;
+ data->flags = 0;
+
+ __smp_call_function_single(cpu, data);
+ return 0;
+ }
+
+ return 1;
+}
+#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+ return 1;
+}
+#endif
+
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -33,33 +97,10 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
+static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};
-/*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
- struct list_head *cpu_list, local_list;
-
- local_irq_disable();
- cpu_list = &__get_cpu_var(blk_cpu_done);
- list_replace_init(cpu_list, &local_list);
- local_irq_enable();
-
- while (!list_empty(&local_list)) {
- struct request *rq;
-
- rq = list_entry(local_list.next, struct request, donelist);
- list_del_init(&rq->donelist);
- rq->q->softirq_done_fn(rq);
- }
-}
-
/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
@@ -71,25 +112,48 @@ static void blk_done_softirq(struct softirq_action *h)
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
**/
-
void blk_complete_request(struct request *req)
{
- struct list_head *cpu_list;
+ struct request_queue *q = req->q;
unsigned long flags;
+ int ccpu, cpu, group_cpu;
- BUG_ON(!req->q->softirq_done_fn);
+ BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
+ cpu = smp_processor_id();
+ group_cpu = blk_cpu_to_group(cpu);
- cpu_list = &__get_cpu_var(blk_cpu_done);
- list_add_tail(&req->donelist, cpu_list);
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ /*
+ * Select completion CPU
+ */
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+ ccpu = req->cpu;
+ else
+ ccpu = cpu;
+
+ if (ccpu == cpu || ccpu == group_cpu) {
+ struct list_head *list;
+do_local:
+ list = &__get_cpu_var(blk_cpu_done);
+ list_add_tail(&req->csd.list, list);
+
+ /*
+ * if the list only contains our just added request,
+ * signal a raise of the softirq. If there are already
+ * entries there, someone already raised the irq but it
+ * hasn't run yet.
+ */
+ if (list->next == &req->csd.list)
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ } else if (raise_blk_irq(ccpu, req))
+ goto do_local;
local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_complete_request);
-int __init blk_softirq_init(void)
+__init int blk_softirq_init(void)
{
int i;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b9a6ed16664..21e275d7eed 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
return ret;
}
+static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+{
+ unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+
+ return queue_var_show(set != 0, page);
+}
+
+static ssize_t
+queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+{
+ ssize_t ret = -EINVAL;
+#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+ unsigned long val;
+
+ ret = queue_var_store(&val, page, count);
+ spin_lock_irq(q->queue_lock);
+ if (val)
+ queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ else
+ queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ spin_unlock_irq(q->queue_lock);
+#endif
+ return ret;
+}
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
.store = queue_nomerges_store,
};
+static struct queue_sysfs_entry queue_rq_affinity_entry = {
+ .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_rq_affinity_show,
+ .store = queue_rq_affinity_store,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
&queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_nomerges_entry.attr,
+ &queue_rq_affinity_entry.attr,
NULL,
};
diff --git a/block/blk.h b/block/blk.h
index c79f30e1df5..de74254cb91 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -59,4 +59,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
#endif /* BLK_DEV_INTEGRITY */
+static inline int blk_cpu_to_group(int cpu)
+{
+#ifdef CONFIG_SCHED_MC
+ cpumask_t mask = cpu_coregroup_map(cpu);
+ return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
+ return first_cpu(per_cpu(cpu_sibling_map, cpu));
+#else
+ return cpu;
+#endif
+}
+
#endif