-rw-r--r--  block/Makefile             |   2 +-
-rw-r--r--  block/blk-iopoll.c         | 220 ++++++++++++++++++++++++++++++++
-rw-r--r--  include/linux/blk-iopoll.h |  41 ++++++
-rw-r--r--  include/linux/interrupt.h  |   1 +
-rw-r--r--  kernel/sysctl.c            |  10 +-
5 files changed, 272 insertions(+), 2 deletions(-)
diff --git a/block/Makefile b/block/Makefile
index 6c54ed0ff75..ba74ca6bfa1 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                        blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
                        blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-                       ioctl.o genhd.o scsi_ioctl.o
+                       blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)      += bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)     += noop-iosched.o
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
new file mode 100644
index 00000000000..566db1e7c1c
--- /dev/null
+++ b/block/blk-iopoll.c
@@ -0,0 +1,220 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/blk-iopoll.h>
+#include <linux/delay.h>
+
+#include "blk.h"
+
+int blk_iopoll_enabled = 1;
+EXPORT_SYMBOL(blk_iopoll_enabled);
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * blk_iopoll_sched - Schedule a run of the iopoll handler
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * Add this blk_iopoll structure to the pending poll list and trigger the
+ * raise of the blk iopoll softirq. The driver must already have gotten a
+ * successful return from blk_iopoll_sched_prep() before calling this.
+ **/
+void blk_iopoll_sched(struct blk_iopoll *iop)
+{
+        unsigned long flags;
+
+        local_irq_save(flags);
+        list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
+        __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_sched);
+
+/**
+ * __blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * See blk_iopoll_complete(). This function must be called with interrupts
+ * disabled.
+ **/
+void __blk_iopoll_complete(struct blk_iopoll *iop)
+{
+        list_del(&iop->list);
+        smp_mb__before_clear_bit();
+        clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(__blk_iopoll_complete);
+
+/**
+ * blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * If a driver consumes less than the assigned budget in its run of the iopoll
+ * handler, it'll end the polled mode by calling this function. The iopoll handler
+ * will not be invoked again before blk_iopoll_sched_prep() is called.
+ **/
+void blk_iopoll_complete(struct blk_iopoll *iopoll)
+{
+        unsigned long flags;
+
+        local_irq_save(flags);
+        __blk_iopoll_complete(iopoll);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_complete);
+
+static void blk_iopoll_softirq(struct softirq_action *h)
+{
+        struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
+        unsigned long start_time = jiffies;
+        int rearm = 0, budget = 64;
+
+        local_irq_disable();
+
+        while (!list_empty(list)) {
+                struct blk_iopoll *iop;
+                int work, weight;
+
+                /*
+                 * If softirq window is exhausted then punt.
+                 */
+                if (budget <= 0 || time_after(jiffies, start_time)) {
+                        rearm = 1;
+                        break;
+                }
+
+                local_irq_enable();
+
+                /* Even though interrupts have been re-enabled, this
+                 * access is safe because interrupts can only add new
+                 * entries to the tail of this list, and only ->poll()
+                 * calls can remove this head entry from the list.
+                 */
+                iop = list_entry(list->next, struct blk_iopoll, list);
+
+                weight = iop->weight;
+                work = 0;
+                if (test_bit(IOPOLL_F_SCHED, &iop->state))
+                        work = iop->poll(iop, weight);
+
+                budget -= work;
+
+                local_irq_disable();
+
+                /* Drivers must not modify the iopoll state if they
+                 * consume the entire weight. In such cases this code
+                 * still "owns" the iopoll instance and therefore can
+                 * move the instance around on the list at-will.
+                 */
+                if (work >= weight) {
+                        if (blk_iopoll_disable_pending(iop))
+                                __blk_iopoll_complete(iop);
+                        else
+                                list_move_tail(&iop->list, list);
+                }
+        }
+
+        if (rearm)
+                __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+
+        local_irq_enable();
+}
+
+/**
+ * blk_iopoll_disable - Disable iopoll on this @iop
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * Disable io polling and wait for any pending callbacks to have completed.
+ **/
+void blk_iopoll_disable(struct blk_iopoll *iop)
+{
+        set_bit(IOPOLL_F_DISABLE, &iop->state);
+        while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
+                msleep(1);
+        clear_bit(IOPOLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_disable);
+
+/**
+ * blk_iopoll_enable - Enable iopoll on this @iop
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * Enable iopoll on this @iop. Note that this does not schedule a run of the
+ * handler; it only marks the iopoll structure as active.
+ **/
+void blk_iopoll_enable(struct blk_iopoll *iop)
+{
+        BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
+        smp_mb__before_clear_bit();
+        clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_enable);
+
+/**
+ * blk_iopoll_init - Initialize this @iop
+ * @iop: The parent iopoll structure
+ * @weight: The default weight (or command completion budget)
+ * @poll_fn: The handler to invoke
+ *
+ * Description:
+ * Initialize this blk_iopoll structure. Before being actively used, the driver
+ * must call blk_iopoll_enable().
+ **/
+void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
+{
+        memset(iop, 0, sizeof(*iop));
+        INIT_LIST_HEAD(&iop->list);
+        iop->weight = weight;
+        iop->poll = poll_fn;
+        set_bit(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_init);
+
+static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
+                                           unsigned long action, void *hcpu)
+{
+        /*
+         * If a CPU goes away, splice its entries to the current CPU
+         * and trigger a run of the softirq
+         */
+        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+                int cpu = (unsigned long) hcpu;
+
+                local_irq_disable();
+                list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+                                 &__get_cpu_var(blk_cpu_iopoll));
+                raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+                local_irq_enable();
+        }
+
+        return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
+        .notifier_call  = blk_iopoll_cpu_notify,
+};
+
+static __init int blk_iopoll_setup(void)
+{
+        int i;
+
+        for_each_possible_cpu(i)
+                INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
+
+        open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
+        register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
+        return 0;
+}
+subsys_initcall(blk_iopoll_setup);
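
Usage note (editorial, not part of the patch): the intended call pattern mirrors
NAPI. The driver's interrupt handler claims the instance with
blk_iopoll_sched_prep(), masks further completion interrupts, and calls
blk_iopoll_sched(); the poll callback then reaps completions and leaves polled
mode once it does less work than its budget. A minimal sketch, in which struct
mydev (assumed to embed a struct blk_iopoll member named iopoll) and the
mydev_* helpers are hypothetical stand-ins for driver-specific code:

        /* Poll callback: reap up to @budget completions. */
        static int mydev_iopoll(struct blk_iopoll *iop, int budget)
        {
                struct mydev *dev = container_of(iop, struct mydev, iopoll);
                int done = mydev_reap_completions(dev, budget);

                if (done < budget) {
                        /* Under budget: exit polled mode, unmask the IRQ. */
                        blk_iopoll_complete(iop);
                        mydev_unmask_irq(dev);
                }
                return done;
        }

        static irqreturn_t mydev_irq(int irq, void *data)
        {
                struct mydev *dev = data;

                if (blk_iopoll_enabled && blk_iopoll_sched_prep(&dev->iopoll)) {
                        /* We now own the instance: poll from softirq context. */
                        mydev_mask_irq(dev);
                        blk_iopoll_sched(&dev->iopoll);
                } else {
                        /* Polled mode unavailable: complete in IRQ context
                         * (-1 here meaning "no budget limit"). */
                        mydev_reap_completions(dev, -1);
                }
                return IRQ_HANDLED;
        }

Returning the full budget keeps the instance on the poll list, so the softirq
loop above round-robins it via list_move_tail() instead of completing it.
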
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
new file mode 100644
index 00000000000..b2e1739a2e7
--- /dev/null
+++ b/include/linux/blk-iopoll.h
@@ -0,0 +1,41 @@
+#ifndef BLK_IOPOLL_H
+#define BLK_IOPOLL_H
+
+struct blk_iopoll;
+typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
+
+struct blk_iopoll {
+        struct list_head list;
+        unsigned long state;
+        unsigned long data;
+        int weight;
+        int max;
+        blk_iopoll_fn *poll;
+};
+
+enum {
+        IOPOLL_F_SCHED          = 0,
+        IOPOLL_F_DISABLE        = 1,
+};
+
+static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
+{
+        return !test_bit(IOPOLL_F_DISABLE, &iop->state) &&
+                !test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
+}
+
+static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
+{
+        return test_bit(IOPOLL_F_DISABLE, &iop->state);
+}
+
+extern void blk_iopoll_sched(struct blk_iopoll *);
+extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
+extern void blk_iopoll_complete(struct blk_iopoll *);
+extern void __blk_iopoll_complete(struct blk_iopoll *);
+extern void blk_iopoll_enable(struct blk_iopoll *);
+extern void blk_iopoll_disable(struct blk_iopoll *);
+
+extern int blk_iopoll_enabled;
+
+#endif
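
Lifecycle note (editorial): blk_iopoll_init() deliberately leaves IOPOLL_F_SCHED
set, so a freshly initialized instance cannot be scheduled until
blk_iopoll_enable() clears the bit. Conversely, blk_iopoll_disable() sets
IOPOLL_F_DISABLE and then waits to re-acquire IOPOLL_F_SCHED, which guarantees
no poll callback is still running when it returns. A sketch of the setup and
teardown a driver might do, reusing the hypothetical mydev names and the
mydev_iopoll() callback from the earlier sketch:

        static int mydev_setup(struct mydev *dev)
        {
                /* weight: max completions reaped per mydev_iopoll() call */
                blk_iopoll_init(&dev->iopoll, 32, mydev_iopoll);

                /* Allow scheduling; does not schedule a run by itself. */
                blk_iopoll_enable(&dev->iopoll);
                return 0;
        }

        static void mydev_teardown(struct mydev *dev)
        {
                /* Blocks (msleep) until any in-flight poll has finished. */
                blk_iopoll_disable(&dev->iopoll);
        }
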
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35e7df1e9f3..edd8d5c9039 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -344,6 +344,7 @@ enum
         NET_TX_SOFTIRQ,
         NET_RX_SOFTIRQ,
         BLOCK_SOFTIRQ,
+        BLOCK_IOPOLL_SOFTIRQ,
         TASKLET_SOFTIRQ,
         SCHED_SOFTIRQ,
         HRTIMER_SOFTIRQ,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be76017fd..0ed9fa6f322 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -92,6 +92,7 @@ extern int sysctl_nr_trim_pages;
 #ifdef CONFIG_RCU_TORTURE_TEST
 extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+extern int blk_iopoll_enabled;
 
 /* Constants used for minimum and maximum */
 #ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -990,7 +991,14 @@ static struct ctl_table kern_table[] = {
                 .proc_handler   = &proc_dointvec,
         },
 #endif
-
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "blk_iopoll",
+                .data           = &blk_iopoll_enabled,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec,
+        },
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
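
Runtime note (editorial): the new entry lives in kern_table, so the sysctl
surfaces as /proc/sys/kernel/blk_iopoll. Since drivers are expected to check
blk_iopoll_enabled before electing polled completion (as in the interrupt
handler sketch earlier), writing 0 there, e.g. sysctl -w kernel.blk_iopoll=0,
reverts the system to classic IRQ-driven completion without reloading any
driver.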