summaryrefslogtreecommitdiffstats
path: root/include/linux/iocontext.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/iocontext.h')
-rw-r--r--include/linux/iocontext.h136
1 files changed, 95 insertions, 41 deletions
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 5037a0ad231..7e1371c4bcc 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -3,32 +3,92 @@
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
-struct cfq_queue;
-struct cfq_ttime {
- unsigned long last_end_request;
-
- unsigned long ttime_total;
- unsigned long ttime_samples;
- unsigned long ttime_mean;
+enum {
+ ICQ_IOPRIO_CHANGED,
+ ICQ_CGROUP_CHANGED,
};
-struct cfq_io_context {
- void *key;
-
- struct cfq_queue *cfqq[2];
-
- struct io_context *ioc;
-
- struct cfq_ttime ttime;
-
- struct list_head queue_list;
- struct hlist_node cic_list;
-
- void (*dtor)(struct io_context *); /* destructor */
- void (*exit)(struct io_context *); /* called on task exit */
+/*
+ * An io_cq (icq) is association between an io_context (ioc) and a
+ * request_queue (q). This is used by elevators which need to track
+ * information per ioc - q pair.
+ *
+ * Elevator can request use of icq by setting elevator_type->icq_size and
+ * ->icq_align. Both size and align must be larger than that of struct
+ * io_cq and elevator can use the tail area for private information. The
+ * recommended way to do this is defining a struct which contains io_cq as
+ * the first member followed by private members and using its size and
+ * align. For example,
+ *
+ * struct snail_io_cq {
+ * struct io_cq icq;
+ * int poke_snail;
+ * int feed_snail;
+ * };
+ *
+ * struct elevator_type snail_elv_type {
+ * .ops = { ... },
+ * .icq_size = sizeof(struct snail_io_cq),
+ * .icq_align = __alignof__(struct snail_io_cq),
+ * ...
+ * };
+ *
+ * If icq_size is set, block core will manage icq's. All requests will
+ * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn()
+ * is called and be holding a reference to the associated io_context.
+ *
+ * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
+ * called and, on destruction, ->elevator_exit_icq_fn(). Both functions
+ * are called with both the associated io_context and queue locks held.
+ *
+ * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding
+ * queue lock but the returned icq is valid only until the queue lock is
+ * released. Elevators can not and should not try to create or destroy
+ * icq's.
+ *
+ * As icq's are linked from both ioc and q, the locking rules are a bit
+ * complex.
+ *
+ * - ioc lock nests inside q lock.
+ *
+ * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
+ * q->icq_list and icq->q_node by q lock.
+ *
+ * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
+ * itself is protected by q lock. However, both the indexes and icq
+ * itself are also RCU managed and lookup can be performed holding only
+ * the q lock.
+ *
+ * - icq's are not reference counted. They are destroyed when either the
+ * ioc or q goes away. Each request with icq set holds an extra
+ * reference to ioc to ensure it stays until the request is completed.
+ *
+ * - Linking and unlinking icq's are performed while holding both ioc and q
+ * locks. Due to the lock ordering, q exit is simple but ioc exit
+ * requires reverse-order double lock dance.
+ */
+struct io_cq {
+ struct request_queue *q;
+ struct io_context *ioc;
- struct rcu_head rcu_head;
+ /*
+ * q_node and ioc_node link io_cq through icq_list of q and ioc
+ * respectively. Both fields are unused once ioc_exit_icq() is
+ * called and shared with __rcu_icq_cache and __rcu_head which are
+ * used for RCU free of io_cq.
+ */
+ union {
+ struct list_head q_node;
+ struct kmem_cache *__rcu_icq_cache;
+ };
+ union {
+ struct hlist_node ioc_node;
+ struct rcu_head __rcu_head;
+ };
+
+ unsigned long changed;
};
/*
@@ -43,11 +103,6 @@ struct io_context {
spinlock_t lock;
unsigned short ioprio;
- unsigned short ioprio_changed;
-
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
- unsigned short cgroup_changed;
-#endif
/*
* For request batching
@@ -55,9 +110,11 @@ struct io_context {
int nr_batch_requests; /* Number of requests left in the batch */
unsigned long last_waited; /* Time last woken after wait for request */
- struct radix_tree_root radix_root;
- struct hlist_head cic_list;
- void __rcu *ioc_data;
+ struct radix_tree_root icq_tree;
+ struct io_cq __rcu *icq_hint;
+ struct hlist_head icq_list;
+
+ struct work_struct release_work;
};
static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -76,20 +133,17 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
struct task_struct;
#ifdef CONFIG_BLOCK
-int put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
void exit_io_context(struct task_struct *task);
-struct io_context *get_io_context(gfp_t gfp_flags, int node);
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+struct io_context *get_task_io_context(struct task_struct *task,
+ gfp_t gfp_flags, int node);
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
+void ioc_cgroup_changed(struct io_context *ioc);
#else
-static inline void exit_io_context(struct task_struct *task)
-{
-}
-
struct io_context;
-static inline int put_io_context(struct io_context *ioc)
-{
- return 1;
-}
+static inline void put_io_context(struct io_context *ioc,
+ struct request_queue *locked_q) { }
+static inline void exit_io_context(struct task_struct *task) { }
#endif
#endif