Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/bio.h        14
-rw-r--r--  include/linux/blkdev.h     25
-rw-r--r--  include/linux/elevator.h    8
-rw-r--r--  include/linux/fs.h         19
-rw-r--r--  include/linux/init_task.h   2
-rw-r--r--  include/linux/ioprio.h     87
-rw-r--r--  include/linux/sched.h       6
-rw-r--r--  include/linux/writeback.h   6
8 files changed, 152 insertions, 15 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 038022763f0..36ef29fa0d8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -22,6 +22,7 @@
#include <linux/highmem.h>
#include <linux/mempool.h>
+#include <linux/ioprio.h>
/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
#include <asm/io.h>
@@ -150,6 +151,19 @@ struct bio {
#define BIO_RW_SYNC 4
/*
+ * upper 16 bits of bi_rw define the io priority of this bio
+ */
+#define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - IOPRIO_BITS)
+#define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT)
+#define bio_prio_valid(bio) ioprio_valid(bio_prio(bio))
+
+#define bio_set_prio(bio, prio) do { \
+ WARN_ON(prio >= (1 << IOPRIO_BITS)); \
+ (bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1); \
+ (bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \
+} while (0)
+
+/*
* various member access, note that bio_data should of course not be used
* on highmem page vectors
*/
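
A minimal sketch of how these bio priority macros fit together, assuming an already-allocated bio and a priority value built as (class << IOPRIO_CLASS_SHIFT) | data; the helper name is illustrative, not part of the patch:

#include <linux/bio.h>
#include <linux/ioprio.h>

/*
 * Illustrative only: tag a bio with an explicit io priority before
 * submission. bio_set_prio() clears the upper bits of bi_rw and stores
 * the 16-bit priority there; bio_prio() recovers it unchanged.
 */
static void example_tag_bio(struct bio *bio, unsigned short class,
			    unsigned short data)
{
	unsigned short prio = (class << IOPRIO_CLASS_SHIFT) | data;

	bio_set_prio(bio, prio);
	WARN_ON(bio_prio(bio) != prio);
}
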
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b54a0348a89..21a8674cd14 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -54,16 +54,23 @@ struct as_io_context {
struct cfq_queue;
struct cfq_io_context {
- void (*dtor)(struct cfq_io_context *);
- void (*exit)(struct cfq_io_context *);
-
- struct io_context *ioc;
-
/*
* circular list of cfq_io_contexts belonging to a process io context
*/
struct list_head list;
struct cfq_queue *cfqq;
+ void *key;
+
+ struct io_context *ioc;
+
+ unsigned long last_end_request;
+ unsigned long last_queue;
+ unsigned long ttime_total;
+ unsigned long ttime_samples;
+ unsigned long ttime_mean;
+
+ void (*dtor)(struct cfq_io_context *);
+ void (*exit)(struct cfq_io_context *);
};
/*
@@ -73,7 +80,9 @@ struct cfq_io_context {
*/
struct io_context {
atomic_t refcount;
- pid_t pid;
+ struct task_struct *task;
+
+ int (*set_ioprio)(struct io_context *, unsigned int);
/*
* For request batching
@@ -81,8 +90,6 @@ struct io_context {
unsigned long last_waited; /* Time last woken after wait for request */
int nr_batch_requests; /* Number of requests left in the batch */
- spinlock_t lock;
-
struct as_io_context *aic;
struct cfq_io_context *cic;
};
@@ -134,6 +141,8 @@ struct request {
void *elevator_private;
+ unsigned short ioprio;
+
int rq_status; /* should split this into a few status bits */
struct gendisk *rq_disk;
int errors;
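
A sketch of how the new request->ioprio field might get filled in at request setup time, assuming the block layer has both the submitting bio and the current task in hand; the helper name is hypothetical and the fallback policy is only one plausible choice:

#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/ioprio.h>
#include <linux/sched.h>

/*
 * Hypothetical helper: take the stronger of the bio's embedded priority
 * and the submitting task's priority, per ioprio_best().
 */
static void example_init_request_ioprio(struct request *rq, struct bio *bio)
{
	rq->ioprio = ioprio_best(bio_prio(bio), current->ioprio);
}
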
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ee54f81faad..ea6bbc2d740 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -16,9 +16,9 @@ typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *);
typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *);
typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
-typedef int (elevator_may_queue_fn) (request_queue_t *, int);
+typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
-typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, int);
typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
@@ -96,9 +96,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *);
extern struct request *elv_latter_request(request_queue_t *, struct request *);
extern int elv_register_queue(request_queue_t *q);
extern void elv_unregister_queue(request_queue_t *q);
-extern int elv_may_queue(request_queue_t *, int);
+extern int elv_may_queue(request_queue_t *, int, struct bio *);
extern void elv_completed_request(request_queue_t *, struct request *);
-extern int elv_set_request(request_queue_t *, struct request *, int);
+extern int elv_set_request(request_queue_t *, struct request *, struct bio *, int);
extern void elv_put_request(request_queue_t *, struct request *);
/*
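
The extra struct bio * argument lets a scheduler see the io priority before it has allocated any per-request state. A sketch of an elevator_set_req_fn using it, with a hypothetical private data structure (none of the example_* names come from this patch):

#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/ioprio.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct example_rq_data {
	unsigned short ioprio;
};

/*
 * Illustrative elevator_set_req_fn: prefer the bio's priority when it
 * carries a valid one, otherwise fall back to the submitting task's.
 */
static int example_set_request(request_queue_t *q, struct request *rq,
			       struct bio *bio, int gfp_mask)
{
	struct example_rq_data *d = kmalloc(sizeof(*d), gfp_mask);

	if (!d)
		return -ENOMEM;

	if (bio && bio_prio_valid(bio))
		d->ioprio = bio_prio(bio);
	else
		d->ioprio = current->ioprio;

	rq->elevator_private = d;
	return 0;
}
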
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3ae8e37bdfc..047bde30836 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -213,6 +213,7 @@ extern int dir_notify_enable;
#include <linux/radix-tree.h>
#include <linux/prio_tree.h>
#include <linux/init.h>
+#include <linux/sched.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
@@ -822,16 +823,34 @@ enum {
#define vfs_check_frozen(sb, level) \
wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
+static inline void get_fs_excl(void)
+{
+ atomic_inc(&current->fs_excl);
+}
+
+static inline void put_fs_excl(void)
+{
+ atomic_dec(&current->fs_excl);
+}
+
+static inline int has_fs_excl(void)
+{
+ return atomic_read(&current->fs_excl);
+}
+
+
/*
* Superblock locking.
*/
static inline void lock_super(struct super_block * sb)
{
+ get_fs_excl();
down(&sb->s_lock);
}
static inline void unlock_super(struct super_block * sb)
{
+ put_fs_excl();
up(&sb->s_lock);
}
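
The three helpers above just maintain a per-task count of filesystem-exclusive resources held (the superblock lock being one example), so lower layers can tell when stalling the task would also stall whoever is waiting on those resources. A sketch of how an io scheduler might consume it; the boost policy shown is illustrative, not defined by this header:

#include <linux/fs.h>
#include <linux/ioprio.h>

/*
 * Illustrative: while the current task holds fs-exclusive resources,
 * treat it as best-effort class, level 0, regardless of its own setting.
 */
static unsigned short example_effective_ioprio(void)
{
	if (has_fs_excl())
		return IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT;

	return current->ioprio;
}
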
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 03206a425d7..c727c195a91 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -81,6 +81,7 @@ extern struct group_info init_groups;
.mm = NULL, \
.active_mm = &init_mm, \
.run_list = LIST_HEAD_INIT(tsk.run_list), \
+ .ioprio = 0, \
.time_slice = HZ, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
@@ -110,6 +111,7 @@ extern struct group_info init_groups;
.proc_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
+ .fs_excl = ATOMIC_INIT(0), \
}
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
new file mode 100644
index 00000000000..7811300d88e
--- /dev/null
+++ b/include/linux/ioprio.h
@@ -0,0 +1,87 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+#include <linux/sched.h>
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS (16)
+#define IOPRIO_CLASS_SHIFT (13)
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
+
+#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR (8)
+
+asmlinkage int sys_ioprio_set(int, int, int);
+asmlinkage int sys_ioprio_get(int, int);
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+/*
+ * if process has set io priority explicitly, use that. if not, convert
+ * the cpu scheduler nice value to an io priority
+ */
+#define IOPRIO_NORM (4)
+static inline int task_ioprio(struct task_struct *task)
+{
+ WARN_ON(!ioprio_valid(task->ioprio));
+ return IOPRIO_PRIO_DATA(task->ioprio);
+}
+
+static inline int task_nice_ioprio(struct task_struct *task)
+{
+ return (task_nice(task) + 20) / 5;
+}
+
+/*
+ * For inheritance, return the highest of the two given priorities
+ */
+static inline int ioprio_best(unsigned short aprio, unsigned short bprio)
+{
+ unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
+ unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+
+ if (!ioprio_valid(aprio))
+ return bprio;
+ if (!ioprio_valid(bprio))
+ return aprio;
+
+ if (aclass == IOPRIO_CLASS_NONE)
+ aclass = IOPRIO_CLASS_BE;
+ if (bclass == IOPRIO_CLASS_NONE)
+ bclass = IOPRIO_CLASS_BE;
+
+ if (aclass == bclass)
+ return min(aprio, bprio);
+ if (aclass > bclass)
+ return bprio;
+ else
+ return aprio;
+}
+
+#endif
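
To make the encoding concrete: a priority value packs a 3-bit class above 13 bits of per-class data, i.e. (class << IOPRIO_CLASS_SHIFT) | data. The header does not yet provide a composing macro, so the example below spells it out by hand; the numbers in the comment are a worked example of ioprio_best():

#include <linux/ioprio.h>

/*
 * RT class, level 2  -> (1 << 13) | 2 = 0x2002
 * BE class, level 4  -> (2 << 13) | 4 = 0x4004
 * ioprio_best(0x2002, 0x4004) == 0x2002: the realtime class wins, and
 * within a single class the numerically smaller value would win.
 */
static inline unsigned short example_make_ioprio(int class, int data)
{
	return (class << IOPRIO_CLASS_SHIFT) | IOPRIO_PRIO_DATA(data);
}
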
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9530b190316..ff48815bd3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -608,6 +608,8 @@ struct task_struct {
struct list_head run_list;
prio_array_t *array;
+ unsigned short ioprio;
+
unsigned long sleep_avg;
unsigned long long timestamp, last_ran;
unsigned long long sched_time; /* sched_clock time spent running */
@@ -763,6 +765,7 @@ struct task_struct {
nodemask_t mems_allowed;
int cpuset_mems_generation;
#endif
+ atomic_t fs_excl; /* holding fs exclusive resources */
};
static inline pid_t process_group(struct task_struct *tsk)
@@ -1112,7 +1115,8 @@ extern void unhash_process(struct task_struct *p);
/*
* Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
- * subscriptions and synchronises with wait4(). Also used in procfs.
+ * subscriptions and synchronises with wait4(). Also used in procfs. Also
+ * pins the final release of task.io_context.
*
* Nests both inside and outside of read_lock(&tasklist_lock).
* It must not be nested with write_lock_irq(&tasklist_lock),
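
Per the updated comment, code that looks at another task's io_context should hold that task's alloc_lock (via task_lock()) so the context cannot be released underneath it. A minimal sketch of that pattern, assuming the existing task_struct->io_context pointer and the atomic refcount shown in blkdev.h above; the helper name is illustrative:

#include <linux/sched.h>
#include <linux/blkdev.h>

/*
 * Illustrative: grab a counted reference to another task's io_context
 * under task_lock(), which pins its final release.
 */
static struct io_context *example_get_ioc(struct task_struct *task)
{
	struct io_context *ioc;

	task_lock(task);
	ioc = task->io_context;
	if (ioc)
		atomic_inc(&ioc->refcount);
	task_unlock(task);

	return ioc;
}
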
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 1262cb43c3a..d5c3fe1bf33 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -14,11 +14,13 @@ extern struct list_head inode_unused;
* Yes, writeback.h requires sched.h
* No, sched.h is not included from here.
*/
-static inline int current_is_pdflush(void)
+static inline int task_is_pdflush(struct task_struct *task)
{
- return current->flags & PF_FLUSHER;
+ return task->flags & PF_FLUSHER;
}
+#define current_is_pdflush() task_is_pdflush(current)
+
/*
* fs/fs-writeback.c
*/