summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig2
-rw-r--r--block/Kconfig.iosched2
-rw-r--r--block/blk-cgroup.c63
-rw-r--r--block/blk-cgroup.h13
-rw-r--r--block/blk-core.c34
-rw-r--r--block/blk-integrity.c2
-rw-r--r--block/blk-ioc.c2
-rw-r--r--block/blk-merge.c8
-rw-r--r--block/blk-settings.c131
-rw-r--r--block/blk-sysfs.c13
-rw-r--r--block/bsg.c2
-rw-r--r--block/cfq-iosched.c147
-rw-r--r--block/elevator.c13
13 files changed, 198 insertions, 234 deletions
diff --git a/block/Kconfig b/block/Kconfig
index e20fbde0875..62a5921321c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -78,7 +78,7 @@ config BLK_DEV_INTEGRITY
Protection. If in doubt, say N.
config BLK_CGROUP
- bool
+ tristate
depends on CGROUPS
default n
---help---
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index b71abfb0d72..fc71cf071fb 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,6 +23,7 @@ config IOSCHED_DEADLINE
config IOSCHED_CFQ
tristate "CFQ I/O scheduler"
+ select BLK_CGROUP if CFQ_GROUP_IOSCHED
default y
---help---
The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -35,7 +36,6 @@ config IOSCHED_CFQ
config CFQ_GROUP_IOSCHED
bool "CFQ Group Scheduling support"
depends on IOSCHED_CFQ && CGROUPS
- select BLK_CGROUP
default n
---help---
Enable group IO scheduling in CFQ.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e7dbbaf5fb3..4b686ad08ea 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -23,19 +23,30 @@ static LIST_HEAD(blkio_list);
struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
-bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
-{
- if (!css_tryget(&blkcg->css))
- return false;
- return true;
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_tryget);
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
+ struct cgroup *);
+static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+ struct task_struct *, bool);
+static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+ struct cgroup *, struct task_struct *, bool);
+static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
+static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
-void blkiocg_css_put(struct blkio_cgroup *blkcg)
-{
- css_put(&blkcg->css);
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_put);
+struct cgroup_subsys blkio_subsys = {
+ .name = "blkio",
+ .create = blkiocg_create,
+ .can_attach = blkiocg_can_attach,
+ .attach = blkiocg_attach,
+ .destroy = blkiocg_destroy,
+ .populate = blkiocg_populate,
+#ifdef CONFIG_BLK_CGROUP
+ /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
+ .subsys_id = blkio_subsys_id,
+#endif
+ .use_id = 1,
+ .module = THIS_MODULE,
+};
+EXPORT_SYMBOL_GPL(blkio_subsys);
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
@@ -267,7 +278,8 @@ remove_entry:
done:
free_css_id(&blkio_subsys, &blkcg->css);
rcu_read_unlock();
- kfree(blkcg);
+ if (blkcg != &blkio_root_cgroup)
+ kfree(blkcg);
}
static struct cgroup_subsys_state *
@@ -333,17 +345,6 @@ static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
task_unlock(tsk);
}
-struct cgroup_subsys blkio_subsys = {
- .name = "blkio",
- .create = blkiocg_create,
- .can_attach = blkiocg_can_attach,
- .attach = blkiocg_attach,
- .destroy = blkiocg_destroy,
- .populate = blkiocg_populate,
- .subsys_id = blkio_subsys_id,
- .use_id = 1,
-};
-
void blkio_policy_register(struct blkio_policy_type *blkiop)
{
spin_lock(&blkio_list_lock);
@@ -359,3 +360,17 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop)
spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
+
+static int __init init_cgroup_blkio(void)
+{
+ return cgroup_load_subsys(&blkio_subsys);
+}
+
+static void __exit exit_cgroup_blkio(void)
+{
+ cgroup_unload_subsys(&blkio_subsys);
+}
+
+module_init(init_cgroup_blkio);
+module_exit(exit_cgroup_blkio);
+MODULE_LICENSE("GPL");
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 4d316df863b..8ccc20464da 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,7 +15,13 @@
#include <linux/cgroup.h>
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+
+#ifndef CONFIG_BLK_CGROUP
+/* When blk-cgroup is a module, its subsys_id isn't a compile-time constant */
+extern struct cgroup_subsys blkio_subsys;
+#define blkio_subsys_id blkio_subsys.subsys_id
+#endif
struct blkio_cgroup {
struct cgroup_subsys_state css;
@@ -43,9 +49,6 @@ struct blkio_group {
unsigned long sectors;
};
-extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
-extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
-
typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
unsigned int weight);
@@ -94,7 +97,7 @@ static inline void blkiocg_update_blkio_group_dequeue_stats(
struct blkio_group *blkg, unsigned long dequeue) {}
#endif
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
diff --git a/block/blk-core.c b/block/blk-core.c
index d1a9a0a64f9..9fe174dc74d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1490,9 +1490,9 @@ end_io:
/*
* We only want one ->make_request_fn to be active at a time,
* else stack usage with stacked devices could be a problem.
- * So use current->bio_{list,tail} to keep a list of requests
+ * So use current->bio_list to keep a list of requests
* submited by a make_request_fn function.
- * current->bio_tail is also used as a flag to say if
+ * current->bio_list is also used as a flag to say if
* generic_make_request is currently active in this task or not.
* If it is NULL, then no make_request is active. If it is non-NULL,
* then a make_request is active, and new requests should be added
@@ -1500,11 +1500,11 @@ end_io:
*/
void generic_make_request(struct bio *bio)
{
- if (current->bio_tail) {
+ struct bio_list bio_list_on_stack;
+
+ if (current->bio_list) {
/* make_request is active */
- *(current->bio_tail) = bio;
- bio->bi_next = NULL;
- current->bio_tail = &bio->bi_next;
+ bio_list_add(current->bio_list, bio);
return;
}
/* following loop may be a bit non-obvious, and so deserves some
@@ -1512,30 +1512,27 @@ void generic_make_request(struct bio *bio)
* Before entering the loop, bio->bi_next is NULL (as all callers
* ensure that) so we have a list with a single bio.
* We pretend that we have just taken it off a longer list, so
- * we assign bio_list to the next (which is NULL) and bio_tail
- * to &bio_list, thus initialising the bio_list of new bios to be
+ * we assign bio_list to a pointer to the bio_list_on_stack,
+ * thus initialising the bio_list of new bios to be
* added. __generic_make_request may indeed add some more bios
* through a recursive call to generic_make_request. If it
* did, we find a non-NULL value in bio_list and re-enter the loop
* from the top. In this case we really did just take the bio
- * of the top of the list (no pretending) and so fixup bio_list and
- * bio_tail or bi_next, and call into __generic_make_request again.
+ * of the top of the list (no pretending) and so remove it from
+ * bio_list, and call into __generic_make_request again.
*
* The loop was structured like this to make only one call to
* __generic_make_request (which is important as it is large and
* inlined) and to keep the structure simple.
*/
BUG_ON(bio->bi_next);
+ bio_list_init(&bio_list_on_stack);
+ current->bio_list = &bio_list_on_stack;
do {
- current->bio_list = bio->bi_next;
- if (bio->bi_next == NULL)
- current->bio_tail = &current->bio_list;
- else
- bio->bi_next = NULL;
__generic_make_request(bio);
- bio = current->bio_list;
+ bio = bio_list_pop(current->bio_list);
} while (bio);
- current->bio_tail = NULL; /* deactivate */
+ current->bio_list = NULL; /* deactivate */
}
EXPORT_SYMBOL(generic_make_request);
@@ -1617,8 +1614,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
* limitation.
*/
blk_recalc_rq_segments(rq);
- if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
- rq->nr_phys_segments > queue_max_hw_segments(q)) {
+ if (rq->nr_phys_segments > queue_max_segments(q)) {
printk(KERN_ERR "%s: over max segments limit.\n", __func__);
return -EIO;
}
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 15c630813b1..96e83c2bdb9 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -278,7 +278,7 @@ static struct attribute *integrity_attrs[] = {
NULL,
};
-static struct sysfs_ops integrity_ops = {
+static const struct sysfs_ops integrity_ops = {
.show = &integrity_attr_show,
.store = &integrity_attr_store,
};
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 98e6bf61b0a..3f65c8aadb2 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -91,7 +91,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
spin_lock_init(&ret->lock);
ret->ioprio_changed = 0;
ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
+ ret->last_waited = 0; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ret->cic_list);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 99cb5cf1f44..5e7dc997345 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -206,8 +206,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
{
int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
- req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
+ if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -300,10 +299,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
total_phys_segments--;
}
- if (total_phys_segments > queue_max_phys_segments(q))
- return 0;
-
- if (total_phys_segments > queue_max_hw_segments(q))
+ if (total_phys_segments > queue_max_segments(q))
return 0;
/* Merge is OK... */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5eeb9e0d256..31e7a9375c1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,10 +91,9 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
*/
void blk_set_default_limits(struct queue_limits *lim)
{
- lim->max_phys_segments = MAX_PHYS_SEGMENTS;
- lim->max_hw_segments = MAX_HW_SEGMENTS;
+ lim->max_segments = BLK_MAX_SEGMENTS;
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->max_segment_size = MAX_SEGMENT_SIZE;
+ lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = BLK_DEF_MAX_SECTORS;
lim->max_hw_sectors = INT_MAX;
lim->max_discard_sectors = 0;
@@ -154,7 +153,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->unplug_timer.data = (unsigned long)q;
blk_set_default_limits(&q->limits);
- blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
+ blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
/*
* If the caller didn't supply a lock, fall back to our embedded
@@ -210,37 +209,32 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
EXPORT_SYMBOL(blk_queue_bounce_limit);
/**
- * blk_queue_max_sectors - set max sectors for a request for this queue
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
* @q: the request queue for the device
- * @max_sectors: max sectors in the usual 512b unit
+ * @max_hw_sectors: max hardware sectors in the usual 512b unit
*
* Description:
- * Enables a low level driver to set an upper limit on the size of
- * received requests.
+ * Enables a low level driver to set a hard upper limit,
+ * max_hw_sectors, on the size of requests. max_hw_sectors is set by
+ * the device driver based upon the combined capabilities of I/O
+ * controller and storage device.
+ *
+ * max_sectors is a soft limit imposed by the block layer for
+ * filesystem type requests. This value can be overridden on a
+ * per-device basis in /sys/block/<device>/queue/max_sectors_kb.
+ * The soft limit can not exceed max_hw_sectors.
**/
-void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
- if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
- max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
+ if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
+ max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
printk(KERN_INFO "%s: set to minimum %d\n",
- __func__, max_sectors);
+ __func__, max_hw_sectors);
}
- if (BLK_DEF_MAX_SECTORS > max_sectors)
- q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
- else {
- q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
- q->limits.max_hw_sectors = max_sectors;
- }
-}
-EXPORT_SYMBOL(blk_queue_max_sectors);
-
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
-{
- if (BLK_DEF_MAX_SECTORS > max_sectors)
- q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
- else
- q->limits.max_hw_sectors = max_sectors;
+ q->limits.max_hw_sectors = max_hw_sectors;
+ q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
+ BLK_DEF_MAX_SECTORS);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
@@ -257,17 +251,15 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
- * blk_queue_max_phys_segments - set max phys segments for a request for this queue
+ * blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
*
* Description:
* Enables a low level driver to set an upper limit on the number of
- * physical data segments in a request. This would be the largest sized
- * scatter list the driver could handle.
+ * hw data segments in a request.
**/
-void blk_queue_max_phys_segments(struct request_queue *q,
- unsigned short max_segments)
+void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
if (!max_segments) {
max_segments = 1;
@@ -275,33 +267,9 @@ void blk_queue_max_phys_segments(struct request_queue *q,
__func__, max_segments);
}
- q->limits.max_phys_segments = max_segments;
+ q->limits.max_segments = max_segments;
}
-EXPORT_SYMBOL(blk_queue_max_phys_segments);
-
-/**
- * blk_queue_max_hw_segments - set max hw segments for a request for this queue
- * @q: the request queue for the device
- * @max_segments: max number of segments
- *
- * Description:
- * Enables a low level driver to set an upper limit on the number of
- * hw data segments in a request. This would be the largest number of
- * address/length pairs the host adapter can actually give at once
- * to the device.
- **/
-void blk_queue_max_hw_segments(struct request_queue *q,
- unsigned short max_segments)
-{
- if (!max_segments) {
- max_segments = 1;
- printk(KERN_INFO "%s: set to minimum %d\n",
- __func__, max_segments);
- }
-
- q->limits.max_hw_segments = max_segments;
-}
-EXPORT_SYMBOL(blk_queue_max_hw_segments);
+EXPORT_SYMBOL(blk_queue_max_segments);
/**
* blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
@@ -507,7 +475,7 @@ static unsigned int lcm(unsigned int a, unsigned int b)
* blk_stack_limits - adjust queue_limits for stacked devices
* @t: the stacking driver limits (top device)
* @b: the underlying queue limits (bottom, component device)
- * @offset: offset to beginning of data within component device
+ * @start: first data sector within component device
*
* Description:
* This function is used by stacking drivers like MD and DM to ensure
@@ -525,10 +493,9 @@ static unsigned int lcm(unsigned int a, unsigned int b)
* the alignment_offset is undefined.
*/
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
- sector_t offset)
+ sector_t start)
{
- sector_t alignment;
- unsigned int top, bottom, ret = 0;
+ unsigned int top, bottom, alignment, ret = 0;
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -537,18 +504,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
b->seg_boundary_mask);
- t->max_phys_segments = min_not_zero(t->max_phys_segments,
- b->max_phys_segments);
-
- t->max_hw_segments = min_not_zero(t->max_hw_segments,
- b->max_hw_segments);
+ t->max_segments = min_not_zero(t->max_segments, b->max_segments);
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
t->misaligned |= b->misaligned;
- alignment = queue_limit_alignment_offset(b, offset);
+ alignment = queue_limit_alignment_offset(b, start);
/* Bottom device has different alignment. Check that it is
* compatible with the current top alignment.
@@ -611,11 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
/* Discard alignment and granularity */
if (b->discard_granularity) {
- unsigned int granularity = b->discard_granularity;
- offset &= granularity - 1;
-
- alignment = (granularity + b->discard_alignment - offset)
- & (granularity - 1);
+ alignment = queue_limit_discard_alignment(b, start);
if (t->discard_granularity != 0 &&
t->discard_alignment != alignment) {
@@ -657,7 +616,7 @@ int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
start += get_start_sect(bdev);
- return blk_stack_limits(t, &bq->limits, start << 9);
+ return blk_stack_limits(t, &bq->limits, start);
}
EXPORT_SYMBOL(bdev_stack_limits);
@@ -668,9 +627,8 @@ EXPORT_SYMBOL(bdev_stack_limits);
* @offset: offset to beginning of data within component device
*
* Description:
- * Merges the limits for two queues. Returns 0 if alignment
- * didn't change. Returns -1 if adding the bottom device caused
- * misalignment.
+ * Merges the limits for a top level gendisk and a bottom level
+ * block_device.
*/
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
sector_t offset)
@@ -678,9 +636,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
struct request_queue *t = disk->queue;
struct request_queue *b = bdev_get_queue(bdev);
- offset += get_start_sect(bdev) << 9;
-
- if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+ if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
disk_name(disk, 0, top);
@@ -752,22 +708,19 @@ EXPORT_SYMBOL(blk_queue_update_dma_pad);
* does is adjust the queue so that the buf is always appended
* silently to the scatterlist.
*
- * Note: This routine adjusts max_hw_segments to make room for
- * appending the drain buffer. If you call
- * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
- * calling this routine, you must set the limit to one fewer than your
- * device can support otherwise there won't be room for the drain
- * buffer.
+ * Note: This routine adjusts max_hw_segments to make room for appending
+ * the drain buffer. If you call blk_queue_max_segments() after calling
+ * this routine, you must set the limit to one fewer than your device
+ * can support otherwise there won't be room for the drain buffer.
*/
int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size)
{
- if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
+ if (queue_max_segments(q) < 2)
return -EINVAL;
/* make room for appending the drain */
- blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
- blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
+ blk_queue_max_segments(q, queue_max_segments(q) - 1);
q->dma_drain_needed = dma_drain_needed;
q->dma_drain_buffer = buf;
q->dma_drain_size = size;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8606c9543fd..2ae2cb3f362 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
- return queue_var_show(blk_queue_nomerges(q), page);
+ return queue_var_show((blk_queue_nomerges(q) << 1) |
+ blk_queue_noxmerges(q), page);
}
static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
ssize_t ret = queue_var_store(&nm, page, count);
spin_lock_irq(q->queue_lock);
- if (nm)
+ queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+ if (nm == 2)
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
- else
- queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ else if (nm)
+ queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
spin_unlock_irq(q->queue_lock);
return ret;
@@ -447,7 +450,7 @@ static void blk_release_queue(struct kobject *kobj)
kmem_cache_free(blk_requestq_cachep, q);
}
-static struct sysfs_ops queue_sysfs_ops = {
+static const struct sysfs_ops queue_sysfs_ops = {
.show = queue_attr_show,
.store = queue_attr_store,
};
diff --git a/block/bsg.c b/block/bsg.c
index a9fd2d84b53..46597a6bd11 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -260,7 +260,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
return ERR_PTR(ret);
/*
- * map scatter-gather elements seperately and string them to request
+ * map scatter-gather elements separately and string them to request
*/
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 023f4e69a33..dee9d9378fe 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -19,7 +19,7 @@
* tunables
*/
/* max queue in one round of service */
-static const int cfq_quantum = 4;
+static const int cfq_quantum = 8;
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
/* maximum backwards seek, in KiB */
static const int cfq_back_max = 16 * 1024;
@@ -46,8 +46,9 @@ static const int cfq_hist_divisor = 4;
#define CFQ_HW_QUEUE_MIN (5)
#define CFQ_SERVICE_SHIFT 12
-#define CFQQ_SEEK_THR 8 * 1024
-#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR)
+#define CFQQ_SEEK_THR (sector_t)(8 * 100)
+#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
+#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
#define RQ_CIC(rq) \
((struct cfq_io_context *) (rq)->elevator_private)
@@ -77,11 +78,12 @@ struct cfq_rb_root {
struct rb_root rb;
struct rb_node *left;
unsigned count;
+ unsigned total_weight;
u64 min_vdisktime;
struct rb_node *active;
- unsigned total_weight;
};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
+#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
+ .count = 0, .min_vdisktime = 0, }
/*
* Per process-grouping structure
@@ -115,11 +117,11 @@ struct cfq_queue {
/* time when queue got scheduled in to dispatch first request. */
unsigned long dispatch_start;
unsigned int allocated_slice;
+ unsigned int slice_dispatch;
/* time when first request from queue completed and slice started. */
unsigned long slice_start;
unsigned long slice_end;
long slice_resid;
- unsigned int slice_dispatch;
/* pending metadata requests */
int meta_pending;
@@ -130,13 +132,11 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
- unsigned int seek_samples;
- u64 seek_total;
- sector_t seek_mean;
- sector_t last_request_pos;
-
pid_t pid;
+ u32 seek_history;
+ sector_t last_request_pos;
+
struct cfq_rb_root *service_tree;
struct cfq_queue *new_cfqq;
struct cfq_group *cfqg;
@@ -223,8 +223,8 @@ struct cfq_data {
unsigned int busy_queues;
- int rq_in_driver[2];
- int sync_flight;
+ int rq_in_driver;
+ int rq_in_flight[2];
/*
* queue-depth detection
@@ -417,11 +417,6 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
struct io_context *);
-static inline int rq_in_driver(struct cfq_data *cfqd)
-{
- return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
-}
-
static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
bool is_sync)
{
@@ -951,10 +946,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
unsigned int major, minor;
- /* Do we need to take this reference */
- if (!blkiocg_css_tryget(blkcg))
- return NULL;;
-
cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
if (cfqg || !create)
goto done;
@@ -985,7 +976,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
done:
- blkiocg_css_put(blkcg);
return cfqg;
}
@@ -1420,9 +1410,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- cfqd->rq_in_driver[rq_is_sync(rq)]++;
+ cfqd->rq_in_driver++;
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
- rq_in_driver(cfqd));
+ cfqd->rq_in_driver);
cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
}
@@ -1430,12 +1420,11 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- const int sync = rq_is_sync(rq);
- WARN_ON(!cfqd->rq_in_driver[sync]);
- cfqd->rq_in_driver[sync]--;
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
- rq_in_driver(cfqd));
+ cfqd->rq_in_driver);
}
static void cfq_remove_request(struct request *rq)
@@ -1673,16 +1662,7 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq, bool for_preempt)
{
- sector_t sdist = cfqq->seek_mean;
-
- if (!sample_valid(cfqq->seek_samples))
- sdist = CFQQ_SEEK_THR;
-
- /* if seek_mean is big, using it as close criteria is meaningless */
- if (sdist > CFQQ_SEEK_THR && !for_preempt)
- sdist = CFQQ_SEEK_THR;
-
- return cfq_dist_from_last(cfqd, rq) <= sdist;
+ return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR;
}
static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
@@ -1878,8 +1858,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
cfqq->dispatched++;
elv_dispatch_sort(q, rq);
- if (cfq_cfqq_sync(cfqq))
- cfqd->sync_flight++;
+ cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
cfqq->nr_sectors += blk_rq_sectors(rq);
}
@@ -2219,6 +2198,19 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
return dispatched;
}
+static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
+ struct cfq_queue *cfqq)
+{
+ /* the queue hasn't finished any request, can't estimate */
+ if (cfq_cfqq_slice_new(cfqq))
+ return 1;
+ if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
+ cfqq->slice_end))
+ return 1;
+
+ return 0;
+}
+
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
unsigned int max_dispatch;
@@ -2226,16 +2218,16 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
/*
* Drain async requests before we start sync IO
*/
- if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
return false;
/*
* If this is an async queue and we have sync IO in flight, let it wait
*/
- if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
+ if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
return false;
- max_dispatch = cfqd->cfq_quantum;
+ max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
if (cfq_class_idle(cfqq))
max_dispatch = 1;
@@ -2252,13 +2244,22 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
/*
* We have other queues, don't allow more IO from this one
*/
- if (cfqd->busy_queues > 1)
+ if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
return false;
/*
* Sole queue user, no limit
*/
- max_dispatch = -1;
+ if (cfqd->busy_queues == 1)
+ max_dispatch = -1;
+ else
+ /*
+ * Normally we start throttling cfqq when cfq_quantum/2
+ * requests have been dispatched. But we can drive
+ * deeper queue depths at the beginning of slice
+ * subjected to upper limit of cfq_quantum.
+ * */
+ max_dispatch = cfqd->cfq_quantum;
}
/*
@@ -2980,30 +2981,20 @@ static void
cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq)
{
- sector_t sdist;
- u64 total;
+ sector_t sdist = 0;
+ sector_t n_sec = blk_rq_sectors(rq);
+ if (cfqq->last_request_pos) {
+ if (cfqq->last_request_pos < blk_rq_pos(rq))
+ sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
+ else
+ sdist = cfqq->last_request_pos - blk_rq_pos(rq);
+ }
- if (!cfqq->last_request_pos)
- sdist = 0;
- else if (cfqq->last_request_pos < blk_rq_pos(rq))
- sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
+ cfqq->seek_history <<= 1;
+ if (blk_queue_nonrot(cfqd->queue))
+ cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
else
- sdist = cfqq->last_request_pos - blk_rq_pos(rq);
-
- /*
- * Don't allow the seek distance to get too large from the
- * odd fragment, pagein, etc
- */
- if (cfqq->seek_samples <= 60) /* second&third seek */
- sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024);
- else
- sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64);
-
- cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8;
- cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8;
- total = cfqq->seek_total + (cfqq->seek_samples/2);
- do_div(total, cfqq->seek_samples);
- cfqq->seek_mean = (sector_t)total;
+ cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
}
/*
@@ -3028,8 +3019,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfq_mark_cfqq_deep(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
- (!cfq_cfqq_deep(cfqq) && sample_valid(cfqq->seek_samples)
- && CFQQ_SEEKY(cfqq)))
+ (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
if (cic->ttime_mean > cfqd->cfq_slice_idle)
@@ -3215,14 +3205,14 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
- if (rq_in_driver(cfqd) > cfqd->hw_tag_est_depth)
- cfqd->hw_tag_est_depth = rq_in_driver(cfqd);
+ if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
+ cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
if (cfqd->hw_tag == 1)
return;
if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
- rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
+ cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
return;
/*
@@ -3232,7 +3222,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
*/
if (cfqq && cfq_cfqq_idle_window(cfqq) &&
cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
- CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN)
+ CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
return;
if (cfqd->hw_tag_samples++ < 50)
@@ -3285,13 +3275,12 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_update_hw_tag(cfqd);
- WARN_ON(!cfqd->rq_in_driver[sync]);
+ WARN_ON(!cfqd->rq_in_driver);
WARN_ON(!cfqq->dispatched);
- cfqd->rq_in_driver[sync]--;
+ cfqd->rq_in_driver--;
cfqq->dispatched--;
- if (cfq_cfqq_sync(cfqq))
- cfqd->sync_flight--;
+ cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
if (sync) {
RQ_CIC(rq)->last_end_request = now;
@@ -3345,7 +3334,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
}
}
- if (!rq_in_driver(cfqd))
+ if (!cfqd->rq_in_driver)
cfq_schedule_dispatch(cfqd);
}
diff --git a/block/elevator.c b/block/elevator.c
index 9ad5ccc4c5e..df75676f667 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -474,6 +474,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
int ret;
/*
+ * Levels of merges:
+ * nomerges: No merges at all attempted
+ * noxmerges: Only simple one-hit cache try
+ * merges: All merge tries attempted
+ */
+ if (blk_queue_nomerges(q))
+ return ELEVATOR_NO_MERGE;
+
+ /*
* First try one-hit cache.
*/
if (q->last_merge) {
@@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
}
}
- if (blk_queue_nomerges(q))
+ if (blk_queue_noxmerges(q))
return ELEVATOR_NO_MERGE;
/*
@@ -883,7 +892,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
return error;
}
-static struct sysfs_ops elv_sysfs_ops = {
+static const struct sysfs_ops elv_sysfs_ops = {
.show = elv_attr_show,
.store = elv_attr_store,
};