summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-30 12:43:21 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-30 12:43:21 -0800
commite48b7b66a6531f02f1264c7196f7069a9ce9251a (patch)
treed45ce978262e3c32ce8fe460516bb9aae0cc2fb4
parent5ccf73bb4dc7cc9e1f761202a34de5714164724f (diff)
parent9bd3f98821a83041e77ee25158b80b535d02d7b4 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: block: blk_rq_err_sectors cleanup block: Honor the gfp_mask for alloc_page() in blkdev_issue_discard() block: Fix incorrect alignment offset reporting and update documentation cfq-iosched: don't regard requests with long distance as close aoe: switch to the new bio_flush_dcache_pages() interface drivers/block/mg_disk.c: use resource_size() drivers/block/DAC960.c: use DAC960_V2_Controller block: Fix topology stacking for data and discard alignment drbd: remove unused #include <linux/version.h> drbd: remove duplicated #include drbd: Fix test of unsigned in _drbd_fault_random() drbd: Constify struct file_operations cfq-iosched: Remove prio_change logic for workload selection cfq-iosched: Get rid of nr_groups cfq-iosched: Remove the check for same cfq group from allow_merge drbd: fix test of unsigned in _drbd_fault_random() block: remove Documentation/block/as-iosched.txt
-rw-r--r--Documentation/block/00-INDEX2
-rw-r--r--Documentation/block/as-iosched.txt172
-rw-r--r--block/blk-barrier.c2
-rw-r--r--block/blk-settings.c121
-rw-r--r--block/cfq-iosched.c67
-rw-r--r--drivers/block/DAC960.c2
-rw-r--r--drivers/block/aoe/aoecmd.c17
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c5
-rw-r--r--drivers/block/drbd/drbd_proc.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c1
-rw-r--r--drivers/block/drbd/drbd_worker.c2
-rw-r--r--drivers/block/mg_disk.c2
-rw-r--r--include/linux/blkdev.h17
14 files changed, 115 insertions, 299 deletions
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
index 961a0513f8c..a406286f6f3 100644
--- a/Documentation/block/00-INDEX
+++ b/Documentation/block/00-INDEX
@@ -1,7 +1,5 @@
00-INDEX
- This file
-as-iosched.txt
- - Anticipatory IO scheduler
barrier.txt
- I/O Barriers
biodoc.txt
diff --git a/Documentation/block/as-iosched.txt b/Documentation/block/as-iosched.txt
deleted file mode 100644
index 738b72be128..00000000000
--- a/Documentation/block/as-iosched.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-Anticipatory IO scheduler
--------------------------
-Nick Piggin <piggin@cyberone.com.au> 13 Sep 2003
-
-Attention! Database servers, especially those using "TCQ" disks should
-investigate performance with the 'deadline' IO scheduler. Any system with high
-disk performance requirements should do so, in fact.
-
-If you see unusual performance characteristics of your disk systems, or you
-see big performance regressions versus the deadline scheduler, please email
-me. Database users don't bother unless you're willing to test a lot of patches
-from me ;) its a known issue.
-
-Also, users with hardware RAID controllers, doing striping, may find
-highly variable performance results with using the as-iosched. The
-as-iosched anticipatory implementation is based on the notion that a disk
-device has only one physical seeking head. A striped RAID controller
-actually has a head for each physical device in the logical RAID device.
-
-However, setting the antic_expire (see tunable parameters below) produces
-very similar behavior to the deadline IO scheduler.
-
-Selecting IO schedulers
------------------------
-Refer to Documentation/block/switching-sched.txt for information on
-selecting an io scheduler on a per-device basis.
-
-Anticipatory IO scheduler Policies
-----------------------------------
-The as-iosched implementation implements several layers of policies
-to determine when an IO request is dispatched to the disk controller.
-Here are the policies outlined, in order of application.
-
-1. one-way Elevator algorithm.
-
-The elevator algorithm is similar to that used in deadline scheduler, with
-the addition that it allows limited backward movement of the elevator
-(i.e. seeks backwards). A seek backwards can occur when choosing between
-two IO requests where one is behind the elevator's current position, and
-the other is in front of the elevator's position. If the seek distance to
-the request in back of the elevator is less than half the seek distance to
-the request in front of the elevator, then the request in back can be chosen.
-Backward seeks are also limited to a maximum of MAXBACK (1024*1024) sectors.
-This favors forward movement of the elevator, while allowing opportunistic
-"short" backward seeks.
-
-2. FIFO expiration times for reads and for writes.
-
-This is again very similar to the deadline IO scheduler. The expiration
-times for requests on these lists is tunable using the parameters read_expire
-and write_expire discussed below. When a read or a write expires in this way,
-the IO scheduler will interrupt its current elevator sweep or read anticipation
-to service the expired request.
-
-3. Read and write request batching
-
-A batch is a collection of read requests or a collection of write
-requests. The as scheduler alternates dispatching read and write batches
-to the driver. In the case a read batch, the scheduler submits read
-requests to the driver as long as there are read requests to submit, and
-the read batch time limit has not been exceeded (read_batch_expire).
-The read batch time limit begins counting down only when there are
-competing write requests pending.
-
-In the case of a write batch, the scheduler submits write requests to
-the driver as long as there are write requests available, and the
-write batch time limit has not been exceeded (write_batch_expire).
-However, the length of write batches will be gradually shortened
-when read batches frequently exceed their time limit.
-
-When changing between batch types, the scheduler waits for all requests
-from the previous batch to complete before scheduling requests for the
-next batch.
-
-The read and write fifo expiration times described in policy 2 above
-are checked only when in scheduling IO of a batch for the corresponding
-(read/write) type. So for example, the read FIFO timeout values are
-tested only during read batches. Likewise, the write FIFO timeout
-values are tested only during write batches. For this reason,
-it is generally not recommended for the read batch time
-to be longer than the write expiration time, nor for the write batch
-time to exceed the read expiration time (see tunable parameters below).
-
-When the IO scheduler changes from a read to a write batch,
-it begins the elevator from the request that is on the head of the
-write expiration FIFO. Likewise, when changing from a write batch to
-a read batch, scheduler begins the elevator from the first entry
-on the read expiration FIFO.
-
-4. Read anticipation.
-
-Read anticipation occurs only when scheduling a read batch.
-This implementation of read anticipation allows only one read request
-to be dispatched to the disk controller at a time. In
-contrast, many write requests may be dispatched to the disk controller
-at a time during a write batch. It is this characteristic that can make
-the anticipatory scheduler perform anomalously with controllers supporting
-TCQ, or with hardware striped RAID devices. Setting the antic_expire
-queue parameter (see below) to zero disables this behavior, and the
-anticipatory scheduler behaves essentially like the deadline scheduler.
-
-When read anticipation is enabled (antic_expire is not zero), reads
-are dispatched to the disk controller one at a time.
-At the end of each read request, the IO scheduler examines its next
-candidate read request from its sorted read list. If that next request
-is from the same process as the request that just completed,
-or if the next request in the queue is "very close" to the
-just completed request, it is dispatched immediately. Otherwise,
-statistics (average think time, average seek distance) on the process
-that submitted the just completed request are examined. If it seems
-likely that that process will submit another request soon, and that
-request is likely to be near the just completed request, then the IO
-scheduler will stop dispatching more read requests for up to (antic_expire)
-milliseconds, hoping that process will submit a new request near the one
-that just completed. If such a request is made, then it is dispatched
-immediately. If the antic_expire wait time expires, then the IO scheduler
-will dispatch the next read request from the sorted read queue.
-
-To decide whether an anticipatory wait is worthwhile, the scheduler
-maintains statistics for each process that can be used to compute
-mean "think time" (the time between read requests), and mean seek
-distance for that process. One observation is that these statistics
-are associated with each process, but those statistics are not associated
-with a specific IO device. So for example, if a process is doing IO
-on several file systems on separate devices, the statistics will be
-a combination of IO behavior from all those devices.
-
-
-Tuning the anticipatory IO scheduler
-------------------------------------
-When using 'as', the anticipatory IO scheduler there are 5 parameters under
-/sys/block/*/queue/iosched/. All are units of milliseconds.
-
-The parameters are:
-* read_expire
- Controls how long until a read request becomes "expired". It also controls the
- interval between which expired requests are served, so set to 50, a request
- might take anywhere < 100ms to be serviced _if_ it is the next on the
- expired list. Obviously request expiration strategies won't make the disk
- go faster. The result basically equates to the timeslice a single reader
- gets in the presence of other IO. 100*((seek time / read_expire) + 1) is
- very roughly the % streaming read efficiency your disk should get with
- multiple readers.
-
-* read_batch_expire
- Controls how much time a batch of reads is given before pending writes are
- served. A higher value is more efficient. This might be set below read_expire
- if writes are to be given higher priority than reads, but reads are to be
- as efficient as possible when there are no writes. Generally though, it
- should be some multiple of read_expire.
-
-* write_expire, and
-* write_batch_expire are equivalent to the above, for writes.
-
-* antic_expire
- Controls the maximum amount of time we can anticipate a good read (one
- with a short seek distance from the most recently completed request) before
- giving up. Many other factors may cause anticipation to be stopped early,
- or some processes will not be "anticipated" at all. Should be a bit higher
- for big seek time devices though not a linear correspondence - most
- processes have only a few ms thinktime.
-
-In addition to the tunables above there is a read-only file named est_time
-which, when read, will show:
-
- - The probability of a task exiting without a cooperating task
- submitting an anticipated IO.
-
- - The current mean think time.
-
- - The seek distance used to determine if an incoming IO is better.
-
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8873b9b439f..8618d8996fe 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -402,7 +402,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* our current implementations need. If we'll ever need
* more the interface will need revisiting.
*/
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ page = alloc_page(gfp_mask | __GFP_ZERO);
if (!page)
goto out_free_bio;
if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 6ae118d6e19..d52d4adc440 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -505,21 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b)
/**
* blk_stack_limits - adjust queue_limits for stacked devices
- * @t: the stacking driver limits (top)
- * @b: the underlying queue limits (bottom)
+ * @t: the stacking driver limits (top device)
+ * @b: the underlying queue limits (bottom, component device)
* @offset: offset to beginning of data within component device
*
* Description:
- * Merges two queue_limit structs. Returns 0 if alignment didn't
- * change. Returns -1 if adding the bottom device caused
- * misalignment.
+ * This function is used by stacking drivers like MD and DM to ensure
+ * that all component devices have compatible block sizes and
+ * alignments. The stacking driver must provide a queue_limits
+ * struct (top) and then iteratively call the stacking function for
+ * all component (bottom) devices. The stacking function will
+ * attempt to combine the values and ensure proper alignment.
+ *
+ * Returns 0 if the top and bottom queue_limits are compatible. The
+ * top device's block sizes and alignment offsets may be adjusted to
+ * ensure alignment with the bottom device. If no compatible sizes
+ * and alignments exist, -1 is returned and the resulting top
+ * queue_limits will have the misaligned flag set to indicate that
+ * the alignment_offset is undefined.
*/
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset)
{
- int ret;
-
- ret = 0;
+ sector_t alignment;
+ unsigned int top, bottom;
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -537,6 +546,22 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
+ alignment = queue_limit_alignment_offset(b, offset);
+
+ /* Bottom device has different alignment. Check that it is
+ * compatible with the current top alignment.
+ */
+ if (t->alignment_offset != alignment) {
+
+ top = max(t->physical_block_size, t->io_min)
+ + t->alignment_offset;
+ bottom = max(b->physical_block_size, b->io_min) + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1))
+ t->misaligned = 1;
+ }
+
t->logical_block_size = max(t->logical_block_size,
b->logical_block_size);
@@ -544,54 +569,64 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->physical_block_size);
t->io_min = max(t->io_min, b->io_min);
+ t->io_opt = lcm(t->io_opt, b->io_opt);
+
t->no_cluster |= b->no_cluster;
t->discard_zeroes_data &= b->discard_zeroes_data;
- /* Bottom device offset aligned? */
- if (offset &&
- (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+ /* Physical block size a multiple of the logical block size? */
+ if (t->physical_block_size & (t->logical_block_size - 1)) {
+ t->physical_block_size = t->logical_block_size;
t->misaligned = 1;
- ret = -1;
}
- /*
- * Temporarily disable discard granularity. It's currently buggy
- * since we default to 0 for discard_granularity, hence this
- * "failure" will always trigger for non-zero offsets.
- */
-#if 0
- if (offset &&
- (offset & (b->discard_granularity - 1)) != b->discard_alignment) {
- t->discard_misaligned = 1;
- ret = -1;
+ /* Minimum I/O a multiple of the physical block size? */
+ if (t->io_min & (t->physical_block_size - 1)) {
+ t->io_min = t->physical_block_size;
+ t->misaligned = 1;
}
-#endif
-
- /* If top has no alignment offset, inherit from bottom */
- if (!t->alignment_offset)
- t->alignment_offset =
- b->alignment_offset & (b->physical_block_size - 1);
- if (!t->discard_alignment)
- t->discard_alignment =
- b->discard_alignment & (b->discard_granularity - 1);
-
- /* Top device aligned on logical block boundary? */
- if (t->alignment_offset & (t->logical_block_size - 1)) {
+ /* Optimal I/O a multiple of the physical block size? */
+ if (t->io_opt & (t->physical_block_size - 1)) {
+ t->io_opt = 0;
t->misaligned = 1;
- ret = -1;
}
- /* Find lcm() of optimal I/O size and granularity */
- t->io_opt = lcm(t->io_opt, b->io_opt);
- t->discard_granularity = lcm(t->discard_granularity,
- b->discard_granularity);
+ /* Find lowest common alignment_offset */
+ t->alignment_offset = lcm(t->alignment_offset, alignment)
+ & (max(t->physical_block_size, t->io_min) - 1);
- /* Verify that optimal I/O size is a multiple of io_min */
- if (t->io_min && t->io_opt % t->io_min)
- ret = -1;
+ /* Verify that new alignment_offset is on a logical block boundary */
+ if (t->alignment_offset & (t->logical_block_size - 1))
+ t->misaligned = 1;
+
+ /* Discard alignment and granularity */
+ if (b->discard_granularity) {
+ unsigned int granularity = b->discard_granularity;
+ offset &= granularity - 1;
+
+ alignment = (granularity + b->discard_alignment - offset)
+ & (granularity - 1);
+
+ if (t->discard_granularity != 0 &&
+ t->discard_alignment != alignment) {
+ top = t->discard_granularity + t->discard_alignment;
+ bottom = b->discard_granularity + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1))
+ t->discard_misaligned = 1;
+ }
+
+ t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+ b->max_discard_sectors);
+ t->discard_granularity = max(t->discard_granularity,
+ b->discard_granularity);
+ t->discard_alignment = lcm(t->discard_alignment, alignment) &
+ (t->discard_granularity - 1);
+ }
- return ret;
+ return t->misaligned ? -1 : 0;
}
EXPORT_SYMBOL(blk_stack_limits);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e2f80463ed0..918c7fd9aeb 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -208,8 +208,6 @@ struct cfq_data {
/* Root service tree for cfq_groups */
struct cfq_rb_root grp_service_tree;
struct cfq_group root_group;
- /* Number of active cfq groups on group service tree */
- int nr_groups;
/*
* The priority currently being served
@@ -294,8 +292,7 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
enum wl_prio_t prio,
- enum wl_type_t type,
- struct cfq_data *cfqd)
+ enum wl_type_t type)
{
if (!cfqg)
return NULL;
@@ -842,7 +839,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
__cfq_group_service_tree_add(st, cfqg);
cfqg->on_st = true;
- cfqd->nr_groups++;
st->total_weight += cfqg->weight;
}
@@ -863,7 +859,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
cfqg->on_st = false;
- cfqd->nr_groups--;
st->total_weight -= cfqg->weight;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
@@ -1150,7 +1145,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
#endif
service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
- cfqq_type(cfqq), cfqd);
+ cfqq_type(cfqq));
if (cfq_class_idle(cfqq)) {
rb_key = CFQ_IDLE_DELAY;
parent = rb_last(&service_tree->rb);
@@ -1513,9 +1508,6 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
struct cfq_io_context *cic;
struct cfq_queue *cfqq;
- /* Deny merge if bio and rq don't belong to same cfq group */
- if ((RQ_CFQQ(rq))->cfqg != cfq_get_cfqg(cfqd, 0))
- return false;
/*
* Disallow merge of a sync bio into an async request.
*/
@@ -1616,7 +1608,7 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
{
struct cfq_rb_root *service_tree =
service_tree_for(cfqd->serving_group, cfqd->serving_prio,
- cfqd->serving_type, cfqd);
+ cfqd->serving_type);
if (!cfqd->rq_queued)
return NULL;
@@ -1675,13 +1667,17 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR)
static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct request *rq)
+ struct request *rq, bool for_preempt)
{
sector_t sdist = cfqq->seek_mean;
if (!sample_valid(cfqq->seek_samples))
sdist = CFQQ_SEEK_THR;
+ /* if seek_mean is big, using it as close criteria is meaningless */
+ if (sdist > CFQQ_SEEK_THR && !for_preempt)
+ sdist = CFQQ_SEEK_THR;
+
return cfq_dist_from_last(cfqd, rq) <= sdist;
}
@@ -1709,7 +1705,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
* will contain the closest sector.
*/
__cfqq = rb_entry(parent, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
+ if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
return __cfqq;
if (blk_rq_pos(__cfqq->next_rq) < sector)
@@ -1720,7 +1716,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
return NULL;
__cfqq = rb_entry(node, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
+ if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
return __cfqq;
return NULL;
@@ -1963,8 +1959,7 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
}
static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
- struct cfq_group *cfqg, enum wl_prio_t prio,
- bool prio_changed)
+ struct cfq_group *cfqg, enum wl_prio_t prio)
{
struct cfq_queue *queue;
int i;
@@ -1972,24 +1967,9 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
unsigned long lowest_key = 0;
enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
- if (prio_changed) {
- /*
- * When priorities switched, we prefer starting
- * from SYNC_NOIDLE (first choice), or just SYNC
- * over ASYNC
- */
- if (service_tree_for(cfqg, prio, cur_best, cfqd)->count)
- return cur_best;
- cur_best = SYNC_WORKLOAD;
- if (service_tree_for(cfqg, prio, cur_best, cfqd)->count)
- return cur_best;
-
- return ASYNC_WORKLOAD;
- }
-
- for (i = 0; i < 3; ++i) {
- /* otherwise, select the one with lowest rb_key */
- queue = cfq_rb_first(service_tree_for(cfqg, prio, i, cfqd));
+ for (i = 0; i <= SYNC_WORKLOAD; ++i) {
+ /* select the one with lowest rb_key */
+ queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
if (queue &&
(!key_valid || time_before(queue->rb_key, lowest_key))) {
lowest_key = queue->rb_key;
@@ -2003,8 +1983,6 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
- enum wl_prio_t previous_prio = cfqd->serving_prio;
- bool prio_changed;
unsigned slice;
unsigned count;
struct cfq_rb_root *st;
@@ -2032,24 +2010,19 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
* expiration time
*/
- prio_changed = (cfqd->serving_prio != previous_prio);
- st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type,
- cfqd);
+ st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
count = st->count;
/*
- * If priority didn't change, check workload expiration,
- * and that we still have other queues ready
+ * check workload expiration, and that we still have other queues ready
*/
- if (!prio_changed && count &&
- !time_after(jiffies, cfqd->workload_expires))
+ if (count && !time_after(jiffies, cfqd->workload_expires))
return;
/* otherwise select new workload type */
cfqd->serving_type =
- cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio, prio_changed);
- st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type,
- cfqd);
+ cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
+ st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
count = st->count;
/*
@@ -3143,7 +3116,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
* if this request is as-good as one we would expect from the
* current cfqq, let it preempt
*/
- if (cfq_rq_close(cfqd, cfqq, rq))
+ if (cfq_rq_close(cfqd, cfqq, rq, true))
return true;
return false;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index eb4fa194394..ce1fa923c41 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -7101,7 +7101,7 @@ static struct DAC960_privdata DAC960_BA_privdata = {
static struct DAC960_privdata DAC960_LP_privdata = {
.HardwareType = DAC960_LP_Controller,
- .FirmwareType = DAC960_LP_Controller,
+ .FirmwareType = DAC960_V2_Controller,
.InterruptHandler = DAC960_LP_InterruptHandler,
.MemoryWindowSize = DAC960_LP_RegisterWindowSize,
};
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 13bb69d2abb..64a223b0cc2 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -735,21 +735,6 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
part_stat_unlock();
}
-/*
- * Ensure we don't create aliases in VI caches
- */
-static inline void
-killalias(struct bio *bio)
-{
- struct bio_vec *bv;
- int i;
-
- if (bio_data_dir(bio) == READ)
- __bio_for_each_segment(bv, bio, i, 0) {
- flush_dcache_page(bv->bv_page);
- }
-}
-
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
@@ -871,7 +856,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
if (buf->flags & BUFFL_FAIL)
bio_endio(buf->bio, -EIO);
else {
- killalias(buf->bio);
+ bio_flush_dcache_pages(buf->bio);
bio_endio(buf->bio, 0);
}
mempool_free(buf, d->bufpool);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 2312d782fe9..c9755876343 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1490,7 +1490,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo);
/* drbd_proc.c */
extern struct proc_dir_entry *drbd_proc;
-extern struct file_operations drbd_proc_fops;
+extern const struct file_operations drbd_proc_fops;
extern const char *drbd_conn_str(enum drbd_conns s);
extern const char *drbd_role_str(enum drbd_role s);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 157d1e4343c..9348f33f624 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -27,7 +27,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
@@ -151,7 +150,7 @@ wait_queue_head_t drbd_pp_wait;
DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
-static struct block_device_operations drbd_ops = {
+static const struct block_device_operations drbd_ops = {
.owner = THIS_MODULE,
.open = drbd_open,
.release = drbd_release,
@@ -3623,7 +3622,7 @@ _drbd_fault_random(struct fault_random_state *rsp)
{
long refresh;
- if (--rsp->count < 0) {
+ if (!rsp->count--) {
get_random_bytes(&refresh, sizeof(refresh));
rsp->state += refresh;
rsp->count = FAULT_RANDOM_REFRESH;
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index bdd0b4943b1..df8ad9660d8 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -38,7 +38,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file);
struct proc_dir_entry *drbd_proc;
-struct file_operations drbd_proc_fops = {
+const struct file_operations drbd_proc_fops = {
.owner = THIS_MODULE,
.open = drbd_proc_open,
.read = seq_read,
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c548f24f54a..259c1351b15 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -28,7 +28,6 @@
#include <asm/uaccess.h>
#include <net/sock.h>
-#include <linux/version.h>
#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index ed8796f1112..b453c2bca3b 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -24,7 +24,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
@@ -34,7 +33,6 @@
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
-#include <linux/mm.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index e0339aaa181..02b2583df7f 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -860,7 +860,7 @@ static int mg_probe(struct platform_device *plat_dev)
err = -EINVAL;
goto probe_err_2;
}
- host->dev_base = ioremap(rsc->start , rsc->end + 1);
+ host->dev_base = ioremap(rsc->start, resource_size(rsc));
if (!host->dev_base) {
printk(KERN_ERR "%s:%d ioremap fail\n",
__func__, __LINE__);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 784a919aa0d..9b98173a818 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
* blk_rq_err_bytes() : bytes left till the next error boundary
* blk_rq_sectors() : sectors left in the entire request
* blk_rq_cur_sectors() : sectors left in the current segment
- * blk_rq_err_sectors() : sectors left till the next error boundary
*/
static inline sector_t blk_rq_pos(const struct request *rq)
{
@@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> 9;
}
-static inline unsigned int blk_rq_err_sectors(const struct request *rq)
-{
- return blk_rq_err_bytes(rq) >> 9;
-}
-
/*
* Request issue related functions.
*/
@@ -1116,11 +1110,18 @@ static inline int queue_alignment_offset(struct request_queue *q)
return q->limits.alignment_offset;
}
+static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset)
+{
+ unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+
+ offset &= granularity - 1;
+ return (granularity + lim->alignment_offset - offset) & (granularity - 1);
+}
+
static inline int queue_sector_alignment_offset(struct request_queue *q,
sector_t sector)
{
- return ((sector << 9) - q->limits.alignment_offset)
- & (q->limits.io_min - 1);
+ return queue_limit_alignment_offset(&q->limits, sector << 9);
}
static inline int bdev_alignment_offset(struct block_device *bdev)