summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c24
-rw-r--r--block/blk-core.c33
-rw-r--r--block/blk-ioc.c228
-rw-r--r--block/blk-merge.c37
-rw-r--r--block/blk-softirq.c16
-rw-r--r--block/blk.h18
-rw-r--r--block/bsg.c3
-rw-r--r--block/cfq-iosched.c36
-rw-r--r--block/elevator.c55
-rw-r--r--block/genhd.c42
-rw-r--r--block/partition-generic.c48
-rw-r--r--block/partitions/ldm.c11
12 files changed, 266 insertions, 285 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index fa8f2630944..ea84a23d5e6 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -28,13 +28,10 @@ static LIST_HEAD(blkio_list);
struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
-static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
- struct cgroup *);
-static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
- struct cgroup_taskset *);
-static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
- struct cgroup_taskset *);
-static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup *);
+static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *);
+static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *);
+static void blkiocg_destroy(struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
/* for encoding cft->private value on file */
@@ -1548,7 +1545,7 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
ARRAY_SIZE(blkio_files));
}
-static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+static void blkiocg_destroy(struct cgroup *cgroup)
{
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
unsigned long flags;
@@ -1598,8 +1595,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
kfree(blkcg);
}
-static struct cgroup_subsys_state *
-blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
struct blkio_cgroup *blkcg;
struct cgroup *parent = cgroup->parent;
@@ -1628,8 +1624,7 @@ done:
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
-static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *task;
struct io_context *ioc;
@@ -1648,8 +1643,7 @@ static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
return ret;
}
-static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *task;
struct io_context *ioc;
@@ -1659,7 +1653,7 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_cgroup_changed(ioc);
- put_io_context(ioc, NULL);
+ put_io_context(ioc);
}
}
}
diff --git a/block/blk-core.c b/block/blk-core.c
index e6c05a97ee2..3a78b00edd7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
if (rq->cmd_flags & REQ_ELVPRIV) {
elv_put_request(q, rq);
if (rq->elv.icq)
- put_io_context(rq->elv.icq->ioc, q);
+ put_io_context(rq->elv.icq->ioc);
}
mempool_free(rq, q->rq.rq_pool);
@@ -872,13 +872,15 @@ retry:
spin_unlock_irq(q->queue_lock);
/* create icq if missing */
- if (unlikely(et->icq_cache && !icq))
+ if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
icq = ioc_create_icq(q, gfp_mask);
+ if (!icq)
+ goto fail_icq;
+ }
- /* rqs are guaranteed to have icq on elv_set_request() if requested */
- if (likely(!et->icq_cache || icq))
- rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+ rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+fail_icq:
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
@@ -1210,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
return true;
}
@@ -1241,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
return true;
}
@@ -1255,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
* on %current's plugged list. Returns %true if merge was successful,
* otherwise %false.
*
- * This function is called without @q->queue_lock; however, elevator is
- * accessed iff there already are requests on the plugged list which in
- * turn guarantees validity of the elevator.
- *
- * Note that, on successful merge, elevator operation
- * elevator_bio_merged_fn() will be called without queue lock. Elevator
- * must be ready for this.
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock. As such it's more of an issuing mechanism
+ * than scheduling, and the request, while may have elvpriv data, is not
+ * added on the elevator at this point. In addition, we don't have
+ * reliable access to the elevator outside queue lock. Only check basic
+ * merging parameters without querying the elevator.
*/
static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
@@ -1280,10 +1279,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
(*request_count)++;
- if (rq->q != q)
+ if (rq->q != q || !blk_rq_merge_ok(rq, bio))
continue;
- el_ret = elv_try_merge(rq, bio);
+ el_ret = blk_try_merge(rq, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
ret = bio_attempt_back_merge(q, rq, bio);
if (ret)
@@ -1345,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
+ elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
+ elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 27a06e00eae..fb95dd2f889 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc)
}
EXPORT_SYMBOL(get_io_context);
-/*
- * Releasing ioc may nest into another put_io_context() leading to nested
- * fast path release. As the ioc's can't be the same, this is okay but
- * makes lockdep whine. Keep track of nesting and use it as subclass.
- */
-#ifdef CONFIG_LOCKDEP
-#define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0)
-#define ioc_release_depth_inc(q) (q)->ioc_release_depth++
-#define ioc_release_depth_dec(q) (q)->ioc_release_depth--
-#else
-#define ioc_release_depth(q) 0
-#define ioc_release_depth_inc(q) do { } while (0)
-#define ioc_release_depth_dec(q) do { } while (0)
-#endif
-
static void icq_free_icq_rcu(struct rcu_head *head)
{
struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
@@ -51,11 +36,23 @@ static void icq_free_icq_rcu(struct rcu_head *head)
kmem_cache_free(icq->__rcu_icq_cache, icq);
}
-/*
- * Exit and free an icq. Called with both ioc and q locked.
- */
+/* Exit an icq. Called with both ioc and q locked. */
static void ioc_exit_icq(struct io_cq *icq)
{
+ struct elevator_type *et = icq->q->elevator->type;
+
+ if (icq->flags & ICQ_EXITED)
+ return;
+
+ if (et->ops.elevator_exit_icq_fn)
+ et->ops.elevator_exit_icq_fn(icq);
+
+ icq->flags |= ICQ_EXITED;
+}
+
+/* Release an icq. Called with both ioc and q locked. */
+static void ioc_destroy_icq(struct io_cq *icq)
+{
struct io_context *ioc = icq->ioc;
struct request_queue *q = icq->q;
struct elevator_type *et = q->elevator->type;
@@ -75,11 +72,7 @@ static void ioc_exit_icq(struct io_cq *icq)
if (rcu_dereference_raw(ioc->icq_hint) == icq)
rcu_assign_pointer(ioc->icq_hint, NULL);
- if (et->ops.elevator_exit_icq_fn) {
- ioc_release_depth_inc(q);
- et->ops.elevator_exit_icq_fn(icq);
- ioc_release_depth_dec(q);
- }
+ ioc_exit_icq(icq);
/*
* @icq->q might have gone away by the time RCU callback runs
@@ -97,51 +90,32 @@ static void ioc_release_fn(struct work_struct *work)
{
struct io_context *ioc = container_of(work, struct io_context,
release_work);
- struct request_queue *last_q = NULL;
+ unsigned long flags;
- spin_lock_irq(&ioc->lock);
+ /*
+ * Exiting icq may call into put_io_context() through elevator
+ * which will trigger lockdep warning. The ioc's are guaranteed to
+ * be different, use a different locking subclass here. Use
+ * irqsave variant as there's no spin_lock_irq_nested().
+ */
+ spin_lock_irqsave_nested(&ioc->lock, flags, 1);
while (!hlist_empty(&ioc->icq_list)) {
struct io_cq *icq = hlist_entry(ioc->icq_list.first,
struct io_cq, ioc_node);
- struct request_queue *this_q = icq->q;
-
- if (this_q != last_q) {
- /*
- * Need to switch to @this_q. Once we release
- * @ioc->lock, it can go away along with @cic.
- * Hold on to it.
- */
- __blk_get_queue(this_q);
-
- /*
- * blk_put_queue() might sleep thanks to kobject
- * idiocy. Always release both locks, put and
- * restart.
- */
- if (last_q) {
- spin_unlock(last_q->queue_lock);
- spin_unlock_irq(&ioc->lock);
- blk_put_queue(last_q);
- } else {
- spin_unlock_irq(&ioc->lock);
- }
-
- last_q = this_q;
- spin_lock_irq(this_q->queue_lock);
- spin_lock(&ioc->lock);
- continue;
+ struct request_queue *q = icq->q;
+
+ if (spin_trylock(q->queue_lock)) {
+ ioc_destroy_icq(icq);
+ spin_unlock(q->queue_lock);
+ } else {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ cpu_relax();
+ spin_lock_irqsave_nested(&ioc->lock, flags, 1);
}
- ioc_exit_icq(icq);
}
- if (last_q) {
- spin_unlock(last_q->queue_lock);
- spin_unlock_irq(&ioc->lock);
- blk_put_queue(last_q);
- } else {
- spin_unlock_irq(&ioc->lock);
- }
+ spin_unlock_irqrestore(&ioc->lock, flags);
kmem_cache_free(iocontext_cachep, ioc);
}
@@ -149,79 +123,35 @@ static void ioc_release_fn(struct work_struct *work)
/**
* put_io_context - put a reference of io_context
* @ioc: io_context to put
- * @locked_q: request_queue the caller is holding queue_lock of (hint)
*
* Decrement reference count of @ioc and release it if the count reaches
- * zero. If the caller is holding queue_lock of a queue, it can indicate
- * that with @locked_q. This is an optimization hint and the caller is
- * allowed to pass in %NULL even when it's holding a queue_lock.
+ * zero.
*/
-void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
+void put_io_context(struct io_context *ioc)
{
- struct request_queue *last_q = locked_q;
unsigned long flags;
+ bool free_ioc = false;
if (ioc == NULL)
return;
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
- if (locked_q)
- lockdep_assert_held(locked_q->queue_lock);
-
- if (!atomic_long_dec_and_test(&ioc->refcount))
- return;
/*
- * Destroy @ioc. This is a bit messy because icq's are chained
- * from both ioc and queue, and ioc->lock nests inside queue_lock.
- * The inner ioc->lock should be held to walk our icq_list and then
- * for each icq the outer matching queue_lock should be grabbed.
- * ie. We need to do reverse-order double lock dancing.
- *
- * Another twist is that we are often called with one of the
- * matching queue_locks held as indicated by @locked_q, which
- * prevents performing double-lock dance for other queues.
- *
- * So, we do it in two stages. The fast path uses the queue_lock
- * the caller is holding and, if other queues need to be accessed,
- * uses trylock to avoid introducing locking dependency. This can
- * handle most cases, especially if @ioc was performing IO on only
- * single device.
- *
- * If trylock doesn't cut it, we defer to @ioc->release_work which
- * can do all the double-locking dancing.
+ * Releasing ioc requires reverse order double locking and we may
+ * already be holding a queue_lock. Do it asynchronously from wq.
*/
- spin_lock_irqsave_nested(&ioc->lock, flags,
- ioc_release_depth(locked_q));
-
- while (!hlist_empty(&ioc->icq_list)) {
- struct io_cq *icq = hlist_entry(ioc->icq_list.first,
- struct io_cq, ioc_node);
- struct request_queue *this_q = icq->q;
-
- if (this_q != last_q) {
- if (last_q && last_q != locked_q)
- spin_unlock(last_q->queue_lock);
- last_q = NULL;
-
- if (!spin_trylock(this_q->queue_lock))
- break;
- last_q = this_q;
- continue;
- }
- ioc_exit_icq(icq);
+ if (atomic_long_dec_and_test(&ioc->refcount)) {
+ spin_lock_irqsave(&ioc->lock, flags);
+ if (!hlist_empty(&ioc->icq_list))
+ schedule_work(&ioc->release_work);
+ else
+ free_ioc = true;
+ spin_unlock_irqrestore(&ioc->lock, flags);
}
- if (last_q && last_q != locked_q)
- spin_unlock(last_q->queue_lock);
-
- spin_unlock_irqrestore(&ioc->lock, flags);
-
- /* if no icq is left, we're done; otherwise, kick release_work */
- if (hlist_empty(&ioc->icq_list))
+ if (free_ioc)
kmem_cache_free(iocontext_cachep, ioc);
- else
- schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
@@ -229,14 +159,42 @@ EXPORT_SYMBOL(put_io_context);
void exit_io_context(struct task_struct *task)
{
struct io_context *ioc;
+ struct io_cq *icq;
+ struct hlist_node *n;
+ unsigned long flags;
task_lock(task);
ioc = task->io_context;
task->io_context = NULL;
task_unlock(task);
- atomic_dec(&ioc->nr_tasks);
- put_io_context(ioc, NULL);
+ if (!atomic_dec_and_test(&ioc->nr_tasks)) {
+ put_io_context(ioc);
+ return;
+ }
+
+ /*
+ * Need ioc lock to walk icq_list and q lock to exit icq. Perform
+ * reverse double locking. Read comment in ioc_release_fn() for
+ * explanation on the nested locking annotation.
+ */
+retry:
+ spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+ hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) {
+ if (icq->flags & ICQ_EXITED)
+ continue;
+ if (spin_trylock(icq->q->queue_lock)) {
+ ioc_exit_icq(icq);
+ spin_unlock(icq->q->queue_lock);
+ } else {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ cpu_relax();
+ goto retry;
+ }
+ }
+ spin_unlock_irqrestore(&ioc->lock, flags);
+
+ put_io_context(ioc);
}
/**
@@ -255,7 +213,7 @@ void ioc_clear_queue(struct request_queue *q)
struct io_context *ioc = icq->ioc;
spin_lock(&ioc->lock);
- ioc_exit_icq(icq);
+ ioc_destroy_icq(icq);
spin_unlock(&ioc->lock);
}
}
@@ -424,13 +382,13 @@ struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
return icq;
}
-void ioc_set_changed(struct io_context *ioc, int which)
+void ioc_set_icq_flags(struct io_context *ioc, unsigned int flags)
{
struct io_cq *icq;
struct hlist_node *n;
hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
- set_bit(which, &icq->changed);
+ icq->flags |= flags;
}
/**
@@ -448,7 +406,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
spin_lock_irqsave(&ioc->lock, flags);
ioc->ioprio = ioprio;
- ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
+ ioc_set_icq_flags(ioc, ICQ_IOPRIO_CHANGED);
spin_unlock_irqrestore(&ioc->lock, flags);
}
@@ -465,11 +423,33 @@ void ioc_cgroup_changed(struct io_context *ioc)
unsigned long flags;
spin_lock_irqsave(&ioc->lock, flags);
- ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
+ ioc_set_icq_flags(ioc, ICQ_CGROUP_CHANGED);
spin_unlock_irqrestore(&ioc->lock, flags);
}
EXPORT_SYMBOL(ioc_cgroup_changed);
+/**
+ * icq_get_changed - fetch and clear icq changed mask
+ * @icq: icq of interest
+ *
+ * Fetch and clear ICQ_*_CHANGED bits from @icq. Grabs and releases
+ * @icq->ioc->lock.
+ */
+unsigned icq_get_changed(struct io_cq *icq)
+{
+ unsigned int changed = 0;
+ unsigned long flags;
+
+ if (unlikely(icq->flags & ICQ_CHANGED_MASK)) {
+ spin_lock_irqsave(&icq->ioc->lock, flags);
+ changed = icq->flags & ICQ_CHANGED_MASK;
+ icq->flags &= ~ICQ_CHANGED_MASK;
+ spin_unlock_irqrestore(&icq->ioc->lock, flags);
+ }
+ return changed;
+}
+EXPORT_SYMBOL(icq_get_changed);
+
static int __init blk_ioc_init(void)
{
iocontext_cachep = kmem_cache_create("blkdev_ioc",
diff --git a/block/blk-merge.c b/block/blk-merge.c
index cfcc37cb222..160035f5488 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{
return attempt_merge(q, rq, next);
}
+
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
+{
+ if (!rq_mergeable(rq))
+ return false;
+
+ /* don't merge file system requests and discard requests */
+ if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
+ return false;
+
+ /* don't merge discard requests and secure discard requests */
+ if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
+ return false;
+
+ /* different data direction or already started, don't merge */
+ if (bio_data_dir(bio) != rq_data_dir(rq))
+ return false;
+
+ /* must be same device and not a special request */
+ if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
+ return false;
+
+ /* only merge integrity protected bio into ditto rq */
+ if (bio_integrity(bio) != blk_integrity_rq(rq))
+ return false;
+
+ return true;
+}
+
+int blk_try_merge(struct request *rq, struct bio *bio)
+{
+ if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
+ return ELEVATOR_BACK_MERGE;
+ else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
+ return ELEVATOR_FRONT_MERGE;
+ return ELEVATOR_NO_MERGE;
+}
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 1366a89d8e6..467c8de8864 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -8,6 +8,7 @@
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include "blk.h"
@@ -103,9 +104,10 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
void __blk_complete_request(struct request *req)
{
- int ccpu, cpu, group_cpu = NR_CPUS;
+ int ccpu, cpu;
struct request_queue *q = req->q;
unsigned long flags;
+ bool shared = false;
BUG_ON(!q->softirq_done_fn);
@@ -117,22 +119,20 @@ void __blk_complete_request(struct request *req)
*/
if (req->cpu != -1) {
ccpu = req->cpu;
- if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
- ccpu = blk_cpu_to_group(ccpu);
- group_cpu = blk_cpu_to_group(cpu);
- }
+ if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
+ shared = cpus_share_cache(cpu, ccpu);
} else
ccpu = cpu;
/*
- * If current CPU and requested CPU are in the same group, running
- * softirq in current CPU. One might concern this is just like
+ * If current CPU and requested CPU share a cache, run the softirq on
+ * the current CPU. One might concern this is just like
* QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
* running in interrupt handler, and currently I/O controller doesn't
* support multiple interrupts, so current CPU is unique actually. This
* avoids IPI sending from current CPU to the first CPU of a group.
*/
- if (ccpu == cpu || ccpu == group_cpu) {
+ if (ccpu == cpu || shared) {
struct list_head *list;
do_local:
list = &__get_cpu_var(blk_cpu_done);
diff --git a/block/blk.h b/block/blk.h
index 7efd772336d..d45be871329 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -137,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
void blk_recalc_rq_segments(struct request *rq);
void blk_rq_set_mixed_merge(struct request *rq);
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
+int blk_try_merge(struct request *rq, struct bio *bio);
void blk_queue_congestion_threshold(struct request_queue *q);
@@ -164,22 +166,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
return q->nr_congestion_off;
}
-static inline int blk_cpu_to_group(int cpu)
-{
- int group = NR_CPUS;
-#ifdef CONFIG_SCHED_MC
- const struct cpumask *mask = cpu_coregroup_mask(cpu);
- group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
- group = cpumask_first(topology_thread_cpumask(cpu));
-#else
- return cpu;
-#endif
- if (likely(group < NR_CPUS))
- return group;
- return cpu;
-}
-
/*
* Contribute to IO statistics IFF:
*
diff --git a/block/bsg.c b/block/bsg.c
index 4cf703fd98b..ff64ae3bace 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -983,7 +983,8 @@ void bsg_unregister_queue(struct request_queue *q)
mutex_lock(&bsg_mutex);
idr_remove(&bsg_minor_idr, bcd->minor);
- sysfs_remove_link(&q->kobj, "bsg");
+ if (q->kobj.sd)
+ sysfs_remove_link(&q->kobj, "bsg");
device_unregister(bcd->class_dev);
bcd->class_dev = NULL;
kref_put(&bcd->ref, bsg_kref_release_function);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ee55019066a..45729525356 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
/*
* Lookup the cfqq that this bio will be queued with and allow
- * merge only if rq is queued there. This function can be called
- * from plug merge without queue_lock. In such cases, ioc of @rq
- * and %current are guaranteed to be equal. Avoid lookup which
- * requires queue_lock by using @rq's cic.
+ * merge only if rq is queued there.
*/
- if (current->io_context == RQ_CIC(rq)->icq.ioc) {
- cic = RQ_CIC(rq);
- } else {
- cic = cfq_cic_lookup(cfqd, current->io_context);
- if (!cic)
- return false;
- }
+ cic = cfq_cic_lookup(cfqd, current->io_context);
+ if (!cic)
+ return false;
cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
return cfqq == RQ_CFQQ(rq);
@@ -1794,7 +1787,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfqd->active_queue = NULL;
if (cfqd->active_cic) {
- put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
+ put_io_context(cfqd->active_cic->icq.ioc);
cfqd->active_cic = NULL;
}
}
@@ -3117,17 +3110,18 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ enum wl_type_t old_type = cfqq_type(cfqd->active_queue);
+
cfq_log_cfqq(cfqd, cfqq, "preempt");
+ cfq_slice_expired(cfqd, 1);
/*
* workload type is changed, don't save slice, otherwise preempt
* doesn't happen
*/
- if (cfqq_type(cfqd->active_queue) != cfqq_type(cfqq))
+ if (old_type != cfqq_type(cfqq))
cfqq->cfqg->saved_workload_slice = 0;
- cfq_slice_expired(cfqd, 1);
-
/*
* Put the new queue at the front of the of the current list,
* so we know that it will be selected next.
@@ -3476,20 +3470,20 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
const int rw = rq_data_dir(rq);
const bool is_sync = rq_is_sync(rq);
struct cfq_queue *cfqq;
+ unsigned int changed;
might_sleep_if(gfp_mask & __GFP_WAIT);
spin_lock_irq(q->queue_lock);
/* handle changed notifications */
- if (unlikely(cic->icq.changed)) {
- if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
- changed_ioprio(cic);
+ changed = icq_get_changed(&cic->icq);
+ if (unlikely(changed & ICQ_IOPRIO_CHANGED))
+ changed_ioprio(cic);
#ifdef CONFIG_CFQ_GROUP_IOSCHED
- if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
- changed_cgroup(cic);
+ if (unlikely(changed & ICQ_CGROUP_CHANGED))
+ changed_cgroup(cic);
#endif
- }
new_queue:
cfqq = cic_to_cfqq(cic, is_sync);
diff --git a/block/elevator.c b/block/elevator.c
index 91e18f8af9b..f016855a46b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -70,39 +70,9 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
/*
* can we safely merge with this request?
*/
-int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
- if (!rq_mergeable(rq))
- return 0;
-
- /*
- * Don't merge file system requests and discard requests
- */
- if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
- return 0;
-
- /*
- * Don't merge discard requests and secure discard requests
- */
- if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
- return 0;
-
- /*
- * different data direction or already started, don't merge
- */
- if (bio_data_dir(bio) != rq_data_dir(rq))
- return 0;
-
- /*
- * must be same device and not a special request
- */
- if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
- return 0;
-
- /*
- * only merge integrity protected bio into ditto rq
- */
- if (bio_integrity(bio) != blk_integrity_rq(rq))
+ if (!blk_rq_merge_ok(rq, bio))
return 0;
if (!elv_iosched_allow_merge(rq, bio))
@@ -112,23 +82,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_rq_merge_ok);
-int elv_try_merge(struct request *__rq, struct bio *bio)
-{
- int ret = ELEVATOR_NO_MERGE;
-
- /*
- * we can merge and sequence is ok, check if it's possible
- */
- if (elv_rq_merge_ok(__rq, bio)) {
- if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
- ret = ELEVATOR_BACK_MERGE;
- else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
- ret = ELEVATOR_FRONT_MERGE;
- }
-
- return ret;
-}
-
static struct elevator_type *elevator_find(const char *name)
{
struct elevator_type *e;
@@ -478,8 +431,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
/*
* First try one-hit cache.
*/
- if (q->last_merge) {
- ret = elv_try_merge(q->last_merge, bio);
+ if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+ ret = blk_try_merge(q->last_merge, bio);
if (ret != ELEVATOR_NO_MERGE) {
*req = q->last_merge;
return ret;
diff --git a/block/genhd.c b/block/genhd.c
index 23b4f706332..df9816ede75 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -35,6 +35,7 @@ static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type;
+static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
@@ -601,6 +602,8 @@ void add_disk(struct gendisk *disk)
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
+ disk_alloc_events(disk);
+
/* Register BDI before referencing it from bdev */
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, disk_devt(disk));
@@ -1475,9 +1478,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
else if (intv)
- queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
@@ -1521,7 +1524,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
ev->clearing |= mask;
if (!ev->block) {
cancel_delayed_work(&ev->dwork);
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
}
spin_unlock_irq(&ev->lock);
}
@@ -1558,7 +1561,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
/* uncondtionally schedule event check and wait for it to finish */
disk_block_events(disk);
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
flush_delayed_work(&ev->dwork);
__disk_unblock_events(disk, false);
@@ -1595,7 +1598,7 @@ static void disk_events_workfn(struct work_struct *work)
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
- queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
spin_unlock_irq(&ev->lock);
@@ -1733,9 +1736,9 @@ module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
&disk_events_dfl_poll_msecs, 0644);
/*
- * disk_{add|del|release}_events - initialize and destroy disk_events.
+ * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
*/
-static void disk_add_events(struct gendisk *disk)
+static void disk_alloc_events(struct gendisk *disk)
{
struct disk_events *ev;
@@ -1748,16 +1751,6 @@ static void disk_add_events(struct gendisk *disk)
return;
}
- if (sysfs_create_files(&disk_to_dev(disk)->kobj,
- disk_events_attrs) < 0) {
- pr_warn("%s: failed to create sysfs files for events\n",
- disk->disk_name);
- kfree(ev);
- return;
- }
-
- disk->ev = ev;
-
INIT_LIST_HEAD(&ev->node);
ev->disk = disk;
spin_lock_init(&ev->lock);
@@ -1766,8 +1759,21 @@ static void disk_add_events(struct gendisk *disk)
ev->poll_msecs = -1;
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
+ disk->ev = ev;
+}
+
+static void disk_add_events(struct gendisk *disk)
+{
+ if (!disk->ev)
+ return;
+
+ /* FIXME: error handling */
+ if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
+ pr_warn("%s: failed to create sysfs files for events\n",
+ disk->disk_name);
+
mutex_lock(&disk_events_mutex);
- list_add_tail(&ev->node, &disk_events);
+ list_add_tail(&disk->ev->node, &disk_events);
mutex_unlock(&disk_events_mutex);
/*
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d06ec1c829c..6df5d6928a4 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
}
}
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
- struct parsed_partitions *state = NULL;
struct disk_part_iter piter;
struct hd_struct *part;
- int p, highest, res;
-rescan:
- if (state && !IS_ERR(state)) {
- kfree(state);
- state = NULL;
- }
+ int res;
if (bdev->bd_part_count)
return -EBUSY;
@@ -412,6 +406,24 @@ rescan:
delete_partition(disk, part->partno);
disk_part_iter_exit(&piter);
+ return 0;
+}
+
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+ struct parsed_partitions *state = NULL;
+ struct hd_struct *part;
+ int p, highest, res;
+rescan:
+ if (state && !IS_ERR(state)) {
+ kfree(state);
+ state = NULL;
+ }
+
+ res = drop_partitions(disk, bdev);
+ if (res)
+ return res;
+
if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
check_disk_size_change(disk, bdev);
@@ -515,6 +527,26 @@ rescan:
return 0;
}
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+ int res;
+
+ if (!bdev->bd_invalidated)
+ return 0;
+
+ res = drop_partitions(disk, bdev);
+ if (res)
+ return res;
+
+ set_capacity(disk, 0);
+ check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
+ /* tell userspace that the media / partition table may have changed */
+ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+
+ return 0;
+}
+
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index bd8ae788f68..e507cfbd044 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -2,7 +2,7 @@
* ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
*
* Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
- * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov
* Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
*
* Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads
@@ -1341,20 +1341,17 @@ found:
ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
return false;
}
-
if (f->map & (1 << rec)) {
ldm_error ("Duplicate VBLK, part %d.", rec);
f->map &= 0x7F; /* Mark the group as broken */
return false;
}
-
f->map |= (1 << rec);
-
+ if (!rec)
+ memcpy(f->data, data, VBLK_SIZE_HEAD);
data += VBLK_SIZE_HEAD;
size -= VBLK_SIZE_HEAD;
-
- memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
-
+ memcpy(f->data + VBLK_SIZE_HEAD + rec * size, data, size);
return true;
}