47 files changed, 1089 insertions, 751 deletions
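For orientation before the diff body: one recurring change in the cciss hunks below is replacing the driver's hand-rolled circular command list (prev/next pointers embedded in CommandList_struct) with the kernel's hlist primitives. The following is a minimal, illustrative sketch of that pattern only, not the driver's real code: struct cmd, its busaddr field, drain() and find_by_busaddr() are placeholder names, while addQ()/removeQ() mirror the helpers the patch actually rewrites.

    #include <linux/kernel.h>
    #include <linux/list.h>
    #include <linux/types.h>

    struct cmd {
            struct hlist_node list;         /* replaces the old prev/next pair */
            u32 busaddr;
    };

    static HLIST_HEAD(reqQ);                /* was: struct cmd *reqQ (circular list) */

    /* queue at the head, as the reworked addQ() in the patch does */
    static inline void addQ(struct hlist_head *list, struct cmd *c)
    {
            hlist_add_head(&c->list, list);
    }

    /* nodes must be INIT_HLIST_NODE()'d at allocation so unhashed checks work */
    static inline void removeQ(struct cmd *c)
    {
            if (WARN_ON(hlist_unhashed(&c->list)))
                    return;
            hlist_del_init(&c->list);
    }

    /* drain front entry first, the shape start_io()/fail_all_cmds() take below */
    static void drain(struct hlist_head *q)
    {
            struct cmd *c;

            while (!hlist_empty(q)) {
                    c = hlist_entry(q->first, struct cmd, list);
                    removeQ(c);
                    /* submit or fail c here */
            }
    }

    /* completion lookup by bus address, as in the reworked do_cciss_intr() */
    static struct cmd *find_by_busaddr(struct hlist_head *q, u32 a)
    {
            struct hlist_node *tmp;
            struct cmd *c;

            /* four-argument hlist_for_each_entry(), as used in this patch */
            hlist_for_each_entry(c, tmp, q, list)
                    if (c->busaddr == a)
                            return c;
            return NULL;
    }

The design point is that an hlist_head is a single pointer, empty-list and unhashed states are explicit, and deletion needs no back-reference to the list head, which is what lets the patch drop the Qptr double-pointer plumbing from addQ()/removeQ().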
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 4dbb8be1c99..3c5434c83da 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -914,7 +914,7 @@ I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch queue and specific I/O schedulers. Unless stated otherwise, elevator is used to refer to both parts and I/O scheduler to specific I/O schedulers. -Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c. +Block layer implements generic dispatch queue in block/*.c. The generic dispatch queue is responsible for properly ordering barrier requests, requeueing, handling non-fs requests and all other subtleties. @@ -926,8 +926,8 @@ be built inside the kernel. Each queue can choose different one and can also change to another one dynamically. A block layer call to the i/o scheduler follows the convention elv_xxx(). This -calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh, -xxx and xxx might not match exactly, but use your imagination. If an elevator +calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx +and xxx might not match exactly, but use your imagination. If an elevator doesn't implement a function, the switch does nothing or some minimal house keeping work. diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index e971ab000f9..eda9b909aa0 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -96,9 +96,6 @@ static inline dma_addr_t __deprecated isa_page_to_bus(struct page *page) return page_to_phys(page); } -/* This depends on working iommu. */ -#define BIO_VMERGE_BOUNDARY (alpha_mv.mv_pci_tbi ? PAGE_SIZE : 0) - /* Maximum PIO space address supported? */ #define IO_SPACE_LIMIT 0xffff diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ef3635b52fc..0767827540b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -263,7 +263,7 @@ int s390_enable_sie(void) /* lets check if we are allowed to replace the mm */ task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || - tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { + tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) { task_unlock(tsk); return -EINVAL; } @@ -279,7 +279,7 @@ int s390_enable_sie(void) /* Now lets check again if something happened */ task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || - tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { + tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) { mmput(mm); task_unlock(tsk); return -EINVAL; diff --git a/block/Kconfig b/block/Kconfig index 290b219fad9..ac0956f7778 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -24,21 +24,17 @@ menuconfig BLOCK if BLOCK config LBD - bool "Support for Large Block Devices" + bool "Support for large block devices and files" depends on !64BIT help - Enable block devices of size 2TB and larger. + Enable block devices or files of size 2TB and larger. This option is required to support the full capacity of large (2TB+) block devices, including RAID, disk, Network Block Device, Logical Volume Manager (LVM) and loopback. - - For example, RAID devices are frequently bigger than the capacity - of the largest individual hard drive. - - This option is not required if you have individual disk drives - which total 2TB+ and you are not aggregating the capacity into - a large block device (e.g. using RAID or LVM). + + This option also enables support for single files larger than + 2TB. 
If unsure, say N. @@ -58,15 +54,6 @@ config BLK_DEV_IO_TRACE If unsure, say N. -config LSF - bool "Support for Large Single Files" - depends on !64BIT - help - Say Y here if you want to be able to handle very large files (2TB - and larger), otherwise say N. - - If unsure, say Y. - config BLK_DEV_BSG bool "Block layer SG support v4 (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/block/as-iosched.c b/block/as-iosched.c index 71f0abb219e..631f6f44460 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1339,12 +1339,12 @@ static int as_may_queue(struct request_queue *q, int rw) return ret; } -static void as_exit_queue(elevator_t *e) +static void as_exit_queue(struct elevator_queue *e) { struct as_data *ad = e->elevator_data; del_timer_sync(&ad->antic_timer); - kblockd_flush_work(&ad->antic_work); + cancel_work_sync(&ad->antic_work); BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); @@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count) return count; } -static ssize_t est_time_show(elevator_t *e, char *page) +static ssize_t est_time_show(struct elevator_queue *e, char *page) { struct as_data *ad = e->elevator_data; int pos = 0; @@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page) } #define SHOW_FUNCTION(__FUNC, __VAR) \ -static ssize_t __FUNC(elevator_t *e, char *page) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ struct as_data *ad = e->elevator_data; \ return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ @@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ -static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ struct as_data *ad = e->elevator_data; \ int ret = as_var_store(__PTR, (page), count); \ diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6e72d661ae4..8eba4e43bb0 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -24,8 +24,8 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered, prepare_flush_fn *prepare_flush_fn) { - if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && - prepare_flush_fn == NULL) { + if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_POSTFLUSH))) { printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__); return -EINVAL; } @@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq) return QUEUE_ORDSEQ_DONE; } -void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) +bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) { struct request *rq; @@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) q->ordseq |= seq; if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return; + return false; /* * Okay, sequence complete. 
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) BUG(); + + return true; } static void pre_flush_end_io(struct request *rq, int error) @@ -134,7 +136,7 @@ static void queue_flush(struct request_queue *q, unsigned which) struct request *rq; rq_end_io_fn *end_io; - if (which == QUEUE_ORDERED_PREFLUSH) { + if (which == QUEUE_ORDERED_DO_PREFLUSH) { rq = &q->pre_flush_rq; end_io = pre_flush_end_io; } else { @@ -151,80 +153,110 @@ static void queue_flush(struct request_queue *q, unsigned which) elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(struct request_queue *q, - struct request *rq) +static inline bool start_ordered(struct request_queue *q, struct request **rqp) { + struct request *rq = *rqp; + unsigned skip = 0; + q->orderr = 0; q->ordered = q->next_ordered; q->ordseq |= QUEUE_ORDSEQ_STARTED; /* - * Prep proxy barrier request. + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. */ + if (!rq->hard_nr_sectors) { + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); + /* + * Empty barrier on a write-through device w/ ordered + * tag has no command to issue and without any command + * to issue, ordering by tag can't be used. Drain + * instead. + */ + if ((q->ordered & QUEUE_ORDERED_BY_TAG) && + !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { + q->ordered &= ~QUEUE_ORDERED_BY_TAG; + q->ordered |= QUEUE_ORDERED_BY_DRAIN; + } + } + + /* stash away the original request */ elv_dequeue_request(q, rq); q->orig_bar_rq = rq; - rq = &q->bar_rq; - blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_RW; - if (q->ordered & QUEUE_ORDERED_FUA) - rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); - rq->end_io = bar_end_io; + rq = NULL; /* * Queue ordered sequence. As we stack them at the head, we * need to queue in reverse order. Note that we rely on that * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. If this request is - * an empty barrier, we don't need to do a postflush ever since - * there will be no data written between the pre and post flush. - * Hence a single flush will suffice. + * request gets inbetween ordered sequence. 
*/ - if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) - queue_flush(q, QUEUE_ORDERED_POSTFLUSH); - else - q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; + if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { + queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); + rq = &q->post_flush_rq; + } else + skip |= QUEUE_ORDSEQ_POSTFLUSH; - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + if (q->ordered & QUEUE_ORDERED_DO_BAR) { + rq = &q->bar_rq; + + /* initialize proxy request and queue it */ + blk_rq_init(q, rq); + if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) + rq->cmd_flags |= REQ_RW; + if (q->ordered & QUEUE_ORDERED_DO_FUA) + rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->end_io = bar_end_io; - if (q->ordered & QUEUE_ORDERED_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_PREFLUSH); + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + } else + skip |= QUEUE_ORDSEQ_BAR; + + if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { + queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); rq = &q->pre_flush_rq; } else - q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; + skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) - q->ordseq |= QUEUE_ORDSEQ_DRAIN; - else + if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight) rq = NULL; + else + skip |= QUEUE_ORDSEQ_DRAIN; + + *rqp = rq; - return rq; + /* + * Complete skipped sequences. If whole sequence is complete, + * return false to tell elevator that this request is gone. + */ + return !blk_ordered_complete_seq(q, skip, 0); } -int blk_do_ordered(struct request_queue *q, struct request **rqp) +bool blk_do_ordered(struct request_queue *q, struct request **rqp) { struct request *rq = *rqp; const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); if (!q->ordseq) { if (!is_barrier) - return 1; + return true; - if (q->next_ordered != QUEUE_ORDERED_NONE) { - *rqp = start_ordered(q, rq); - return 1; - } else { + if (q->next_ordered != QUEUE_ORDERED_NONE) + return start_ordered(q, rqp); + else { /* - * This can happen when the queue switches to - * ORDERED_NONE while this request is on it. + * Queue ordering not supported. Terminate + * with prejudice. */ elv_dequeue_request(q, rq); if (__blk_end_request(rq, -EOPNOTSUPP, blk_rq_bytes(rq))) BUG(); *rqp = NULL; - return 0; + return false; } } @@ -235,9 +267,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp) /* Special requests are not subject to ordering rules. */ if (!blk_fs_request(rq) && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return 1; + return true; - if (q->ordered & QUEUE_ORDERED_TAG) { + if (q->ordered & QUEUE_ORDERED_BY_TAG) { /* Ordered by tag. Blocking the next barrier is enough. 
*/ if (is_barrier && rq != &q->bar_rq) *rqp = NULL; @@ -248,7 +280,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp) *rqp = NULL; } - return 1; + return true; } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index 561e8a1b43a..a824e49c0d0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -153,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio, nbytes = bio->bi_size; } + if (unlikely(rq->cmd_flags & REQ_QUIET)) + set_bit(BIO_QUIET, &bio->bi_flags); + bio->bi_size -= nbytes; bio->bi_sector += (nbytes >> 9); @@ -265,8 +268,7 @@ void __generic_unplug_device(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; - - if (!blk_remove_plug(q)) + if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) return; q->request_fn(q); @@ -404,7 +406,8 @@ EXPORT_SYMBOL(blk_stop_queue); void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->unplug_timer); - kblockd_flush_work(&q->unplug_work); + del_timer_sync(&q->timeout); + cancel_work_sync(&q->unplug_work); } EXPORT_SYMBOL(blk_sync_queue); @@ -1135,7 +1138,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors, barrier, discard, err; + int el_ret, nr_sectors; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); int rw_flags; @@ -1149,22 +1152,9 @@ static int __make_request(struct request_queue *q, struct bio *bio) */ blk_queue_bounce(q, &bio); - barrier = bio_barrier(bio); - if (unlikely(barrier) && bio_has_data(bio) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { - err = -EOPNOTSUPP; - goto end_io; - } - - discard = bio_discard(bio); - if (unlikely(discard) && !q->prepare_discard_fn) { - err = -EOPNOTSUPP; - goto end_io; - } - spin_lock_irq(q->queue_lock); - if (unlikely(barrier) || elv_queue_empty(q)) + if (unlikely(bio_barrier(bio)) || elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1250,18 +1240,14 @@ get_rq: if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || bio_flagged(bio, BIO_CPU_AFFINE)) req->cpu = blk_cpu_to_group(smp_processor_id()); - if (elv_queue_empty(q)) + if (!blk_queue_nonrot(q) && elv_queue_empty(q)) blk_plug_device(q); add_request(q, req); out: - if (sync) + if (sync || blk_queue_nonrot(q)) __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); return 0; - -end_io: - bio_endio(bio, err); - return 0; } /* @@ -1414,15 +1400,13 @@ static inline void __generic_make_request(struct bio *bio) char b[BDEVNAME_SIZE]; q = bdev_get_queue(bio->bi_bdev); - if (!q) { + if (unlikely(!q)) { printk(KERN_ERR "generic_make_request: Trying to access " "nonexistent block-device %s (%Lu)\n", bdevname(bio->bi_bdev, b), (long long) bio->bi_sector); -end_io: - bio_endio(bio, err); - break; + goto end_io; } if (unlikely(nr_sectors > q->max_hw_sectors)) { @@ -1459,14 +1443,19 @@ end_io: if (bio_check_eod(bio, nr_sectors)) goto end_io; - if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || - (bio_discard(bio) && !q->prepare_discard_fn)) { + + if (bio_discard(bio) && !q->prepare_discard_fn) { err = -EOPNOTSUPP; goto end_io; } ret = q->make_request_fn(q, bio); } while (ret); + + return; + +end_io: + bio_endio(bio, err); } /* @@ -1716,14 +1705,6 @@ static int __end_that_request_first(struct request *req, int error, while ((bio = req->bio) != NULL) { int nbytes; - /* - * For an empty barrier request, the low level driver must - * store a potential error 
location in ->sector. We pass - * that back up in ->bi_sector. - */ - if (blk_empty_barrier(req)) - bio->bi_sector = req->sector; - if (nr_bytes >= bio->bi_size) { req->bio = bio->bi_next; nbytes = bio->bi_size; @@ -2143,12 +2124,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); -void kblockd_flush_work(struct work_struct *work) -{ - cancel_work_sync(work); -} -EXPORT_SYMBOL(kblockd_flush_work); - int __init blk_dev_init(void) { kblockd_workqueue = create_workqueue("kblockd"); diff --git a/block/blk-settings.c b/block/blk-settings.c index afa55e14e27..59fd05d9f1d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -319,9 +319,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); - t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments); - t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments); - t->max_segment_size = min(t->max_segment_size, b->max_segment_size); + t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments); + t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments); + t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); t->hardsect_size = max(t->hardsect_size, b->hardsect_size); if (!t->queue_lock) WARN_ON_ONCE(1); diff --git a/block/blk-softirq.c b/block/blk-softirq.c index e660d26ca65..ce0efc6b26d 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -161,7 +161,7 @@ void blk_complete_request(struct request *req) } EXPORT_SYMBOL(blk_complete_request); -__init int blk_softirq_init(void) +static __init int blk_softirq_init(void) { int i; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 21e275d7eed..a29cb788e40 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -88,9 +88,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) unsigned long ra_kb; ssize_t ret = queue_var_store(&ra_kb, page, count); - spin_lock_irq(q->queue_lock); q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); - spin_unlock_irq(q->queue_lock); return ret; } @@ -117,10 +115,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) return -EINVAL; - /* - * Take the queue lock to update the readahead and max_sectors - * values synchronously: - */ + spin_lock_irq(q->queue_lock); q->max_sectors = max_sectors_kb << 1; spin_unlock_irq(q->queue_lock); diff --git a/block/blk-tag.c b/block/blk-tag.c index c0d419e84ce..3c518e3303a 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -158,7 +158,6 @@ fail: /** * blk_init_tags - initialize the tag info for an external tag map * @depth: the maximum queue depth supported - * @tags: the tag to use **/ struct blk_queue_tag *blk_init_tags(int depth) { diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 69185ea9fae..a09535377a9 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -73,11 +73,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, */ void blk_delete_timer(struct request *req) { - struct request_queue *q = req->q; - list_del_init(&req->timeout_list); - if (list_empty(&q->timeout_list)) - del_timer(&q->timeout); } static void blk_rq_timed_out(struct request *req) @@ -111,7 +107,7 @@ static void blk_rq_timed_out(struct 
request *req) void blk_rq_timed_out_timer(unsigned long data) { struct request_queue *q = (struct request_queue *) data; - unsigned long flags, uninitialized_var(next), next_set = 0; + unsigned long flags, next = 0; struct request *rq, *tmp; spin_lock_irqsave(q->queue_lock, flags); @@ -126,15 +122,18 @@ void blk_rq_timed_out_timer(unsigned long data) if (blk_mark_rq_complete(rq)) continue; blk_rq_timed_out(rq); + } else { + if (!next || time_after(next, rq->deadline)) + next = rq->deadline; } - if (!next_set) { - next = rq->deadline; - next_set = 1; - } else if (time_after(next, rq->deadline)) - next = rq->deadline; } - if (next_set && !list_empty(&q->timeout_list)) + /* + * next can never be 0 here with the list non-empty, since we always + * bump ->deadline to 1 so we can detect if the timer was ever added + * or not. See comment in blk_add_timer() + */ + if (next) mod_timer(&q->timeout, round_jiffies_up(next)); spin_unlock_irqrestore(q->queue_lock, flags); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6a062eebbd1..e8525fa7282 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1136,12 +1136,8 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) if (cfq_class_idle(cfqq)) max_dispatch = 1; - if (cfqq->dispatched >= max_dispatch) { - if (cfqd->busy_queues > 1) - break; - if (cfqq->dispatched >= 4 * max_dispatch) - break; - } + if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1) + break; if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) break; @@ -1318,7 +1314,15 @@ static void cfq_exit_single_io_context(struct io_context *ioc, unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __cfq_exit_single_io_context(cfqd, cic); + + /* + * Ensure we get a fresh copy of the ->key to prevent + * race between exiting task and queue + */ + smp_read_barrier_depends(); + if (cic->key) + __cfq_exit_single_io_context(cfqd, cic); + spin_unlock_irqrestore(q->queue_lock, flags); } } @@ -2160,7 +2164,7 @@ out_cont: static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { del_timer_sync(&cfqd->idle_slice_timer); - kblockd_flush_work(&cfqd->unplug_work); + cancel_work_sync(&cfqd->unplug_work); } static void cfq_put_async_queues(struct cfq_data *cfqd) @@ -2178,7 +2182,7 @@ static void cfq_put_async_queues(struct cfq_data *cfqd) cfq_put_queue(cfqd->async_idle_cfqq); } -static void cfq_exit_queue(elevator_t *e) +static void cfq_exit_queue(struct elevator_queue *e) { struct cfq_data *cfqd = e->elevator_data; struct request_queue *q = cfqd->queue; @@ -2288,7 +2292,7 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) } #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -static ssize_t __FUNC(elevator_t *e, char *page) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ struct cfq_data *cfqd = e->elevator_data; \ unsigned int __data = __VAR; \ @@ -2308,7 +2312,7 @@ SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ struct cfq_data *cfqd = e->elevator_data; \ unsigned int __data; \ diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 67eb93cff69..f87615dea46 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -774,9 +774,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) bdi = blk_get_backing_dev_info(bdev); if (bdi == 
NULL) return -ENOTTY; - lock_kernel(); bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; - unlock_kernel(); return 0; case BLKGETSIZE: size = bdev->bd_inode->i_size; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index fd311179f44..c4d991d4ade 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -334,7 +334,7 @@ static int deadline_queue_empty(struct request_queue *q) && list_empty(&dd->fifo_list[READ]); } -static void deadline_exit_queue(elevator_t *e) +static void deadline_exit_queue(struct elevator_queue *e) { struct deadline_data *dd = e->elevator_data; @@ -387,7 +387,7 @@ deadline_var_store(int *var, const char *page, size_t count) } #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -static ssize_t __FUNC(elevator_t *e, char *page) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ struct deadline_data *dd = e->elevator_data; \ int __data = __VAR; \ @@ -403,7 +403,7 @@ SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ struct deadline_data *dd = e->elevator_data; \ int __data; \ diff --git a/block/elevator.c b/block/elevator.c index 86836dd179c..98259eda0ef 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -65,7 +65,7 @@ DEFINE_TRACE(block_rq_issue); static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) { struct request_queue *q = rq->q; - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_allow_merge_fn) return e->ops->elevator_allow_merge_fn(q, rq, bio); @@ -208,13 +208,13 @@ __setup("elevator=", elevator_setup); static struct kobj_type elv_ktype; -static elevator_t *elevator_alloc(struct request_queue *q, +static struct elevator_queue *elevator_alloc(struct request_queue *q, struct elevator_type *e) { - elevator_t *eq; + struct elevator_queue *eq; int i; - eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); + eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node); if (unlikely(!eq)) goto err; @@ -240,8 +240,9 @@ err: static void elevator_release(struct kobject *kobj) { - elevator_t *e = container_of(kobj, elevator_t, kobj); + struct elevator_queue *e; + e = container_of(kobj, struct elevator_queue, kobj); elevator_put(e->elevator_type); kfree(e->hash); kfree(e); @@ -297,7 +298,7 @@ int elevator_init(struct request_queue *q, char *name) } EXPORT_SYMBOL(elevator_init); -void elevator_exit(elevator_t *e) +void elevator_exit(struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); if (e->ops->elevator_exit_fn) @@ -311,7 +312,7 @@ EXPORT_SYMBOL(elevator_exit); static void elv_activate_rq(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_activate_req_fn) e->ops->elevator_activate_req_fn(q, rq); @@ -319,7 +320,7 @@ static void elv_activate_rq(struct request_queue *q, struct request *rq) static void elv_deactivate_rq(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_deactivate_req_fn) e->ops->elevator_deactivate_req_fn(q, rq); @@ -338,7 +339,7 @@ static void elv_rqhash_del(struct request_queue *q, struct request *rq) static void elv_rqhash_add(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + 
struct elevator_queue *e = q->elevator; BUG_ON(ELV_ON_HASH(rq)); hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); @@ -352,7 +353,7 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; struct hlist_node *entry, *next; struct request *rq; @@ -494,7 +495,7 @@ EXPORT_SYMBOL(elv_dispatch_add_tail); int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; struct request *__rq; int ret; @@ -529,7 +530,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) void elv_merged_request(struct request_queue *q, struct request *rq, int type) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_merged_fn) e->ops->elevator_merged_fn(q, rq, type); @@ -543,7 +544,7 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type) void elv_merge_requests(struct request_queue *q, struct request *rq, struct request *next) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); @@ -755,14 +756,6 @@ struct request *elv_next_request(struct request_queue *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { - /* - * Kill the empty barrier place holder, the driver must - * not ever see it. - */ - if (blk_empty_barrier(rq)) { - __blk_end_request(rq, 0, blk_rq_bytes(rq)); - continue; - } if (!(rq->cmd_flags & REQ_STARTED)) { /* * This is the first time the device driver @@ -854,7 +847,7 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) int elv_queue_empty(struct request_queue *q) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (!list_empty(&q->queue_head)) return 0; @@ -868,7 +861,7 @@ EXPORT_SYMBOL(elv_queue_empty); struct request *elv_latter_request(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_latter_req_fn) return e->ops->elevator_latter_req_fn(q, rq); @@ -877,7 +870,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) struct request *elv_former_request(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_former_req_fn) return e->ops->elevator_former_req_fn(q, rq); @@ -886,7 +879,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_set_req_fn) return e->ops->elevator_set_req_fn(q, rq, gfp_mask); @@ -897,7 +890,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) void elv_put_request(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; if (e->ops->elevator_put_req_fn) e->ops->elevator_put_req_fn(rq); @@ -905,7 +898,7 @@ void elv_put_request(struct request_queue *q, struct request *rq) int elv_may_queue(struct request_queue *q, int rw) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; 
if (e->ops->elevator_may_queue_fn) return e->ops->elevator_may_queue_fn(q, rw); @@ -928,7 +921,7 @@ EXPORT_SYMBOL(elv_abort_queue); void elv_completed_request(struct request_queue *q, struct request *rq) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; /* * request is released from the driver, io must be done @@ -944,10 +937,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq) * drained for flush sequence. */ if (unlikely(q->ordseq)) { - struct request *first_rq = list_entry_rq(q->queue_head.next); - if (q->in_flight == 0 && + struct request *next = NULL; + + if (!list_empty(&q->queue_head)) + next = list_entry_rq(q->queue_head.next); + + if (!q->in_flight && blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { + (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); blk_start_queueing(q); } @@ -959,13 +956,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq) static ssize_t elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { - elevator_t *e = container_of(kobj, elevator_t, kobj); struct elv_fs_entry *entry = to_elv(attr); + struct elevator_queue *e; ssize_t error; if (!entry->show) return -EIO; + e = container_of(kobj, struct elevator_queue, kobj); mutex_lock(&e->sysfs_lock); error = e->ops ? entry->show(e, page) : -ENOENT; mutex_unlock(&e->sysfs_lock); @@ -976,13 +974,14 @@ static ssize_t elv_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t length) { - elevator_t *e = container_of(kobj, elevator_t, kobj); struct elv_fs_entry *entry = to_elv(attr); + struct elevator_queue *e; ssize_t error; if (!entry->store) return -EIO; + e = container_of(kobj, struct elevator_queue, kobj); mutex_lock(&e->sysfs_lock); error = e->ops ? 
entry->store(e, page, length) : -ENOENT; mutex_unlock(&e->sysfs_lock); @@ -1001,7 +1000,7 @@ static struct kobj_type elv_ktype = { int elv_register_queue(struct request_queue *q) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; int error; error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); @@ -1019,7 +1018,7 @@ int elv_register_queue(struct request_queue *q) return error; } -static void __elv_unregister_queue(elevator_t *e) +static void __elv_unregister_queue(struct elevator_queue *e) { kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); @@ -1082,7 +1081,7 @@ EXPORT_SYMBOL_GPL(elv_unregister); */ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { - elevator_t *old_elevator, *e; + struct elevator_queue *old_elevator, *e; void *data; /* @@ -1188,7 +1187,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, ssize_t elv_iosched_show(struct request_queue *q, char *name) { - elevator_t *e = q->elevator; + struct elevator_queue *e = q->elevator; struct elevator_type *elv = e->elevator_type; struct elevator_type *__e; int len = 0; diff --git a/block/genhd.c b/block/genhd.c index 2f7feda61e3..d84a7df1e2a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter) } EXPORT_SYMBOL_GPL(disk_part_iter_exit); +static inline int sector_in_part(struct hd_struct *part, sector_t sector) +{ + return part->start_sect <= sector && + sector < part->start_sect + part->nr_sects; +} + /** * disk_map_sector_rcu - map sector to partition * @disk: gendisk of interest @@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { struct disk_part_tbl *ptbl; + struct hd_struct *part; int i; ptbl = rcu_dereference(disk->part_tbl); + part = rcu_dereference(ptbl->last_lookup); + if (part && sector_in_part(part, sector)) + return part; + for (i = 1; i < ptbl->len; i++) { - struct hd_struct *part = rcu_dereference(ptbl->part[i]); + part = rcu_dereference(ptbl->part[i]); - if (part && part->start_sect <= sector && - sector < part->start_sect + part->nr_sects) + if (part && sector_in_part(part, sector)) { + rcu_assign_pointer(ptbl->last_lookup, part); return part; + } } return &disk->part0; } @@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk, struct disk_part_tbl *old_ptbl = disk->part_tbl; rcu_assign_pointer(disk->part_tbl, new_ptbl); - if (old_ptbl) + + if (old_ptbl) { + rcu_assign_pointer(old_ptbl->last_lookup, NULL); call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); + } } /** diff --git a/block/ioctl.c b/block/ioctl.c index d03985b04d6..0f22e629b13 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -323,9 +323,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, bdi = blk_get_backing_dev_info(bdev); if (bdi == NULL) return -ENOTTY; - lock_kernel(); bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; - unlock_kernel(); return 0; case BLKBSZSET: /* set the logical block size */ diff --git a/block/noop-iosched.c b/block/noop-iosched.c index c23e0296965..3a0d369d08c 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -76,7 +76,7 @@ static void *noop_init_queue(struct request_queue *q) return nd; } -static void noop_exit_queue(elevator_t *e) +static void noop_exit_queue(struct elevator_queue *e) { struct noop_data *nd = e->elevator_data; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index d0bb92cbefb..ee9c67d7e1b 100644 --- 
a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -60,7 +60,7 @@ static int scsi_get_bus(struct request_queue *q, int __user *p) static int sg_get_timeout(struct request_queue *q) { - return q->sg_timeout / (HZ / USER_HZ); + return jiffies_to_clock_t(q->sg_timeout); } static int sg_set_timeout(struct request_queue *q, int __user *p) @@ -68,7 +68,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p) int timeout, err = get_user(timeout, p); if (!err) - q->sg_timeout = timeout * (HZ / USER_HZ); + q->sg_timeout = clock_t_to_jiffies(timeout); return err; } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 9f7c543cc04..01e69383d9c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -164,7 +164,7 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int cciss_revalidate(struct gendisk *disk); static int rebuild_lun_table(ctlr_info_t *h, int first_time); -static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, +static int deregister_disk(ctlr_info_t *h, int drv_index, int clear_all); static void cciss_read_capacity(int ctlr, int logvol, int withirq, @@ -215,31 +215,17 @@ static struct block_device_operations cciss_fops = { /* * Enqueuing and dequeuing functions for cmdlists. */ -static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c) +static inline void addQ(struct hlist_head *list, CommandList_struct *c) { - if (*Qptr == NULL) { - *Qptr = c; - c->next = c->prev = c; - } else { - c->prev = (*Qptr)->prev; - c->next = (*Qptr); - (*Qptr)->prev->next = c; - (*Qptr)->prev = c; - } + hlist_add_head(&c->list, list); } -static inline CommandList_struct *removeQ(CommandList_struct **Qptr, - CommandList_struct *c) +static inline void removeQ(CommandList_struct *c) { - if (c && c->next != c) { - if (*Qptr == c) - *Qptr = c->next; - c->prev->next = c->next; - c->next->prev = c->prev; - } else { - *Qptr = NULL; - } - return c; + if (WARN_ON(hlist_unhashed(&c->list))) + return; + + hlist_del_init(&c->list); } #include "cciss_scsi.c" /* For SCSI tape support */ @@ -506,6 +492,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool) c->cmdindex = i; } + INIT_HLIST_NODE(&c->list); c->busaddr = (__u32) cmd_dma_handle; temp64.val = (__u64) err_dma_handle; c->ErrDesc.Addr.lower = temp64.val32.lower; @@ -1492,8 +1479,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) * which keeps the interrupt handler from starting * the queue. */ - ret = deregister_disk(h->gendisk[drv_index], - &h->drv[drv_index], 0); + ret = deregister_disk(h, drv_index, 0); h->drv[drv_index].busy_configuring = 0; } @@ -1711,8 +1697,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); h->drv[i].busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - return_code = deregister_disk(h->gendisk[i], - &h->drv[i], 1); + return_code = deregister_disk(h, i, 1); h->drv[i].busy_configuring = 0; } } @@ -1782,15 +1767,19 @@ mem_msg: * the highest_lun should be left unchanged and the LunID * should not be cleared. 
*/ -static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, +static int deregister_disk(ctlr_info_t *h, int drv_index, int clear_all) { int i; - ctlr_info_t *h = get_host(disk); + struct gendisk *disk; + drive_info_struct *drv; if (!capable(CAP_SYS_RAWIO)) return -EPERM; + drv = &h->drv[drv_index]; + disk = h->gendisk[drv_index]; + /* make sure logical volume is NOT is use */ if (clear_all || (h->gendisk[0] == disk)) { if (drv->usage_count > 1) @@ -2548,7 +2537,8 @@ static void start_io(ctlr_info_t *h) { CommandList_struct *c; - while ((c = h->reqQ) != NULL) { + while (!hlist_empty(&h->reqQ)) { + c = hlist_entry(h->reqQ.first, CommandList_struct, list); /* can't do anything if fifo is full */ if ((h->access.fifo_full(h))) { printk(KERN_WARNING "cciss: fifo full\n"); @@ -2556,14 +2546,14 @@ static void start_io(ctlr_info_t *h) } /* Get the first entry from the Request Q */ - removeQ(&(h->reqQ), c); + removeQ(c); h->Qdepth--; /* Tell the controller execute command */ h->access.submit_command(h, c); /* Put job onto the completed Q */ - addQ(&(h->cmpQ), c); + addQ(&h->cmpQ, c); } } @@ -2576,7 +2566,7 @@ static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c) memset(c->err_info, 0, sizeof(ErrorInfo_struct)); /* add it to software queue and then send it to the controller */ - addQ(&(h->reqQ), c); + addQ(&h->reqQ, c); h->Qdepth++; if (h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; @@ -2897,7 +2887,7 @@ static void do_cciss_request(struct request_queue *q) spin_lock_irq(q->queue_lock); - addQ(&(h->reqQ), c); + addQ(&h->reqQ, c); h->Qdepth++; if (h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; @@ -2985,16 +2975,12 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) a = c->busaddr; } else { + struct hlist_node *tmp; + a &= ~3; - if ((c = h->cmpQ) == NULL) { - printk(KERN_WARNING - "cciss: Completion of %08x ignored\n", - a1); - continue; - } - while (c->busaddr != a) { - c = c->next; - if (c == h->cmpQ) + c = NULL; + hlist_for_each_entry(c, tmp, &h->cmpQ, list) { + if (c->busaddr == a) break; } } @@ -3002,8 +2988,8 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) * If we've found the command, take it off the * completion Q and free it */ - if (c->busaddr == a) { - removeQ(&h->cmpQ, c); + if (c && c->busaddr == a) { + removeQ(c); if (c->cmd_type == CMD_RWREQ) { complete_command(h, c, 0); } else if (c->cmd_type == CMD_IOCTL_PEND) { @@ -3423,6 +3409,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, return -1; hba[i]->busy_initializing = 1; + INIT_HLIST_HEAD(&hba[i]->cmpQ); + INIT_HLIST_HEAD(&hba[i]->reqQ); if (cciss_pci_init(hba[i], pdev) != 0) goto clean1; @@ -3730,15 +3718,17 @@ static void fail_all_cmds(unsigned long ctlr) pci_disable_device(h->pdev); /* Make sure it is really dead. 
*/ /* move everything off the request queue onto the completed queue */ - while ((c = h->reqQ) != NULL) { - removeQ(&(h->reqQ), c); + while (!hlist_empty(&h->reqQ)) { + c = hlist_entry(h->reqQ.first, CommandList_struct, list); + removeQ(c); h->Qdepth--; - addQ(&(h->cmpQ), c); + addQ(&h->cmpQ, c); } /* Now, fail everything on the completed queue with a HW error */ - while ((c = h->cmpQ) != NULL) { - removeQ(&h->cmpQ, c); + while (!hlist_empty(&h->cmpQ)) { + c = hlist_entry(h->cmpQ.first, CommandList_struct, list); + removeQ(c); c->err_info->CommandStatus = CMD_HARDWARE_ERR; if (c->cmd_type == CMD_RWREQ) { complete_command(h, c, 0); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 24a7efa993a..15e2b84734e 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -89,8 +89,8 @@ struct ctlr_info struct access_method access; /* queue and queue Info */ - CommandList_struct *reqQ; - CommandList_struct *cmpQ; + struct hlist_head reqQ; + struct hlist_head cmpQ; unsigned int Qdepth; unsigned int maxQsinceinit; unsigned int maxSG; diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index 43bf5593b59..24e22dea1a9 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -265,8 +265,7 @@ typedef struct _CommandList_struct { int ctlr; int cmd_type; long cmdindex; - struct _CommandList_struct *prev; - struct _CommandList_struct *next; + struct hlist_node list; struct request * rq; struct completion *waiting; int retry_count; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fb06ed65921..edbaac6c057 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -624,20 +624,38 @@ static int loop_switch(struct loop_device *lo, struct file *file) } /* + * Helper to flush the IOs in loop, but keeping loop thread running + */ +static int loop_flush(struct loop_device *lo) +{ + /* loop not yet configured, no running thread, nothing to flush */ + if (!lo->lo_thread) + return 0; + + return loop_switch(lo, NULL); +} + +/* * Do the actual switch; called from the BIO completion routine */ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) { struct file *file = p->file; struct file *old_file = lo->lo_backing_file; - struct address_space *mapping = file->f_mapping; + struct address_space *mapping; + + /* if no new file, only flush of queued bios requested */ + if (!file) + goto out; + mapping = file->f_mapping; mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? mapping->host->i_bdev->bd_block_size : PAGE_SIZE; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); +out: complete(&p->wait); } @@ -901,6 +919,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) kthread_stop(lo->lo_thread); + lo->lo_queue->unplug_fn = NULL; lo->lo_backing_file = NULL; loop_release_xfer(lo); @@ -1345,11 +1364,25 @@ static int lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo = disk->private_data; mutex_lock(&lo->lo_ctl_mutex); - --lo->lo_refcnt; - if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) && !lo->lo_refcnt) + if (--lo->lo_refcnt) + goto out; + + if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { + /* + * In autoclear mode, stop the loop thread + * and remove configuration after last close. + */ loop_clr_fd(lo, NULL); + } else { + /* + * Otherwise keep thread (if running) and config, + * but flush possible ongoing bios in thread. 
+ */ + loop_flush(lo); + } +out: mutex_unlock(&lo->lo_ctl_mutex); return 0; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d3a91cacee8..7bcc1d8bc96 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -722,7 +722,6 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); - elevator_t *old_e; if (!disk) goto out; nbd_dev[i].disk = disk; @@ -736,11 +735,10 @@ static int __init nbd_init(void) put_disk(disk); goto out; } - old_e = disk->queue->elevator; - if (elevator_init(disk->queue, "deadline") == 0 || - elevator_init(disk->queue, "noop") == 0) { - elevator_exit(old_e); - } + /* + * Tell the block layer that we are not a rotational device + */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); } if (register_blkdev(NBD_MAJOR, "nbd")) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 85d79a02d48..f151592ecf7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -237,6 +237,8 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_put_disk; } + queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue); + if (index < 26) { sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); } else if (index < (26 + 1) * 26) { diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2d19f0cc47f..918ef725de4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -338,18 +338,12 @@ wait: static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { struct request_queue *rq; - elevator_t *old_e; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) return -1; - old_e = rq->elevator; - if (IS_ERR_VALUE(elevator_init(rq, "noop"))) - printk(KERN_WARNING - "blkfront: Switch elevator failed, use default\n"); - else - elevator_exit(old_e); + queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); /* Hard sector size and max sectors impersonate the equiv. hardware. 
*/ blk_queue_hardsect_size(rq, sector_size); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 7d2e91cccb1..cceace61ef2 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1712,29 +1712,30 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) return 0; } -static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s) +static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s, + struct packet_command *cgc) { unsigned char buf[21], *base; struct dvd_layer *layer; - struct packet_command cgc; struct cdrom_device_ops *cdo = cdi->ops; int ret, layer_num = s->physical.layer_num; if (layer_num >= DVD_LAYERS) return -EINVAL; - init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); - cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; - cgc.cmd[6] = layer_num; - cgc.cmd[7] = s->type; - cgc.cmd[9] = cgc.buflen & 0xff; + init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; + cgc->cmd[6] = layer_num; + cgc->cmd[7] = s->type; + cgc->cmd[9] = cgc->buflen & 0xff; /* * refrain from reporting errors on non-existing layers (mainly) */ - cgc.quiet = 1; + cgc->quiet = 1; - if ((ret = cdo->generic_packet(cdi, &cgc))) + ret = cdo->generic_packet(cdi, cgc); + if (ret) return ret; base = &buf[4]; @@ -1762,21 +1763,22 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s) return 0; } -static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s) +static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s, + struct packet_command *cgc) { int ret; u_char buf[8]; - struct packet_command cgc; struct cdrom_device_ops *cdo = cdi->ops; - init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); - cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; - cgc.cmd[6] = s->copyright.layer_num; - cgc.cmd[7] = s->type; - cgc.cmd[8] = cgc.buflen >> 8; - cgc.cmd[9] = cgc.buflen & 0xff; + init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; + cgc->cmd[6] = s->copyright.layer_num; + cgc->cmd[7] = s->type; + cgc->cmd[8] = cgc->buflen >> 8; + cgc->cmd[9] = cgc->buflen & 0xff; - if ((ret = cdo->generic_packet(cdi, &cgc))) + ret = cdo->generic_packet(cdi, cgc); + if (ret) return ret; s->copyright.cpst = buf[4]; @@ -1785,79 +1787,89 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s) return 0; } -static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s) +static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s, + struct packet_command *cgc) { int ret, size; u_char *buf; - struct packet_command cgc; struct cdrom_device_ops *cdo = cdi->ops; size = sizeof(s->disckey.value) + 4; - if ((buf = kmalloc(size, GFP_KERNEL)) == NULL) + buf = kmalloc(size, GFP_KERNEL); + if (!buf) return -ENOMEM; - init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); - cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; - cgc.cmd[7] = s->type; - cgc.cmd[8] = size >> 8; - cgc.cmd[9] = size & 0xff; - cgc.cmd[10] = s->disckey.agid << 6; + init_cdrom_command(cgc, buf, size, CGC_DATA_READ); + cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; + cgc->cmd[7] = s->type; + cgc->cmd[8] = size >> 8; + cgc->cmd[9] = size & 0xff; + cgc->cmd[10] = s->disckey.agid << 6; - if (!(ret = cdo->generic_packet(cdi, &cgc))) + ret = cdo->generic_packet(cdi, cgc); + if (!ret) memcpy(s->disckey.value, &buf[4], sizeof(s->disckey.value)); kfree(buf); return ret; } -static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s) +static int 
dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s, + struct packet_command *cgc) { - int ret; - u_char buf[4 + 188]; - struct packet_command cgc; + int ret, size = 4 + 188; + u_char *buf; struct cdrom_device_ops *cdo = cdi->ops; - init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); - cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; - cgc.cmd[7] = s->type; - cgc.cmd[9] = cgc.buflen & 0xff; + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; - if ((ret = cdo->generic_packet(cdi, &cgc))) - return ret; + init_cdrom_command(cgc, buf, size, CGC_DATA_READ); + cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; + cgc->cmd[7] = s->type; + cgc->cmd[9] = cgc->buflen & 0xff; + + ret = cdo->generic_packet(cdi, cgc); + if (ret) + goto out; s->bca.len = buf[0] << 8 | buf[1]; if (s->bca.len < 12 || s->bca.len > 188) { cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len); - return -EIO; + ret = -EIO; + goto out; } memcpy(s->bca.value, &buf[4], s->bca.len); - - return 0; + ret = 0; +out: + kfree(buf); + return ret; } -static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s) +static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, + struct packet_command *cgc) { int ret = 0, size; u_char *buf; - struct packet_command cgc; struct cdrom_device_ops *cdo = cdi->ops; size = sizeof(s->manufact.value) + 4; - if ((buf = kmalloc(size, GFP_KERNEL)) == NULL) + buf = kmalloc(size, GFP_KERNEL); + if (!buf) return -ENOMEM; - init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); - cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; - cgc.cmd[7] = s->type; - cgc.cmd[8] = size >> 8; - cgc.cmd[9] = size & 0xff; + init_cdrom_command(cgc, buf, size, CGC_DATA_READ); + cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; + cgc->cmd[7] = s->type; + cgc->cmd[8] = size >> 8; + cgc->cmd[9] = size & 0xff; - if ((ret = cdo->generic_packet(cdi, &cgc))) { - kfree(buf); - return ret; - } + ret = cdo->generic_packet(cdi, cgc); + if (ret) + goto out; s->manufact.len = buf[0] << 8 | buf[1]; if (s->manufact.len < 0 || s->manufact.len > 2048) { @@ -1868,27 +1880,29 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s) memcpy(s->manufact.value, &buf[4], s->manufact.len); } +out: kfree(buf); return ret; } -static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s) +static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s, + struct packet_command *cgc) { switch (s->type) { case DVD_STRUCT_PHYSICAL: - return dvd_read_physical(cdi, s); + return dvd_read_physical(cdi, s, cgc); case DVD_STRUCT_COPYRIGHT: - return dvd_read_copyright(cdi, s); + return dvd_read_copyright(cdi, s, cgc); case DVD_STRUCT_DISCKEY: - return dvd_read_disckey(cdi, s); + return dvd_read_disckey(cdi, s, cgc); case DVD_STRUCT_BCA: - return dvd_read_bca(cdi, s); + return dvd_read_bca(cdi, s, cgc); case DVD_STRUCT_MANUFACT: - return dvd_read_manufact(cdi, s); + return dvd_read_manufact(cdi, s, cgc); default: cdinfo(CD_WARNING, ": Invalid DVD structure read requested (%d)\n", @@ -2787,271 +2801,360 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) return cdo->generic_packet(cdi, &cgc); } -static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, - unsigned long arg) -{ - struct cdrom_device_ops *cdo = cdi->ops; - struct packet_command cgc; +static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, + void __user *arg, + struct packet_command *cgc, + int cmd) +{ struct request_sense sense; - unsigned char buffer[32]; - int ret = 0; - - 
memset(&cgc, 0, sizeof(cgc)); + struct cdrom_msf msf; + int blocksize = 0, format = 0, lba; + int ret; - /* build a unified command and queue it through - cdo->generic_packet() */ switch (cmd) { case CDROMREADRAW: + blocksize = CD_FRAMESIZE_RAW; + break; case CDROMREADMODE1: - case CDROMREADMODE2: { - struct cdrom_msf msf; - int blocksize = 0, format = 0, lba; - - switch (cmd) { - case CDROMREADRAW: - blocksize = CD_FRAMESIZE_RAW; - break; - case CDROMREADMODE1: - blocksize = CD_FRAMESIZE; - format = 2; - break; - case CDROMREADMODE2: - blocksize = CD_FRAMESIZE_RAW0; - break; - } - IOCTL_IN(arg, struct cdrom_msf, msf); - lba = msf_to_lba(msf.cdmsf_min0,msf.cdmsf_sec0,msf.cdmsf_frame0); - /* FIXME: we need upper bound checking, too!! */ - if (lba < 0) - return -EINVAL; - cgc.buffer = kmalloc(blocksize, GFP_KERNEL); - if (cgc.buffer == NULL) - return -ENOMEM; - memset(&sense, 0, sizeof(sense)); - cgc.sense = &sense; - cgc.data_direction = CGC_DATA_READ; - ret = cdrom_read_block(cdi, &cgc, lba, 1, format, blocksize); - if (ret && sense.sense_key==0x05 && sense.asc==0x20 && sense.ascq==0x00) { - /* - * SCSI-II devices are not required to support - * READ_CD, so let's try switching block size - */ - /* FIXME: switch back again... */ - if ((ret = cdrom_switch_blocksize(cdi, blocksize))) { - kfree(cgc.buffer); - return ret; - } - cgc.sense = NULL; - ret = cdrom_read_cd(cdi, &cgc, lba, blocksize, 1); - ret |= cdrom_switch_blocksize(cdi, blocksize); - } - if (!ret && copy_to_user((char __user *)arg, cgc.buffer, blocksize)) - ret = -EFAULT; - kfree(cgc.buffer); + blocksize = CD_FRAMESIZE; + format = 2; + break; + case CDROMREADMODE2: + blocksize = CD_FRAMESIZE_RAW0; + break; + } + IOCTL_IN(arg, struct cdrom_msf, msf); + lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0); + /* FIXME: we need upper bound checking, too!! */ + if (lba < 0) + return -EINVAL; + + cgc->buffer = kmalloc(blocksize, GFP_KERNEL); + if (cgc->buffer == NULL) + return -ENOMEM; + + memset(&sense, 0, sizeof(sense)); + cgc->sense = &sense; + cgc->data_direction = CGC_DATA_READ; + ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize); + if (ret && sense.sense_key == 0x05 && + sense.asc == 0x20 && + sense.ascq == 0x00) { + /* + * SCSI-II devices are not required to support + * READ_CD, so let's try switching block size + */ + /* FIXME: switch back again... */ + ret = cdrom_switch_blocksize(cdi, blocksize); + if (ret) + goto out; + cgc->sense = NULL; + ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1); + ret |= cdrom_switch_blocksize(cdi, blocksize); + } + if (!ret && copy_to_user(arg, cgc->buffer, blocksize)) + ret = -EFAULT; +out: + kfree(cgc->buffer); + return ret; +} + +static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi, + void __user *arg) +{ + struct cdrom_read_audio ra; + int lba; + + IOCTL_IN(arg, struct cdrom_read_audio, ra); + + if (ra.addr_format == CDROM_MSF) + lba = msf_to_lba(ra.addr.msf.minute, + ra.addr.msf.second, + ra.addr.msf.frame); + else if (ra.addr_format == CDROM_LBA) + lba = ra.addr.lba; + else + return -EINVAL; + + /* FIXME: we need upper bound checking, too!! 
*/ + if (lba < 0 || ra.nframes <= 0 || ra.nframes > CD_FRAMES) + return -EINVAL; + + return cdrom_read_cdda(cdi, ra.buf, lba, ra.nframes); +} + +static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi, + void __user *arg) +{ + int ret; + struct cdrom_subchnl q; + u_char requested, back; + IOCTL_IN(arg, struct cdrom_subchnl, q); + requested = q.cdsc_format; + if (!((requested == CDROM_MSF) || + (requested == CDROM_LBA))) + return -EINVAL; + q.cdsc_format = CDROM_MSF; + ret = cdrom_read_subchannel(cdi, &q, 0); + if (ret) return ret; - } - case CDROMREADAUDIO: { - struct cdrom_read_audio ra; - int lba; - - IOCTL_IN(arg, struct cdrom_read_audio, ra); - - if (ra.addr_format == CDROM_MSF) - lba = msf_to_lba(ra.addr.msf.minute, - ra.addr.msf.second, - ra.addr.msf.frame); - else if (ra.addr_format == CDROM_LBA) - lba = ra.addr.lba; - else - return -EINVAL; + back = q.cdsc_format; /* local copy */ + sanitize_format(&q.cdsc_absaddr, &back, requested); + sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested); + IOCTL_OUT(arg, struct cdrom_subchnl, q); + /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ + return 0; +} - /* FIXME: we need upper bound checking, too!! */ - if (lba < 0 || ra.nframes <= 0 || ra.nframes > CD_FRAMES) - return -EINVAL; +static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi, + void __user *arg, + struct packet_command *cgc) +{ + struct cdrom_device_ops *cdo = cdi->ops; + struct cdrom_msf msf; + cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); + IOCTL_IN(arg, struct cdrom_msf, msf); + cgc->cmd[0] = GPCMD_PLAY_AUDIO_MSF; + cgc->cmd[3] = msf.cdmsf_min0; + cgc->cmd[4] = msf.cdmsf_sec0; + cgc->cmd[5] = msf.cdmsf_frame0; + cgc->cmd[6] = msf.cdmsf_min1; + cgc->cmd[7] = msf.cdmsf_sec1; + cgc->cmd[8] = msf.cdmsf_frame1; + cgc->data_direction = CGC_DATA_NONE; + return cdo->generic_packet(cdi, cgc); +} - return cdrom_read_cdda(cdi, ra.buf, lba, ra.nframes); - } - case CDROMSUBCHNL: { - struct cdrom_subchnl q; - u_char requested, back; - IOCTL_IN(arg, struct cdrom_subchnl, q); - requested = q.cdsc_format; - if (!((requested == CDROM_MSF) || - (requested == CDROM_LBA))) - return -EINVAL; - q.cdsc_format = CDROM_MSF; - if ((ret = cdrom_read_subchannel(cdi, &q, 0))) - return ret; - back = q.cdsc_format; /* local copy */ - sanitize_format(&q.cdsc_absaddr, &back, requested); - sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested); - IOCTL_OUT(arg, struct cdrom_subchnl, q); - /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ - return 0; - } - case CDROMPLAYMSF: { - struct cdrom_msf msf; - cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); - IOCTL_IN(arg, struct cdrom_msf, msf); - cgc.cmd[0] = GPCMD_PLAY_AUDIO_MSF; - cgc.cmd[3] = msf.cdmsf_min0; - cgc.cmd[4] = msf.cdmsf_sec0; - cgc.cmd[5] = msf.cdmsf_frame0; - cgc.cmd[6] = msf.cdmsf_min1; - cgc.cmd[7] = msf.cdmsf_sec1; - cgc.cmd[8] = msf.cdmsf_frame1; - cgc.data_direction = CGC_DATA_NONE; - return cdo->generic_packet(cdi, &cgc); - } - case CDROMPLAYBLK: { - struct cdrom_blk blk; - cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); - IOCTL_IN(arg, struct cdrom_blk, blk); - cgc.cmd[0] = GPCMD_PLAY_AUDIO_10; - cgc.cmd[2] = (blk.from >> 24) & 0xff; - cgc.cmd[3] = (blk.from >> 16) & 0xff; - cgc.cmd[4] = (blk.from >> 8) & 0xff; - cgc.cmd[5] = blk.from & 0xff; - cgc.cmd[7] = (blk.len >> 8) & 0xff; - cgc.cmd[8] = blk.len & 0xff; - cgc.data_direction = CGC_DATA_NONE; - return cdo->generic_packet(cdi, &cgc); - } - case CDROMVOLCTRL: - case CDROMVOLREAD: { - struct cdrom_volctrl volctrl; - char 
mask[sizeof(buffer)]; - unsigned short offset; +static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi, + void __user *arg, + struct packet_command *cgc) +{ + struct cdrom_device_ops *cdo = cdi->ops; + struct cdrom_blk blk; + cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); + IOCTL_IN(arg, struct cdrom_blk, blk); + cgc->cmd[0] = GPCMD_PLAY_AUDIO_10; + cgc->cmd[2] = (blk.from >> 24) & 0xff; + cgc->cmd[3] = (blk.from >> 16) & 0xff; + cgc->cmd[4] = (blk.from >> 8) & 0xff; + cgc->cmd[5] = blk.from & 0xff; + cgc->cmd[7] = (blk.len >> 8) & 0xff; + cgc->cmd[8] = blk.len & 0xff; + cgc->data_direction = CGC_DATA_NONE; + return cdo->generic_packet(cdi, cgc); +} + +static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, + void __user *arg, + struct packet_command *cgc, + unsigned int cmd) +{ + struct cdrom_volctrl volctrl; + unsigned char buffer[32]; + char mask[sizeof(buffer)]; + unsigned short offset; + int ret; - cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n"); + cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n"); - IOCTL_IN(arg, struct cdrom_volctrl, volctrl); + IOCTL_IN(arg, struct cdrom_volctrl, volctrl); - cgc.buffer = buffer; - cgc.buflen = 24; - if ((ret = cdrom_mode_sense(cdi, &cgc, GPMODE_AUDIO_CTL_PAGE, 0))) - return ret; + cgc->buffer = buffer; + cgc->buflen = 24; + ret = cdrom_mode_sense(cdi, cgc, GPMODE_AUDIO_CTL_PAGE, 0); + if (ret) + return ret; - /* originally the code depended on buffer[1] to determine - how much data is available for transfer. buffer[1] is - unfortunately ambigious and the only reliable way seem - to be to simply skip over the block descriptor... */ - offset = 8 + be16_to_cpu(*(__be16 *)(buffer+6)); - - if (offset + 16 > sizeof(buffer)) - return -E2BIG; - - if (offset + 16 > cgc.buflen) { - cgc.buflen = offset+16; - ret = cdrom_mode_sense(cdi, &cgc, - GPMODE_AUDIO_CTL_PAGE, 0); - if (ret) - return ret; - } + /* originally the code depended on buffer[1] to determine + how much data is available for transfer. buffer[1] is + unfortunately ambigious and the only reliable way seem + to be to simply skip over the block descriptor... */ + offset = 8 + be16_to_cpu(*(__be16 *)(buffer + 6)); + + if (offset + 16 > sizeof(buffer)) + return -E2BIG; + + if (offset + 16 > cgc->buflen) { + cgc->buflen = offset + 16; + ret = cdrom_mode_sense(cdi, cgc, + GPMODE_AUDIO_CTL_PAGE, 0); + if (ret) + return ret; + } - /* sanity check */ - if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE || - buffer[offset+1] < 14) - return -EINVAL; + /* sanity check */ + if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE || + buffer[offset + 1] < 14) + return -EINVAL; - /* now we have the current volume settings. if it was only - a CDROMVOLREAD, return these values */ - if (cmd == CDROMVOLREAD) { - volctrl.channel0 = buffer[offset+9]; - volctrl.channel1 = buffer[offset+11]; - volctrl.channel2 = buffer[offset+13]; - volctrl.channel3 = buffer[offset+15]; - IOCTL_OUT(arg, struct cdrom_volctrl, volctrl); - return 0; - } + /* now we have the current volume settings. 
if it was only + a CDROMVOLREAD, return these values */ + if (cmd == CDROMVOLREAD) { + volctrl.channel0 = buffer[offset+9]; + volctrl.channel1 = buffer[offset+11]; + volctrl.channel2 = buffer[offset+13]; + volctrl.channel3 = buffer[offset+15]; + IOCTL_OUT(arg, struct cdrom_volctrl, volctrl); + return 0; + } - /* get the volume mask */ - cgc.buffer = mask; - if ((ret = cdrom_mode_sense(cdi, &cgc, - GPMODE_AUDIO_CTL_PAGE, 1))) - return ret; + /* get the volume mask */ + cgc->buffer = mask; + ret = cdrom_mode_sense(cdi, cgc, GPMODE_AUDIO_CTL_PAGE, 1); + if (ret) + return ret; - buffer[offset+9] = volctrl.channel0 & mask[offset+9]; - buffer[offset+11] = volctrl.channel1 & mask[offset+11]; - buffer[offset+13] = volctrl.channel2 & mask[offset+13]; - buffer[offset+15] = volctrl.channel3 & mask[offset+15]; + buffer[offset + 9] = volctrl.channel0 & mask[offset + 9]; + buffer[offset + 11] = volctrl.channel1 & mask[offset + 11]; + buffer[offset + 13] = volctrl.channel2 & mask[offset + 13]; + buffer[offset + 15] = volctrl.channel3 & mask[offset + 15]; - /* set volume */ - cgc.buffer = buffer + offset - 8; - memset(cgc.buffer, 0, 8); - return cdrom_mode_select(cdi, &cgc); - } + /* set volume */ + cgc->buffer = buffer + offset - 8; + memset(cgc->buffer, 0, 8); + return cdrom_mode_select(cdi, cgc); +} - case CDROMSTART: - case CDROMSTOP: { - cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); - cgc.cmd[0] = GPCMD_START_STOP_UNIT; - cgc.cmd[1] = 1; - cgc.cmd[4] = (cmd == CDROMSTART) ? 1 : 0; - cgc.data_direction = CGC_DATA_NONE; - return cdo->generic_packet(cdi, &cgc); - } +static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi, + struct packet_command *cgc, + int cmd) +{ + struct cdrom_device_ops *cdo = cdi->ops; + cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); + cgc->cmd[0] = GPCMD_START_STOP_UNIT; + cgc->cmd[1] = 1; + cgc->cmd[4] = (cmd == CDROMSTART) ? 1 : 0; + cgc->data_direction = CGC_DATA_NONE; + return cdo->generic_packet(cdi, cgc); +} - case CDROMPAUSE: - case CDROMRESUME: { - cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); - cgc.cmd[0] = GPCMD_PAUSE_RESUME; - cgc.cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; - cgc.data_direction = CGC_DATA_NONE; - return cdo->generic_packet(cdi, &cgc); - } +static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi, + struct packet_command *cgc, + int cmd) +{ + struct cdrom_device_ops *cdo = cdi->ops; + cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); + cgc->cmd[0] = GPCMD_PAUSE_RESUME; + cgc->cmd[8] = (cmd == CDROMRESUME) ? 
1 : 0; + cgc->data_direction = CGC_DATA_NONE; + return cdo->generic_packet(cdi, cgc); +} - case DVD_READ_STRUCT: { - dvd_struct *s; - int size = sizeof(dvd_struct); - if (!CDROM_CAN(CDC_DVD)) - return -ENOSYS; - if ((s = kmalloc(size, GFP_KERNEL)) == NULL) - return -ENOMEM; - cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n"); - if (copy_from_user(s, (dvd_struct __user *)arg, size)) { - kfree(s); - return -EFAULT; - } - if ((ret = dvd_read_struct(cdi, s))) { - kfree(s); - return ret; - } - if (copy_to_user((dvd_struct __user *)arg, s, size)) - ret = -EFAULT; +static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi, + void __user *arg, + struct packet_command *cgc) +{ + int ret; + dvd_struct *s; + int size = sizeof(dvd_struct); + + if (!CDROM_CAN(CDC_DVD)) + return -ENOSYS; + + s = kmalloc(size, GFP_KERNEL); + if (!s) + return -ENOMEM; + + cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n"); + if (copy_from_user(s, arg, size)) { kfree(s); + return -EFAULT; + } + + ret = dvd_read_struct(cdi, s, cgc); + if (ret) + goto out; + + if (copy_to_user(arg, s, size)) + ret = -EFAULT; +out: + kfree(s); + return ret; +} + +static noinline int mmc_ioctl_dvd_auth(struct cdrom_device_info *cdi, + void __user *arg) +{ + int ret; + dvd_authinfo ai; + if (!CDROM_CAN(CDC_DVD)) + return -ENOSYS; + cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n"); + IOCTL_IN(arg, dvd_authinfo, ai); + ret = dvd_do_auth(cdi, &ai); + if (ret) return ret; - } + IOCTL_OUT(arg, dvd_authinfo, ai); + return 0; +} - case DVD_AUTH: { - dvd_authinfo ai; - if (!CDROM_CAN(CDC_DVD)) - return -ENOSYS; - cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n"); - IOCTL_IN(arg, dvd_authinfo, ai); - if ((ret = dvd_do_auth (cdi, &ai))) - return ret; - IOCTL_OUT(arg, dvd_authinfo, ai); - return 0; - } +static noinline int mmc_ioctl_cdrom_next_writable(struct cdrom_device_info *cdi, + void __user *arg) +{ + int ret; + long next = 0; + cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n"); + ret = cdrom_get_next_writable(cdi, &next); + if (ret) + return ret; + IOCTL_OUT(arg, long, next); + return 0; +} - case CDROM_NEXT_WRITABLE: { - long next = 0; - cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n"); - if ((ret = cdrom_get_next_writable(cdi, &next))) - return ret; - IOCTL_OUT(arg, long, next); - return 0; - } - case CDROM_LAST_WRITTEN: { - long last = 0; - cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n"); - if ((ret = cdrom_get_last_written(cdi, &last))) - return ret; - IOCTL_OUT(arg, long, last); - return 0; - } - } /* switch */ +static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi, + void __user *arg) +{ + int ret; + long last = 0; + cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n"); + ret = cdrom_get_last_written(cdi, &last); + if (ret) + return ret; + IOCTL_OUT(arg, long, last); + return 0; +} + +static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, + unsigned long arg) +{ + struct packet_command cgc; + void __user *userptr = (void __user *)arg; + + memset(&cgc, 0, sizeof(cgc)); + + /* build a unified command and queue it through + cdo->generic_packet() */ + switch (cmd) { + case CDROMREADRAW: + case CDROMREADMODE1: + case CDROMREADMODE2: + return mmc_ioctl_cdrom_read_data(cdi, userptr, &cgc, cmd); + case CDROMREADAUDIO: + return mmc_ioctl_cdrom_read_audio(cdi, userptr); + case CDROMSUBCHNL: + return mmc_ioctl_cdrom_subchannel(cdi, userptr); + case CDROMPLAYMSF: + return mmc_ioctl_cdrom_play_msf(cdi, userptr, &cgc); + case CDROMPLAYBLK: + return mmc_ioctl_cdrom_play_blk(cdi, userptr, &cgc); + 
case CDROMVOLCTRL: + case CDROMVOLREAD: + return mmc_ioctl_cdrom_volume(cdi, userptr, &cgc, cmd); + case CDROMSTART: + case CDROMSTOP: + return mmc_ioctl_cdrom_start_stop(cdi, &cgc, cmd); + case CDROMPAUSE: + case CDROMRESUME: + return mmc_ioctl_cdrom_pause_resume(cdi, &cgc, cmd); + case DVD_READ_STRUCT: + return mmc_ioctl_dvd_read_struct(cdi, userptr, &cgc); + case DVD_AUTH: + return mmc_ioctl_dvd_auth(cdi, userptr); + case CDROM_NEXT_WRITABLE: + return mmc_ioctl_cdrom_next_writable(cdi, userptr); + case CDROM_LAST_WRITTEN: + return mmc_ioctl_cdrom_last_written(cdi, userptr); + } return -ENOTTY; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ce26c84af06..3326750ec02 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1060,7 +1060,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_page_pool; } - cc->bs = bioset_create(MIN_IOS, MIN_IOS); + cc->bs = bioset_create(MIN_IOS, 0); if (!cc->bs) { ti->error = "Cannot allocate crypt bioset"; goto bad_bs; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 2fd6d445063..a34338567a2 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -56,7 +56,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages) if (!client->pool) goto bad; - client->bios = bioset_create(16, 16); + client->bios = bioset_create(16, 0); if (!client->bios) goto bad; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 343094c3fee..421c9f02d8c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1093,7 +1093,7 @@ static struct mapped_device *alloc_dev(int minor) if (!md->tio_pool) goto bad_tio_pool; - md->bs = bioset_create(16, 16); + md->bs = bioset_create(16, 0); if (!md->bs) goto bad_no_bioset; @@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ } while(0) +static void ctx_rcu_free(struct rcu_head *head) +{ + struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + unsigned nr_events = ctx->max_reqs; + + kmem_cache_free(kioctx_cachep, ctx); + + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } +} /* __put_ioctx * Called when the last user of an aio context has gone away, @@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx) */ static void __put_ioctx(struct kioctx *ctx) { - unsigned nr_events = ctx->max_reqs; - BUG_ON(ctx->reqs_active); cancel_delayed_work(&ctx->wq); @@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx) mmdrop(ctx->mm); ctx->mm = NULL; pr_debug("__put_ioctx: freeing %p\n", ctx); - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } + call_rcu(&ctx->rcu_head, ctx_rcu_free); } #define get_ioctx(kioctx) do { \ @@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) { struct mm_struct *mm; struct kioctx *ctx; + int did_sync = 0; /* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || @@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) goto out_freectx; /* limit the number of system wide aios */ - spin_lock(&aio_nr_lock); - if (aio_nr + ctx->max_reqs > aio_max_nr || - aio_nr + ctx->max_reqs < aio_nr) - ctx->max_reqs = 0; - else - aio_nr += ctx->max_reqs; - spin_unlock(&aio_nr_lock); + do { + spin_lock_bh(&aio_nr_lock); + if (aio_nr + nr_events > aio_max_nr || + aio_nr + 
nr_events < aio_nr) + ctx->max_reqs = 0; + else + aio_nr += ctx->max_reqs; + spin_unlock_bh(&aio_nr_lock); + if (ctx->max_reqs || did_sync) + break; + + /* wait for rcu callbacks to have completed before giving up */ + synchronize_rcu(); + did_sync = 1; + ctx->max_reqs = nr_events; + } while (1); + if (ctx->max_reqs == 0) goto out_cleanup; /* now link into global list. */ - write_lock(&mm->ioctx_list_lock); - ctx->next = mm->ioctx_list; - mm->ioctx_list = ctx; - write_unlock(&mm->ioctx_list_lock); + spin_lock(&mm->ioctx_lock); + hlist_add_head_rcu(&ctx->list, &mm->ioctx_list); + spin_unlock(&mm->ioctx_lock); dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, current->mm, ctx->ring_info.nr); @@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb) */ void exit_aio(struct mm_struct *mm) { - struct kioctx *ctx = mm->ioctx_list; - mm->ioctx_list = NULL; - while (ctx) { - struct kioctx *next = ctx->next; - ctx->next = NULL; + struct kioctx *ctx; + + while (!hlist_empty(&mm->ioctx_list)) { + ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list); + hlist_del_rcu(&ctx->list); + aio_cancel_all(ctx); wait_for_all_aios(ctx); @@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm) atomic_read(&ctx->users), ctx->dead, ctx->reqs_active); put_ioctx(ctx); - ctx = next; } } @@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req) static struct kioctx *lookup_ioctx(unsigned long ctx_id) { - struct kioctx *ioctx; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; + struct kioctx *ctx = NULL; + struct hlist_node *n; - mm = current->mm; - read_lock(&mm->ioctx_list_lock); - for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next) - if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) { - get_ioctx(ioctx); + rcu_read_lock(); + + hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { + if (ctx->user_id == ctx_id && !ctx->dead) { + get_ioctx(ctx); break; } - read_unlock(&mm->ioctx_list_lock); + } - return ioctx; + rcu_read_unlock(); + return ctx; } /* @@ -1215,19 +1232,14 @@ out: static void io_destroy(struct kioctx *ioctx) { struct mm_struct *mm = current->mm; - struct kioctx **tmp; int was_dead; /* delete the entry from the list is someone else hasn't already */ - write_lock(&mm->ioctx_list_lock); + spin_lock(&mm->ioctx_lock); was_dead = ioctx->dead; ioctx->dead = 1; - for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx; - tmp = &(*tmp)->next) - ; - if (*tmp) - *tmp = ioctx->next; - write_unlock(&mm->ioctx_list_lock); + hlist_del_rcu(&ioctx->list); + spin_unlock(&mm->ioctx_lock); dprintk("aio_release(%p)\n", ioctx); if (likely(!was_dead)) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 19caf7c962a..77ebc3c263d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) && bip->bip_buf != NULL) kfree(bip->bip_buf); - mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); + bvec_free_bs(bs, bip->bip_vec, bip->bip_pool); mempool_free(bip, bs->bio_integrity_pool); bio->bi_integrity = NULL; @@ -31,7 +31,11 @@ DEFINE_TRACE(block_split); -static struct kmem_cache *bio_slab __read_mostly; +/* + * Test patch to inline a certain number of bi_io_vec's inside the bio + * itself, to shrink a bio data allocation from two mempool calls to one + */ +#define BIO_INLINE_VECS 4 static mempool_t *bio_split_pool __read_mostly; @@ -40,9 +44,8 @@ static mempool_t *bio_split_pool __read_mostly; * break badly! 
cannot be bigger than what you can fit into an * unsigned short */ - #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { +struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV @@ -53,12 +56,121 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { */ struct bio_set *fs_bio_set; +/* + * Our slab pool management + */ +struct bio_slab { + struct kmem_cache *slab; + unsigned int slab_ref; + unsigned int slab_size; + char name[8]; +}; +static DEFINE_MUTEX(bio_slab_lock); +static struct bio_slab *bio_slabs; +static unsigned int bio_slab_nr, bio_slab_max; + +static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) +{ + unsigned int sz = sizeof(struct bio) + extra_size; + struct kmem_cache *slab = NULL; + struct bio_slab *bslab; + unsigned int i, entry = -1; + + mutex_lock(&bio_slab_lock); + + i = 0; + while (i < bio_slab_nr) { + struct bio_slab *bslab = &bio_slabs[i]; + + if (!bslab->slab && entry == -1) + entry = i; + else if (bslab->slab_size == sz) { + slab = bslab->slab; + bslab->slab_ref++; + break; + } + i++; + } + + if (slab) + goto out_unlock; + + if (bio_slab_nr == bio_slab_max && entry == -1) { + bio_slab_max <<= 1; + bio_slabs = krealloc(bio_slabs, + bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!bio_slabs) + goto out_unlock; + } + if (entry == -1) + entry = bio_slab_nr++; + + bslab = &bio_slabs[entry]; + + snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); + slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); + if (!slab) + goto out_unlock; + + printk("bio: create slab <%s> at %d\n", bslab->name, entry); + bslab->slab = slab; + bslab->slab_ref = 1; + bslab->slab_size = sz; +out_unlock: + mutex_unlock(&bio_slab_lock); + return slab; +} + +static void bio_put_slab(struct bio_set *bs) +{ + struct bio_slab *bslab = NULL; + unsigned int i; + + mutex_lock(&bio_slab_lock); + + for (i = 0; i < bio_slab_nr; i++) { + if (bs->bio_slab == bio_slabs[i].slab) { + bslab = &bio_slabs[i]; + break; + } + } + + if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n")) + goto out; + + WARN_ON(!bslab->slab_ref); + + if (--bslab->slab_ref) + goto out; + + kmem_cache_destroy(bslab->slab); + bslab->slab = NULL; + +out: + mutex_unlock(&bio_slab_lock); +} + unsigned int bvec_nr_vecs(unsigned short idx) { return bvec_slabs[idx].nr_vecs; } -struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) +void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) +{ + BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); + + if (idx == BIOVEC_MAX_IDX) + mempool_free(bv, bs->bvec_pool); + else { + struct biovec_slab *bvs = bvec_slabs + idx; + + kmem_cache_free(bvs->slab, bv); + } +} + +struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, + struct bio_set *bs) { struct bio_vec *bvl; @@ -67,60 +179,85 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct * If not, this is a bio_kmalloc() allocation and just do a * kzalloc() for the exact number of vecs right away. */ - if (bs) { + if (!bs) + bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); + + /* + * see comment near bvec_array define! + */ + switch (nr) { + case 1: + *idx = 0; + break; + case 2 ... 4: + *idx = 1; + break; + case 5 ... 16: + *idx = 2; + break; + case 17 ... 64: + *idx = 3; + break; + case 65 ... 
128: + *idx = 4; + break; + case 129 ... BIO_MAX_PAGES: + *idx = 5; + break; + default: + return NULL; + } + + /* + * idx now points to the pool we want to allocate from. only the + * 1-vec entry pool is mempool backed. + */ + if (*idx == BIOVEC_MAX_IDX) { +fallback: + bvl = mempool_alloc(bs->bvec_pool, gfp_mask); + } else { + struct biovec_slab *bvs = bvec_slabs + *idx; + gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); + /* - * see comment near bvec_array define! + * Make this allocation restricted and don't dump info on + * allocation failures, since we'll fallback to the mempool + * in case of failure. */ - switch (nr) { - case 1: - *idx = 0; - break; - case 2 ... 4: - *idx = 1; - break; - case 5 ... 16: - *idx = 2; - break; - case 17 ... 64: - *idx = 3; - break; - case 65 ... 128: - *idx = 4; - break; - case 129 ... BIO_MAX_PAGES: - *idx = 5; - break; - default: - return NULL; - } + __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; /* - * idx now points to the pool we want to allocate from + * Try a slab allocation. If this fails and __GFP_WAIT + * is set, retry with the 1-entry mempool */ - bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) - memset(bvl, 0, - bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); - } else - bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); + bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); + if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) { + *idx = BIOVEC_MAX_IDX; + goto fallback; + } + } return bvl; } -void bio_free(struct bio *bio, struct bio_set *bio_set) +void bio_free(struct bio *bio, struct bio_set *bs) { - if (bio->bi_io_vec) { - const int pool_idx = BIO_POOL_IDX(bio); + void *p; - BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); - - mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); - } + if (bio_has_allocated_vec(bio)) + bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio, bio_set); + bio_integrity_free(bio, bs); + + /* + * If we have front padding, adjust the bio pointer before freeing + */ + p = bio; + if (bs->front_pad) + p -= bs->front_pad; - mempool_free(bio, bio_set->bio_pool); + mempool_free(p, bs->bio_pool); } /* @@ -133,7 +270,8 @@ static void bio_fs_destructor(struct bio *bio) static void bio_kmalloc_destructor(struct bio *bio) { - kfree(bio->bi_io_vec); + if (bio_has_allocated_vec(bio)) + kfree(bio->bi_io_vec); kfree(bio); } @@ -157,16 +295,20 @@ void bio_init(struct bio *bio) * for a &struct bio to become free. If a %NULL @bs is passed in, we will * fall back to just using @kmalloc to allocate the required memory. * - * allocate bio and iovecs from the memory pools specified by the - * bio_set structure, or @kmalloc if none given. + * Note that the caller must set ->bi_destructor on succesful return + * of a bio, to do the appropriate freeing of the bio once the reference + * count drops to zero. 
**/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { - struct bio *bio; + struct bio *bio = NULL; + + if (bs) { + void *p = mempool_alloc(bs->bio_pool, gfp_mask); - if (bs) - bio = mempool_alloc(bs->bio_pool, gfp_mask); - else + if (p) + bio = p + bs->front_pad; + } else bio = kmalloc(sizeof(*bio), gfp_mask); if (likely(bio)) { @@ -176,7 +318,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (likely(nr_iovecs)) { unsigned long uninitialized_var(idx); - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + if (nr_iovecs <= BIO_INLINE_VECS) { + idx = 0; + bvl = bio->bi_inline_vecs; + nr_iovecs = BIO_INLINE_VECS; + } else { + bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, + bs); + nr_iovecs = bvec_nr_vecs(idx); + } if (unlikely(!bvl)) { if (bs) mempool_free(bio, bs->bio_pool); @@ -186,7 +336,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) goto out; } bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = bvec_nr_vecs(idx); + bio->bi_max_vecs = nr_iovecs; } bio->bi_io_vec = bvl; } @@ -1346,30 +1496,18 @@ EXPORT_SYMBOL(bio_sector_offset); */ static int biovec_create_pools(struct bio_set *bs, int pool_entries) { - int i; + struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - for (i = 0; i < BIOVEC_NR_POOLS; i++) { - struct biovec_slab *bp = bvec_slabs + i; - mempool_t **bvp = bs->bvec_pools + i; + bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); + if (!bs->bvec_pool) + return -ENOMEM; - *bvp = mempool_create_slab_pool(pool_entries, bp->slab); - if (!*bvp) - return -ENOMEM; - } return 0; } static void biovec_free_pools(struct bio_set *bs) { - int i; - - for (i = 0; i < BIOVEC_NR_POOLS; i++) { - mempool_t *bvp = bs->bvec_pools[i]; - - if (bvp) - mempool_destroy(bvp); - } - + mempool_destroy(bs->bvec_pool); } void bioset_free(struct bio_set *bs) @@ -1379,25 +1517,49 @@ void bioset_free(struct bio_set *bs) bioset_integrity_free(bs); biovec_free_pools(bs); + bio_put_slab(bs); kfree(bs); } -struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) +/** + * bioset_create - Create a bio_set + * @pool_size: Number of bio and bio_vecs to cache in the mempool + * @front_pad: Number of bytes to allocate in front of the returned bio + * + * Description: + * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller + * to ask for a number of bytes to be allocated in front of the bio. + * Front pad allocation is useful for embedding the bio inside + * another structure, to avoid allocating extra data to go with the bio. + * Note that the bio must be embedded at the END of that structure always, + * or things will break badly. 
+ */ +struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) { - struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL); + unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); + struct bio_set *bs; + bs = kzalloc(sizeof(*bs), GFP_KERNEL); if (!bs) return NULL; - bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab); + bs->front_pad = front_pad; + + bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); + if (!bs->bio_slab) { + kfree(bs); + return NULL; + } + + bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab); if (!bs->bio_pool) goto bad; - if (bioset_integrity_create(bs, bio_pool_size)) + if (bioset_integrity_create(bs, pool_size)) goto bad; - if (!biovec_create_pools(bs, bvec_pool_size)) + if (!biovec_create_pools(bs, pool_size)) return bs; bad: @@ -1421,12 +1583,16 @@ static void __init biovec_init_slabs(void) static int __init init_bio(void) { - bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + bio_slab_max = 2; + bio_slab_nr = 0; + bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL); + if (!bio_slabs) + panic("bio: can't allocate bios\n"); bio_integrity_init_slab(); biovec_init_slabs(); - fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); + fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); if (!fs_bio_set) panic("bio: can't allocate bios\n"); diff --git a/fs/buffer.c b/fs/buffer.c index 10179cfa115..776ae091d3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page) page_cache_release(page); } + +static int quiet_error(struct buffer_head *bh) +{ + if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit()) + return 0; + return 1; +} + + static void buffer_io_error(struct buffer_head *bh) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); @@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { + if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) set_buffer_uptodate(bh); } else { clear_buffer_uptodate(bh); - if (printk_ratelimit()) + if (!quiet_error(bh)) buffer_io_error(bh); SetPageError(page); } @@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (printk_ratelimit()) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) set_bit(BH_Eopnotsupp, &bh->b_state); } + if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) + set_bit(BH_Quiet, &bh->b_state); + bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); bio_put(bio); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e4a241c65db..04158ad74db 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) /* small i_blocks in vfs inode? 
*/ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * CONFIG_LSF is not enabled implies the inode + * CONFIG_LBD is not enabled implies the inode * i_block represent total blocks in 512 bytes * 32 == size of vfs inode i_blocks * 8 */ @@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * !has_huge_files or CONFIG_LSF is not enabled + * !has_huge_files or CONFIG_LBD is not enabled * implies the inode i_block represent total blocks in * 512 bytes 32 == size of vfs inode i_blocks * 8 */ @@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (has_huge_files) { /* * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LSF + * mount if kernel is build with CONFIG_LBD */ if (sizeof(root->i_blocks) < sizeof(u64) && !(sb->s_flags & MS_RDONLY)) { printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " "files cannot be mounted read-write " - "without CONFIG_LSF.\n", sb->s_id); + "without CONFIG_LBD.\n", sb->s_id); goto failed_mount; } } diff --git a/include/linux/aio.h b/include/linux/aio.h index f6b8cf99b59..b16a957030f 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -5,6 +5,7 @@ #include <linux/workqueue.h> #include <linux/aio_abi.h> #include <linux/uio.h> +#include <linux/rcupdate.h> #include <asm/atomic.h> @@ -183,7 +184,7 @@ struct kioctx { /* This needs improving */ unsigned long user_id; - struct kioctx *next; + struct hlist_node list; wait_queue_head_t wait; @@ -199,6 +200,8 @@ struct kioctx { struct aio_ring_info ring_info; struct delayed_work wq; + + struct rcu_head rcu_head; }; /* prototypes */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 6a642098e5c..18462c5b8ff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -90,10 +90,11 @@ struct bio { unsigned int bi_comp_cpu; /* completion CPU */ + atomic_t bi_cnt; /* pin count */ + struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; - atomic_t bi_cnt; /* pin count */ void *bi_private; #if defined(CONFIG_BLK_DEV_INTEGRITY) @@ -101,6 +102,13 @@ struct bio { #endif bio_destructor_t *bi_destructor; /* destructor */ + + /* + * We can inline a number of vecs at the end of the bio, to avoid + * double allocations for a small number of bio_vecs. This member + * MUST obviously be kept at the very end of the bio. 
+ */ + struct bio_vec bi_inline_vecs[0]; }; /* @@ -117,6 +125,7 @@ struct bio { #define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ #define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ +#define BIO_QUIET 11 /* Make BIO Quiet */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -211,6 +220,11 @@ static inline void *bio_data(struct bio *bio) return NULL; } +static inline int bio_has_allocated_vec(struct bio *bio) +{ + return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs; +} + /* * will die */ @@ -332,7 +346,7 @@ struct bio_pair { extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); -extern struct bio_set *bioset_create(int, int); +extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(gfp_t, int); @@ -377,6 +391,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); +extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); /* @@ -395,13 +410,17 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) */ #define BIO_POOL_SIZE 2 #define BIOVEC_NR_POOLS 6 +#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) struct bio_set { + struct kmem_cache *bio_slab; + unsigned int front_pad; + mempool_t *bio_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) mempool_t *bio_integrity_pool; #endif - mempool_t *bvec_pools[BIOVEC_NR_POOLS]; + mempool_t *bvec_pool; }; struct biovec_slab { @@ -411,6 +430,7 @@ struct biovec_slab { }; extern struct bio_set *fs_bio_set; +extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; /* * a small number of entries is fine, not going to be performance critical. 
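For illustration, the bioset_create(pool_size, front_pad) interface introduced above lets a driver embed its per-request state directly in front of the bio instead of allocating it separately. A minimal sketch of that usage, assuming a hypothetical my_io structure, my_bio_set pool and my_* helpers (none of which appear in this patch):

#include <linux/bio.h>

struct my_io {
	void *private;			/* hypothetical per-request driver state */
	struct bio bio;			/* must stay the last member, per the kernel-doc above */
};

static struct bio_set *my_bio_set;

static void my_bio_destructor(struct bio *bio)
{
	/* the bio_alloc_bioset() note above: the caller frees its own bios */
	bio_free(bio, my_bio_set);
}

static void my_end_io(struct bio *bio, int error)
{
	struct my_io *io = container_of(bio, struct my_io, bio);

	/* complete the request using io->private, then drop the bio */
	io->private = NULL;
	bio_put(bio);
}

static struct my_io *my_io_alloc(gfp_t gfp, int nr_vecs)
{
	struct bio *bio = bio_alloc_bioset(gfp, nr_vecs, my_bio_set);

	if (!bio)
		return NULL;
	bio->bi_destructor = my_bio_destructor;
	bio->bi_end_io = my_end_io;
	return container_of(bio, struct my_io, bio);
}

static int __init my_init(void)
{
	/* front_pad = everything in struct my_io that precedes the bio */
	my_bio_set = bioset_create(16, offsetof(struct my_io, bio));
	return my_bio_set ? 0 : -ENOMEM;
}

Because the bio is handed out front_pad bytes into the slab object, container_of() recovers the embedding structure, and small requests are served from the new bi_inline_vecs without a second biovec allocation; that is also why the kernel-doc insists the bio sit at the very end of the containing structure.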
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 031a315c050..7035cec583b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -26,7 +26,6 @@ struct scsi_ioctl_command; struct request_queue; struct elevator_queue; -typedef struct elevator_queue elevator_t; struct request_pm_state; struct blk_trace; struct request; @@ -313,7 +312,7 @@ struct request_queue */ struct list_head queue_head; struct request *last_merge; - elevator_t *elevator; + struct elevator_queue *elevator; /* * the queue request freelist, one for reads and one for writes @@ -449,6 +448,7 @@ struct request_queue #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ static inline int queue_is_locked(struct request_queue *q) { @@ -522,22 +522,32 @@ enum { * TAG_FLUSH : ordering by tag w/ pre and post flushes * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = 0x01, - QUEUE_ORDERED_TAG = 0x02, - - QUEUE_ORDERED_PREFLUSH = 0x10, - QUEUE_ORDERED_POSTFLUSH = 0x20, - QUEUE_ORDERED_FUA = 0x40, - - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + QUEUE_ORDERED_BY_DRAIN = 0x01, + QUEUE_ORDERED_BY_TAG = 0x02, + QUEUE_ORDERED_DO_PREFLUSH = 0x10, + QUEUE_ORDERED_DO_BAR = 0x20, + QUEUE_ORDERED_DO_POSTFLUSH = 0x40, + QUEUE_ORDERED_DO_FUA = 0x80, + + QUEUE_ORDERED_NONE = 0x00, + + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | + QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_POSTFLUSH, + QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_FUA, + + QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | + QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_POSTFLUSH, + QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_DO_PREFLUSH | + QUEUE_ORDERED_DO_FUA, /* * Ordered operation sequence @@ -585,7 +595,6 @@ enum { #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) -#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) @@ -855,10 +864,10 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); -extern int blk_do_ordered(struct request_queue *, struct request **); +extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); 
+extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); @@ -977,7 +986,6 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3ce64b90118..8605f8a74df 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -35,6 +35,7 @@ enum bh_state_bits { BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ + BH_Quiet, /* Buffer Error Prinks to be quiet */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 92f6f634e3e..7a204256b15 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -28,7 +28,7 @@ typedef void (elevator_activate_req_fn) (struct request_queue *, struct request typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); typedef void *(elevator_init_fn) (struct request_queue *); -typedef void (elevator_exit_fn) (elevator_t *); +typedef void (elevator_exit_fn) (struct elevator_queue *); struct elevator_ops { @@ -62,8 +62,8 @@ struct elevator_ops struct elv_fs_entry { struct attribute attr; - ssize_t (*show)(elevator_t *, char *); - ssize_t (*store)(elevator_t *, const char *, size_t); + ssize_t (*show)(struct elevator_queue *, char *); + ssize_t (*store)(struct elevator_queue *, const char *, size_t); }; /* @@ -130,7 +130,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *); extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); -extern void elevator_exit(elevator_t *); +extern void elevator_exit(struct elevator_queue *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3df7742ce24..16948eaecae 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; + struct hd_struct *last_lookup; struct hd_struct *part[]; }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fe825471d5a..9cfc9b627fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -232,8 +232,9 @@ struct mm_struct { struct core_state *core_state; /* coredumping support */ /* aio bits */ - rwlock_t ioctx_list_lock; /* aio lock */ - struct kioctx *ioctx_list; + spinlock_t ioctx_lock; + struct hlist_head ioctx_list; + #ifdef CONFIG_MM_OWNER /* * "owner" points to a task that is regarded as the canonical diff --git a/include/linux/types.h b/include/linux/types.h index 1d98330b1f2..121f349cb7e 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -135,19 +135,14 @@ typedef __s64 int64_t; * * Linux always considers sectors to be 512 bytes long independently * of the devices real block size. + * + * blkcnt_t is the type of the inode's block count. 
*/ #ifdef CONFIG_LBD typedef u64 sector_t; -#else -typedef unsigned long sector_t; -#endif - -/* - * The type of the inode's block count. - */ -#ifdef CONFIG_LSF typedef u64 blkcnt_t; #else +typedef unsigned long sector_t; typedef unsigned long blkcnt_t; #endif diff --git a/kernel/exit.c b/kernel/exit.c index a946221879d..c9e5a1c14e0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1037,8 +1037,6 @@ NORET_TYPE void do_exit(long code) * task into the wait for ever nirwana as well. */ tsk->flags |= PF_EXITPIDONE; - if (tsk->io_context) - exit_io_context(); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } diff --git a/kernel/fork.c b/kernel/fork.c index 6144b36cd89..43cbf30669e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -415,8 +415,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) set_mm_counter(mm, file_rss, 0); set_mm_counter(mm, anon_rss, 0); spin_lock_init(&mm->page_table_lock); - rwlock_init(&mm->ioctx_list_lock); - mm->ioctx_list = NULL; + spin_lock_init(&mm->ioctx_lock); + INIT_HLIST_HEAD(&mm->ioctx_list); mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; mm_init_owner(mm, p); diff --git a/mm/bounce.c b/mm/bounce.c index bf0cf7c8387..e590272fe7a 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -198,8 +198,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, /* * irk, bounce it */ - if (!bio) - bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); + if (!bio) { + unsigned int cnt = (*bio_orig)->bi_vcnt; + + bio = bio_alloc(GFP_NOIO, cnt); + memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec)); + } + to = bio->bi_io_vec + i; |
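The kernel/fork.c hunk above initializes the mm->ioctx_lock spinlock and mm->ioctx_list hlist that the fs/aio.c changes earlier in this patch convert to RCU. A condensed sketch of the resulting locking pattern, using illustrative ioctx_add()/ioctx_find() helpers rather than the actual ioctx_alloc()/lookup_ioctx() code:

#include <linux/aio.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>

static void ioctx_add(struct mm_struct *mm, struct kioctx *ctx)
{
	/* writers serialize on the new spinlock and use the _rcu list helpers */
	spin_lock(&mm->ioctx_lock);
	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
	spin_unlock(&mm->ioctx_lock);
}

static struct kioctx *ioctx_find(struct mm_struct *mm, unsigned long ctx_id)
{
	struct kioctx *ctx;
	struct hlist_node *n;

	/* readers take no lock, only an RCU read-side critical section */
	rcu_read_lock();
	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
		if (ctx->user_id == ctx_id && !ctx->dead) {
			get_ioctx(ctx);
			rcu_read_unlock();
			return ctx;
		}
	}
	rcu_read_unlock();
	return NULL;
}

Removal follows the same discipline: hlist_del_rcu() under mm->ioctx_lock in io_destroy() and exit_aio(), with the kioctx itself released through call_rcu() (ctx_rcu_free) so that lockless readers never touch freed memory.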