diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-08 10:13:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-08 10:13:35 -0700 |
commit | 4de13d7aa8f4d02f4dc99d4609575659f92b3c5a (patch) | |
tree | 3bc9729eabe79c6164cd29a5d605000bc82bf837 /include | |
parent | 5af43c24ca59a448c9312dd4a4a51d27ec3b9a73 (diff) | |
parent | b8d4a5bf6a049303a29a3275f463f09a490b50ea (diff) |
Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block
Pull block core updates from Jens Axboe:
- Major bit is Kents prep work for immutable bio vecs.
- Stable candidate fix for a scheduling-while-atomic in the queue
bypass operation.
- Fix for the hang on exceeded rq->datalen 32-bit unsigned when merging
discard bios.
- Tejuns changes to convert the writeback thread pool to the generic
workqueue mechanism.
- Runtime PM framework, SCSI patches exists on top of these in James'
tree.
- A few random fixes.
* 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits)
relay: move remove_buf_file inside relay_close_buf
partitions/efi.c: replace useless kzalloc's by kmalloc's
fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read()
block: fix max discard sectors limit
blkcg: fix "scheduling while atomic" in blk_queue_bypass_start
Documentation: cfq-iosched: update documentation help for cfq tunables
writeback: expose the bdi_wq workqueue
writeback: replace custom worker pool implementation with unbound workqueue
writeback: remove unused bdi_pending_list
aoe: Fix unitialized var usage
bio-integrity: Add explicit field for owner of bip_buf
block: Add an explicit bio flag for bios that own their bvec
block: Add bio_alloc_pages()
block: Convert some code to bio_for_each_segment_all()
block: Add bio_for_each_segment_all()
bounce: Refactor __blk_queue_bounce to not use bi_io_vec
raid1: use bio_copy_data()
pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage
pktcdvd: use bio_copy_data()
block: Add bio_copy_data()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/backing-dev.h | 16 | ||||
-rw-r--r-- | include/linux/bio.h | 115 | ||||
-rw-r--r-- | include/linux/blk_types.h | 5 | ||||
-rw-r--r-- | include/linux/blkdev.h | 29 | ||||
-rw-r--r-- | include/trace/events/block.h | 12 | ||||
-rw-r--r-- | include/trace/events/writeback.h | 5 |
6 files changed, 115 insertions, 67 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 350459910fe..c3881553f7d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,6 +18,7 @@ #include <linux/writeback.h> #include <linux/atomic.h> #include <linux/sysctl.h> +#include <linux/workqueue.h> struct page; struct device; @@ -27,7 +28,6 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_pending, /* On its way to being activated */ BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ @@ -53,10 +53,8 @@ struct bdi_writeback { unsigned int nr; unsigned long last_old_flush; /* last old data flush */ - unsigned long last_active; /* last time bdi thread was active */ - struct task_struct *task; /* writeback thread */ - struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ + struct delayed_work dwork; /* work item used for writeback */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -123,14 +121,15 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); -int bdi_writeback_thread(void *data); +void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -extern struct list_head bdi_pending_list; + +extern struct workqueue_struct *bdi_wq; static inline int wb_has_dirty_io(struct bdi_writeback *wb) { @@ -336,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) return bdi->capabilities & BDI_CAP_SWAP_BACKED; } -static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi) -{ - return bdi == &default_backing_dev_info; -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(mapping->backing_dev_info); diff --git a/include/linux/bio.h b/include/linux/bio.h index 820e7aaad4f..ef24466d8f8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -67,6 +67,7 @@ #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) +#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) static inline unsigned int bio_cur_bytes(struct bio *bio) { @@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -static inline int bio_has_allocated_vec(struct bio *bio) -{ - return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs; -} - /* * will die */ @@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio) #define bio_io_error(bio) bio_endio((bio), -EIO) /* - * drivers should not use the __ version unless they _really_ want to - * run through the entire bio and not just pending pieces + * drivers should not use the __ version unless they _really_ know what + * they're doing */ #define __bio_for_each_segment(bvl, bio, i, start_idx) \ for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ i < (bio)->bi_vcnt; \ bvl++, i++) +/* + * drivers should _never_ use the all version - the bio may have been split + * before it got to the driver and the driver won't own all of it + */ +#define bio_for_each_segment_all(bvl, bio, i) \ + for (i = 0; \ + bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ + i++) + #define bio_for_each_segment(bvl, bio, i) \ - __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) + for (i = (bio)->bi_idx; \ + bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ + i++) /* * get a reference to a bio, so it won't disappear. the intended use is @@ -180,9 +187,12 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_idx; /* current bip_vec index */ + unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ - struct bio_vec bip_vec[0]; /* embedded bvec array */ + + struct bio_vec *bip_vec; + struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); +extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); @@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); +extern int submit_bio_wait(int rw, struct bio *bio); +extern void bio_advance(struct bio *, unsigned); + extern void bio_init(struct bio *); extern void bio_reset(struct bio *); @@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi) } #endif +extern void bio_copy_data(struct bio *dst, struct bio *src); +extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); + extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, @@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); -extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); -extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); +extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); +extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); #ifdef CONFIG_BLK_CGROUP @@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } static inline void bio_disassociate_task(struct bio *bio) { } #endif /* CONFIG_BLK_CGROUP */ -/* - * bio_set is used to allow other portions of the IO system to - * allocate their own private memory pools for bio and iovec structures. - * These memory pools in turn all allocate from the bio_slab - * and the bvec_slabs[]. - */ -#define BIO_POOL_SIZE 2 -#define BIOVEC_NR_POOLS 6 -#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) - -struct bio_set { - struct kmem_cache *bio_slab; - unsigned int front_pad; - - mempool_t *bio_pool; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t *bio_integrity_pool; -#endif - mempool_t *bvec_pool; -}; - -struct biovec_slab { - int nr_vecs; - char *name; - struct kmem_cache *slab; -}; - -/* - * a small number of entries is fine, not going to be performance critical. - * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #ifdef CONFIG_HIGHMEM /* * remember never ever reenable interrupts between a bvec_kmap_irq and @@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } +/* + * bio_set is used to allow other portions of the IO system to + * allocate their own private memory pools for bio and iovec structures. + * These memory pools in turn all allocate from the bio_slab + * and the bvec_slabs[]. + */ +#define BIO_POOL_SIZE 2 +#define BIOVEC_NR_POOLS 6 +#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) + +struct bio_set { + struct kmem_cache *bio_slab; + unsigned int front_pad; + + mempool_t *bio_pool; + mempool_t *bvec_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; + mempool_t *bvec_integrity_pool; +#endif + + /* + * Deadlock avoidance for stacking block drivers: see comments in + * bio_alloc_bioset() for details + */ + spinlock_t rescue_lock; + struct bio_list rescue_list; + struct work_struct rescue_work; + struct workqueue_struct *rescue_workqueue; +}; + +struct biovec_slab { + int nr_vecs; + char *name; + struct kmem_cache *slab; +}; + +/* + * a small number of entries is fine, not going to be performance critical. + * basically we just need to survive + */ +#define BIO_SPLIT_ENTRIES 2 + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 22990cf4439..fa1abeb45b7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -118,6 +118,7 @@ struct bio { * BIO_POOL_IDX() */ #define BIO_RESET_BITS 13 +#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) @@ -176,6 +177,7 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_KERNEL, /* direct IO to kernel pages */ + __REQ_PM, /* runtime pm request */ __REQ_NR_BITS, /* stops here */ }; @@ -198,6 +200,8 @@ enum rq_flag_bits { REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK +#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) + /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) @@ -224,5 +228,6 @@ enum rq_flag_bits { #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) #define REQ_KERNEL (1 << __REQ_KERNEL) +#define REQ_PM (1 << __REQ_PM) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e38cfe77f7f..2fdb4a451b4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -361,6 +361,12 @@ struct request_queue { */ struct kobject kobj; +#ifdef CONFIG_PM_RUNTIME + struct device *dev; + int rpm_status; + unsigned int nr_pending; +#endif + /* * queue settings */ @@ -838,7 +844,7 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, unsigned int cmd_flags) { if (unlikely(cmd_flags & REQ_DISCARD)) - return q->limits.max_discard_sectors; + return min(q->limits.max_discard_sectors, UINT_MAX >> 9); if (unlikely(cmd_flags & REQ_WRITE_SAME)) return q->limits.max_write_same_sectors; @@ -961,6 +967,27 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); /* + * block layer runtime pm functions + */ +#ifdef CONFIG_PM_RUNTIME +extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); +extern int blk_pre_runtime_suspend(struct request_queue *q); +extern void blk_post_runtime_suspend(struct request_queue *q, int err); +extern void blk_pre_runtime_resume(struct request_queue *q); +extern void blk_post_runtime_resume(struct request_queue *q, int err); +#else +static inline void blk_pm_runtime_init(struct request_queue *q, + struct device *dev) {} +static inline int blk_pre_runtime_suspend(struct request_queue *q) +{ + return -ENOSYS; +} +static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} +static inline void blk_pre_runtime_resume(struct request_queue *q) {} +static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} +#endif + +/* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests * into single larger request. As the requests are moved from a per-task list to diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9c1467357b0..60ae7c3db91 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce, __entry->dev = bio->bi_bdev ? bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); __entry->error = error; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); ), @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq, TP_fast_assign( __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; __entry->sector = bio ? bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, bio ? bio->bi_rw : 0, __entry->nr_sector); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); @@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; + __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 6a16fd2e70e..464ea82e10d 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class, DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ TP_ARGS(bdi, work)) -DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); @@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \ DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); -DEFINE_WRITEBACK_EVENT(writeback_wake_thread); -DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); -DEFINE_WRITEBACK_EVENT(writeback_thread_start); -DEFINE_WRITEBACK_EVENT(writeback_thread_stop); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), |