summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-11 09:04:23 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-11 09:04:23 +0900
commitce40be7a820bb393ac4ac69865f018d2f4038cf0 (patch)
treeb1fe5a93346eb06f22b1c303d63ec5456d7212ab /include
parentba0a5a36f60e4c1152af3a2ae2813251974405bf (diff)
parent02f3939e1a9357b7c370a4a69717cf9c02452737 (diff)
Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block
Pull block IO update from Jens Axboe: "Core block IO bits for 3.7. Not a huge round this time, it contains: - First series from Kent cleaning up and generalizing bio allocation and freeing. - WRITE_SAME support from Martin. - Mikulas patches to prevent O_DIRECT crashes when someone changes the block size of a device. - Make bio_split() work on data-less bio's (like trim/discards). - A few other minor fixups." Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew Morton. It is due to the VM no longer using a prio-tree (see commit 6b2dbba8b6ac: "mm: replace vma prio_tree with an interval tree"). So make set_blocksize() use mapping_mapped() instead of open-coding the internal VM knowledge that has changed. * 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits) block: makes bio_split support bio without data scatterlist: refactor the sg_nents scatterlist: add sg_nents fs: fix include/percpu-rwsem.h export error percpu-rw-semaphore: fix documentation typos fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared blockdev: turn a rw semaphore into a percpu rw semaphore Fix a crash when block device is read and block size is changed at the same time block: fix request_queue->flags initialization block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue() block: ioctl to zero block ranges block: Make blkdev_issue_zeroout use WRITE SAME block: Implement support for WRITE SAME block: Consolidate command flag and queue limit checks for merges block: Clean up special command handling logic block/blk-tag.c: Remove useless kfree block: remove the duplicated setting for congestion_threshold block: reject invalid queue attribute values block: Add bio_clone_bioset(), bio_clone_kmalloc() block: Consolidate bio_alloc_bioset(), bio_kmalloc() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h70
-rw-r--r--include/linux/blk_types.h36
-rw-r--r--include/linux/blkdev.h82
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/percpu-rwsem.h89
-rw-r--r--include/linux/scatterlist.h1
6 files changed, 249 insertions, 35 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 26435890dc8..820e7aaad4f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -212,20 +212,41 @@ extern void bio_pair_release(struct bio_pair *dbio);
extern struct bio_set *bioset_create(unsigned int, unsigned int);
extern void bioset_free(struct bio_set *);
-extern struct bio *bio_alloc(gfp_t, unsigned int);
-extern struct bio *bio_kmalloc(gfp_t, unsigned int);
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
extern void bio_put(struct bio *);
-extern void bio_free(struct bio *, struct bio_set *);
+
+extern void __bio_clone(struct bio *, struct bio *);
+extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
+
+extern struct bio_set *fs_bio_set;
+
+static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+}
+
+static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+ return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
+}
+
+static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+}
+
+static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
+{
+ return bio_clone_bioset(bio, gfp_mask, NULL);
+
+}
extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
-extern void __bio_clone(struct bio *, struct bio *);
-extern struct bio *bio_clone(struct bio *, gfp_t);
-
extern void bio_init(struct bio *);
+extern void bio_reset(struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
@@ -304,8 +325,6 @@ struct biovec_slab {
struct kmem_cache *slab;
};
-extern struct bio_set *fs_bio_set;
-
/*
* a small number of entries is fine, not going to be performance critical.
* basically we just need to survive
@@ -367,9 +386,31 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
/*
* Check whether this bio carries any data or not. A NULL bio is allowed.
*/
-static inline int bio_has_data(struct bio *bio)
+static inline bool bio_has_data(struct bio *bio)
{
- return bio && bio->bi_io_vec != NULL;
+ if (bio && bio->bi_vcnt)
+ return true;
+
+ return false;
+}
+
+static inline bool bio_is_rw(struct bio *bio)
+{
+ if (!bio_has_data(bio))
+ return false;
+
+ if (bio->bi_rw & REQ_WRITE_SAME)
+ return false;
+
+ return true;
+}
+
+static inline bool bio_mergeable(struct bio *bio)
+{
+ if (bio->bi_rw & REQ_NOMERGE_FLAGS)
+ return false;
+
+ return true;
}
/*
@@ -505,9 +546,8 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
#define bio_integrity(bio) (bio->bi_integrity != NULL)
-extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *, struct bio_set *);
+extern void bio_integrity_free(struct bio *);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern int bio_integrity_enabled(struct bio *bio);
extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
@@ -517,7 +557,7 @@ extern void bio_integrity_endio(struct bio *, int);
extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
-extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
+extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
extern void bio_integrity_init(void);
@@ -549,13 +589,13 @@ static inline int bio_integrity_prep(struct bio *bio)
return 0;
}
-static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+static inline void bio_integrity_free(struct bio *bio)
{
return;
}
static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
- gfp_t gfp_mask, struct bio_set *bs)
+ gfp_t gfp_mask)
{
return 0;
}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 7b7ac9ccec7..cdf11191e64 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -59,12 +59,6 @@ struct bio {
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
- unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
-
- atomic_t bi_cnt; /* pin count */
-
- struct bio_vec *bi_io_vec; /* the actual vec list */
-
bio_end_io_t *bi_end_io;
void *bi_private;
@@ -80,7 +74,17 @@ struct bio {
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
- bio_destructor_t *bi_destructor; /* destructor */
+ /*
+ * Everything starting with bi_max_vecs will be preserved by bio_reset()
+ */
+
+ unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
+
+ atomic_t bi_cnt; /* pin count */
+
+ struct bio_vec *bi_io_vec; /* the actual vec list */
+
+ struct bio_set *bi_pool;
/*
* We can inline a number of vecs at the end of the bio, to avoid
@@ -90,6 +94,8 @@ struct bio {
struct bio_vec bi_inline_vecs[0];
};
+#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
+
/*
* bio flags
*/
@@ -105,6 +111,13 @@ struct bio {
#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */
#define BIO_QUIET 10 /* Make BIO Quiet */
#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
+
+/*
+ * Flags starting here get preserved by bio_reset() - this includes
+ * BIO_POOL_IDX()
+ */
+#define BIO_RESET_BITS 12
+
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/*
@@ -134,6 +147,7 @@ enum rq_flag_bits {
__REQ_PRIO, /* boost priority in cfq */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
+ __REQ_WRITE_SAME, /* write same block many times */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_FUA, /* forced unit access */
@@ -172,15 +186,21 @@ enum rq_flag_bits {
#define REQ_META (1 << __REQ_META)
#define REQ_PRIO (1 << __REQ_PRIO)
#define REQ_DISCARD (1 << __REQ_DISCARD)
+#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
- REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
+ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
+ REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
+/* This mask is used for both bio and request merge checking */
+#define REQ_NOMERGE_FLAGS \
+ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
+
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
#define REQ_THROTTLED (1 << __REQ_THROTTLED)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4a2ab7c8539..1756001210d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -270,6 +270,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
+ unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -540,8 +541,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_account_rq(rq) \
(((rq)->cmd_flags & REQ_STARTED) && \
- ((rq)->cmd_type == REQ_TYPE_FS || \
- ((rq)->cmd_flags & REQ_DISCARD)))
+ ((rq)->cmd_type == REQ_TYPE_FS))
#define blk_pm_request(rq) \
((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
@@ -595,17 +595,39 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
rl->flags &= ~flag;
}
+static inline bool rq_mergeable(struct request *rq)
+{
+ if (rq->cmd_type != REQ_TYPE_FS)
+ return false;
-/*
- * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
- * it already be started by driver.
- */
-#define RQ_NOMERGE_FLAGS \
- (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD)
-#define rq_mergeable(rq) \
- (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
- (((rq)->cmd_flags & REQ_DISCARD) || \
- (rq)->cmd_type == REQ_TYPE_FS))
+ if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
+ return false;
+
+ return true;
+}
+
+static inline bool blk_check_merge_flags(unsigned int flags1,
+ unsigned int flags2)
+{
+ if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD))
+ return false;
+
+ if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE))
+ return false;
+
+ if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
+ return false;
+
+ return true;
+}
+
+static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
+{
+ if (bio_data(a) == bio_data(b))
+ return true;
+
+ return false;
+}
/*
* q->prep_rq_fn return values
@@ -802,6 +824,28 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> 9;
}
+static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
+ unsigned int cmd_flags)
+{
+ if (unlikely(cmd_flags & REQ_DISCARD))
+ return q->limits.max_discard_sectors;
+
+ if (unlikely(cmd_flags & REQ_WRITE_SAME))
+ return q->limits.max_write_same_sectors;
+
+ return q->limits.max_sectors;
+}
+
+static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
+{
+ struct request_queue *q = rq->q;
+
+ if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
+ return q->limits.max_hw_sectors;
+
+ return blk_queue_get_max_sectors(q, rq->cmd_flags);
+}
+
/*
* Request issue related functions.
*/
@@ -857,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
+extern void blk_queue_max_write_same_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -987,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
+extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
@@ -1164,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
return queue_discard_zeroes_data(bdev_get_queue(bdev));
}
+static inline unsigned int bdev_write_same(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return q->limits.max_write_same_sectors;
+
+ return 0;
+}
+
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c617ed024df..39f3e12ca75 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -335,6 +335,7 @@ struct inodes_stat_t {
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
@@ -415,6 +416,7 @@ struct inodes_stat_t {
#include <linux/migrate_mode.h>
#include <linux/uidgid.h>
#include <linux/lockdep.h>
+#include <linux/percpu-rwsem.h>
#include <asm/byteorder.h>
@@ -724,6 +726,8 @@ struct block_device {
int bd_fsfreeze_count;
/* Mutex for freeze */
struct mutex bd_fsfreeze_mutex;
+ /* A semaphore that prevents I/O while block size is being changed */
+ struct percpu_rw_semaphore bd_block_size_semaphore;
};
/*
@@ -2570,6 +2574,8 @@ extern int generic_segment_checks(const struct iovec *iov,
unsigned long *nr_segs, size_t *count, int access_flags);
/* fs/block_dev.c */
+extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos);
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
new file mode 100644
index 00000000000..cf80f7e5277
--- /dev/null
+++ b/include/linux/percpu-rwsem.h
@@ -0,0 +1,89 @@
+#ifndef _LINUX_PERCPU_RWSEM_H
+#define _LINUX_PERCPU_RWSEM_H
+
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/delay.h>
+
+struct percpu_rw_semaphore {
+ unsigned __percpu *counters;
+ bool locked;
+ struct mutex mtx;
+};
+
+static inline void percpu_down_read(struct percpu_rw_semaphore *p)
+{
+ rcu_read_lock();
+ if (unlikely(p->locked)) {
+ rcu_read_unlock();
+ mutex_lock(&p->mtx);
+ this_cpu_inc(*p->counters);
+ mutex_unlock(&p->mtx);
+ return;
+ }
+ this_cpu_inc(*p->counters);
+ rcu_read_unlock();
+}
+
+static inline void percpu_up_read(struct percpu_rw_semaphore *p)
+{
+ /*
+ * On X86, write operation in this_cpu_dec serves as a memory unlock
+ * barrier (i.e. memory accesses may be moved before the write, but
+ * no memory accesses are moved past the write).
+ * On other architectures this may not be the case, so we need smp_mb()
+ * there.
+ */
+#if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE))
+ barrier();
+#else
+ smp_mb();
+#endif
+ this_cpu_dec(*p->counters);
+}
+
+static inline unsigned __percpu_count(unsigned __percpu *counters)
+{
+ unsigned total = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
+
+ return total;
+}
+
+static inline void percpu_down_write(struct percpu_rw_semaphore *p)
+{
+ mutex_lock(&p->mtx);
+ p->locked = true;
+ synchronize_rcu();
+ while (__percpu_count(p->counters))
+ msleep(1);
+ smp_rmb(); /* paired with smp_mb() in percpu_sem_up_read() */
+}
+
+static inline void percpu_up_write(struct percpu_rw_semaphore *p)
+{
+ p->locked = false;
+ mutex_unlock(&p->mtx);
+}
+
+static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
+{
+ p->counters = alloc_percpu(unsigned);
+ if (unlikely(!p->counters))
+ return -ENOMEM;
+ p->locked = false;
+ mutex_init(&p->mtx);
+ return 0;
+}
+
+static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
+{
+ free_percpu(p->counters);
+ p->counters = NULL; /* catch use after free bugs */
+}
+
+#endif
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 7b600da9a63..4bd6c06eb28 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -201,6 +201,7 @@ static inline void *sg_virt(struct scatterlist *sg)
return page_address(sg_page(sg)) + sg->offset;
}
+int sg_nents(struct scatterlist *sg);
struct scatterlist *sg_next(struct scatterlist *);
struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
void sg_init_table(struct scatterlist *, unsigned int);