author | David Woodhouse <dwmw2@infradead.org> | 2008-04-22 12:34:25 +0100
committer | David Woodhouse <dwmw2@infradead.org> | 2008-04-22 12:34:25 +0100
commit | f838bad1b3be8ca0c785ee0e0c570dfda74cf377 (patch)
tree | 5a842a8056a708cfad55a20fa8ab733dd94b0903 /block
parent | dd919660aacdf4adfcd279556aa03e595f7f0fc2 (diff)
parent | 807501475fce0ebe68baedf87f202c3e4ee0d12c (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 67
-rw-r--r-- | block/blk-barrier.c | 9
-rw-r--r-- | block/blk-core.c | 7
-rw-r--r-- | block/blk-ioc.c | 37
-rw-r--r-- | block/blk-map.c | 35
-rw-r--r-- | block/blk-merge.c | 15
-rw-r--r-- | block/blk-settings.c | 29
-rw-r--r-- | block/blk-sysfs.c | 10
-rw-r--r-- | block/blk-tag.c | 2
-rw-r--r-- | block/blk.h | 2
-rw-r--r-- | block/bsg.c | 63
-rw-r--r-- | block/cfq-iosched.c | 90
-rw-r--r-- | block/compat_ioctl.c | 1
-rw-r--r-- | block/elevator.c | 15
-rw-r--r-- | block/genhd.c | 14
-rw-r--r-- | block/scsi_ioctl.c | 1
16 files changed, 245 insertions, 152 deletions
diff --git a/block/Kconfig b/block/Kconfig index 9bda7bc8030..3e97f2bc446 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -5,14 +5,18 @@ menuconfig BLOCK bool "Enable the block layer" if EMBEDDED default y help - This permits the block layer to be removed from the kernel if it's not - needed (on some embedded devices for example). If this option is - disabled, then blockdev files will become unusable and some - filesystems (such as ext3) will become unavailable. + Provide block layer support for the kernel. - This option will also disable SCSI character devices and USB storage - since they make use of various block layer definitions and - facilities. + Disable this option to remove the block layer support from the + kernel. This may be useful for embedded devices. + + If this option is disabled: + + - block device files will become unusable + - some filesystems (such as ext3) will become unavailable. + + Also, SCSI character devices and USB storage will be disabled since + they make use of various block layer definitions and facilities. Say Y here unless you know you really don't want to mount disks and suchlike. @@ -23,9 +27,20 @@ config LBD bool "Support for Large Block Devices" depends on !64BIT help - Say Y here if you want to attach large (bigger than 2TB) discs to - your machine, or if you want to have a raid or loopback device - bigger than 2TB. Otherwise say N. + Enable block devices of size 2TB and larger. + + This option is required to support the full capacity of large + (2TB+) block devices, including RAID, disk, Network Block Device, + Logical Volume Manager (LVM) and loopback. + + For example, RAID devices are frequently bigger than the capacity + of the largest individual hard drive. + + This option is not required if you have individual disk drives + which total 2TB+ and you are not aggregating the capacity into + a large block device (e.g. using RAID or LVM). + + If unsure, say N. config BLK_DEV_IO_TRACE bool "Support for tracing block io actions" @@ -33,19 +48,21 @@ config BLK_DEV_IO_TRACE select RELAY select DEBUG_FS help - Say Y here, if you want to be able to trace the block layer actions + Say Y here if you want to be able to trace the block layer actions on a given queue. Tracing allows you to see any traffic happening - on a block device queue. For more information (and the user space - support tools needed), fetch the blktrace app from: + on a block device queue. For more information (and the userspace + support tools needed), fetch the blktrace tools from: + + git://git.kernel.dk/blktrace.git - git://brick.kernel.dk/data/git/blktrace.git + If unsure, say N. config LSF bool "Support for Large Single Files" depends on !64BIT help - Say Y here if you want to be able to handle very large files (bigger - than 2TB), otherwise say N. + Say Y here if you want to be able to handle very large files (2TB + and larger), otherwise say N. If unsure, say Y. @@ -53,14 +70,16 @@ config BLK_DEV_BSG bool "Block layer SG support v4 (EXPERIMENTAL)" depends on EXPERIMENTAL ---help--- - Saying Y here will enable generic SG (SCSI generic) v4 support - for any block device. - - Unlike SG v3 (aka block/scsi_ioctl.c drivers/scsi/sg.c), SG v4 - can handle complicated SCSI commands: tagged variable length cdbs - with bidirectional data transfers and generic request/response - protocols (e.g. Task Management Functions and SMP in Serial - Attached SCSI). + Saying Y here will enable generic SG (SCSI generic) v4 support + for any block device. 
+ + Unlike SG v3 (aka block/scsi_ioctl.c drivers/scsi/sg.c), SG v4 + can handle complicated SCSI commands: tagged variable length cdbs + with bidirectional data transfers and generic request/response + protocols (e.g. Task Management Functions and SMP in Serial + Attached SCSI). + + If unsure, say N. endif # BLOCK diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6901eedeffc..55c5f1fc4f1 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -259,8 +259,11 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp) static void bio_end_empty_barrier(struct bio *bio, int err) { - if (err) + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); clear_bit(BIO_UPTODATE, &bio->bi_flags); + } complete(bio->bi_private); } @@ -309,7 +312,9 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) *error_sector = bio->bi_sector; ret = 0; - if (!bio_flagged(bio, BIO_UPTODATE)) + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; bio_put(bio); diff --git a/block/blk-core.c b/block/blk-core.c index e9754dc98ec..2a438a93f72 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -38,7 +38,7 @@ static int __make_request(struct request_queue *q, struct bio *bio); /* * For the allocated request tables */ -struct kmem_cache *request_cachep; +static struct kmem_cache *request_cachep; /* * For queue allocation @@ -134,6 +134,7 @@ void rq_init(struct request_queue *q, struct request *rq) rq->cmd_len = 0; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->data_len = 0; + rq->extra_len = 0; rq->sense_len = 0; rq->data = NULL; rq->sense = NULL; @@ -423,7 +424,6 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } -EXPORT_SYMBOL(blk_put_queue); void blk_cleanup_queue(struct request_queue *q) { @@ -591,7 +591,6 @@ int blk_get_queue(struct request_queue *q) return 1; } -EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { @@ -1767,6 +1766,7 @@ static inline void __end_request(struct request *rq, int uptodate, /** * blk_rq_bytes - Returns bytes left to complete in the entire request + * @rq: the request being processed **/ unsigned int blk_rq_bytes(struct request *rq) { @@ -1779,6 +1779,7 @@ EXPORT_SYMBOL_GPL(blk_rq_bytes); /** * blk_rq_cur_bytes - Returns bytes left to complete in the current segment + * @rq: the request being processed **/ unsigned int blk_rq_cur_bytes(struct request *rq) { diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 80245dc30c7..e34df7c9fc3 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -17,17 +17,13 @@ static struct kmem_cache *iocontext_cachep; static void cfq_dtor(struct io_context *ioc) { - struct cfq_io_context *cic[1]; - int r; + if (!hlist_empty(&ioc->cic_list)) { + struct cfq_io_context *cic; - /* - * We don't have a specific key to lookup with, so use the gang - * lookup to just retrieve the first item stored. The cfq exit - * function will iterate the full tree, so any member will do. 
- */ - r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); - if (r > 0) - cic[0]->dtor(ioc); + cic = list_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); + cic->dtor(ioc); + } } /* @@ -57,18 +53,16 @@ EXPORT_SYMBOL(put_io_context); static void cfq_exit(struct io_context *ioc) { - struct cfq_io_context *cic[1]; - int r; - rcu_read_lock(); - /* - * See comment for cfq_dtor() - */ - r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); - rcu_read_unlock(); - if (r > 0) - cic[0]->exit(ioc); + if (!hlist_empty(&ioc->cic_list)) { + struct cfq_io_context *cic; + + cic = list_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); + cic->exit(ioc); + } + rcu_read_unlock(); } /* Called by the exitting task */ @@ -105,6 +99,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ret->cic_list); ret->ioc_data = NULL; } @@ -176,7 +171,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc) } EXPORT_SYMBOL(copy_io_context); -int __init blk_ioc_init(void) +static int __init blk_ioc_init(void) { iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL); diff --git a/block/blk-map.c b/block/blk-map.c index 955d75c1a58..3c942bd6422 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <scsi/sg.h> /* for struct sg_iovec */ #include "blk.h" @@ -43,6 +44,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, void __user *ubuf, unsigned int len) { unsigned long uaddr; + unsigned int alignment; struct bio *bio, *orig_bio; int reading, ret; @@ -53,8 +55,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - if (!(uaddr & queue_dma_alignment(q)) && - !(len & queue_dma_alignment(q))) + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + if (!(uaddr & alignment) && !(len & alignment)) bio = bio_map_user(q, NULL, uaddr, len, reading); else bio = bio_copy_user(q, uaddr, len, reading); @@ -139,10 +141,14 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, ubuf += ret; } + if (!bio_flagged(bio, BIO_USER_MAPPED)) + rq->cmd_flags |= REQ_COPY_USER; + rq->buffer = rq->data = NULL; return 0; unmap_rq: blk_rq_unmap_user(bio); + rq->bio = NULL; return ret; } EXPORT_SYMBOL(blk_rq_map_user); @@ -172,15 +178,26 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct sg_iovec *iov, int iov_count, unsigned int len) { struct bio *bio; + int i, read = rq_data_dir(rq) == READ; + int unaligned = 0; if (!iov || iov_count <= 0) return -EINVAL; - /* we don't allow misaligned data like bio_map_user() does. 
If the - * user is using sg, they're expected to know the alignment constraints - * and respect them accordingly */ - bio = bio_map_user_iov(q, NULL, iov, iov_count, - rq_data_dir(rq) == READ); + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + + if (uaddr & queue_dma_alignment(q)) { + unaligned = 1; + break; + } + } + + if (unaligned || (q->dma_pad_mask & len)) + bio = bio_copy_user_iov(q, iov, iov_count, read); + else + bio = bio_map_user_iov(q, NULL, iov, iov_count, read); + if (IS_ERR(bio)) return PTR_ERR(bio); @@ -190,12 +207,14 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, return -EINVAL; } + if (!bio_flagged(bio, BIO_USER_MAPPED)) + rq->cmd_flags |= REQ_COPY_USER; + bio_get(bio); blk_rq_bio_prep(q, rq, bio); rq->buffer = rq->data = NULL; return 0; } -EXPORT_SYMBOL(blk_rq_map_user_iov); /** * blk_rq_unmap_user - unmap a request with user data diff --git a/block/blk-merge.c b/block/blk-merge.c index d3b84bbb776..b5c5c4a9e3f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -220,7 +220,19 @@ new_segment: bvprv = bvec; } /* segments in rq */ - if (q->dma_drain_size) { + + if (unlikely(rq->cmd_flags & REQ_COPY_USER) && + (rq->data_len & q->dma_pad_mask)) { + unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1; + + sg->length += pad_len; + rq->extra_len += pad_len; + } + + if (q->dma_drain_size && q->dma_drain_needed(rq)) { + if (rq->cmd_flags & REQ_RW) + memset(q->dma_drain_buffer, 0, q->dma_drain_size); + sg->page_link &= ~0x02; sg = sg_next(sg); sg_set_page(sg, virt_to_page(q->dma_drain_buffer), @@ -228,6 +240,7 @@ new_segment: ((unsigned long)q->dma_drain_buffer) & (PAGE_SIZE - 1)); nsegs++; + rq->extra_len += q->dma_drain_size; } if (sg) diff --git a/block/blk-settings.c b/block/blk-settings.c index c8d0c572409..5713f7e5cbd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -140,7 +140,7 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) /* Assume anything <= 4GB can be handled by IOMMU. Actually some IOMMUs can handle everything, but I don't know of a way to test this here. */ - if (b_pfn < (min_t(u64, 0xffffffff, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) + if (b_pfn < (min_t(u64, 0x100000000UL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; q->bounce_pfn = max_low_pfn; #else @@ -293,9 +293,26 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) EXPORT_SYMBOL(blk_queue_stack_limits); /** - * blk_queue_dma_drain - Set up a drain buffer for excess dma. + * blk_queue_dma_pad - set pad mask + * @q: the request queue for the device + * @mask: pad mask + * + * Set pad mask. Direct IO requests are padded to the mask specified. * + * Appending pad buffer to a request modifies ->data_len such that it + * includes the pad buffer. The original requested data length can be + * obtained using blk_rq_raw_data_len(). + **/ +void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) +{ + q->dma_pad_mask = mask; +} +EXPORT_SYMBOL(blk_queue_dma_pad); + +/** + * blk_queue_dma_drain - Set up a drain buffer for excess dma. * @q: the request queue for the device + * @dma_drain_needed: fn which returns non-zero if drain is necessary * @buf: physically contiguous buffer * @size: size of the buffer in bytes * @@ -315,14 +332,16 @@ EXPORT_SYMBOL(blk_queue_stack_limits); * device can support otherwise there won't be room for the drain * buffer. 
*/ -int blk_queue_dma_drain(struct request_queue *q, void *buf, - unsigned int size) +int blk_queue_dma_drain(struct request_queue *q, + dma_drain_needed_fn *dma_drain_needed, + void *buf, unsigned int size) { if (q->max_hw_segments < 2 || q->max_phys_segments < 2) return -EINVAL; /* make room for appending the drain */ --q->max_hw_segments; --q->max_phys_segments; + q->dma_drain_needed = dma_drain_needed; q->dma_drain_buffer = buf; q->dma_drain_size = size; @@ -386,7 +405,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); -int __init blk_settings_init(void) +static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 54d0db11615..fc41d83be22 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -276,9 +276,12 @@ int blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; - if (!q || !q->request_fn) + if (WARN_ON(!q)) return -ENXIO; + if (!q->request_fn) + return 0; + ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), "%s", "queue"); if (ret < 0) @@ -300,7 +303,10 @@ void blk_unregister_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; - if (q && q->request_fn) { + if (WARN_ON(!q)) + return; + + if (q->request_fn) { elv_unregister_queue(q); kobject_uevent(&q->kobj, KOBJ_REMOVE); diff --git a/block/blk-tag.c b/block/blk-tag.c index a8c37d4bbb3..4780a46ce23 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -6,6 +6,8 @@ #include <linux/bio.h> #include <linux/blkdev.h> +#include "blk.h" + /** * blk_queue_find_tag - find a request by its tag and queue * @q: The request queue for the device diff --git a/block/blk.h b/block/blk.h index ec898dd0c65..ec9120fb789 100644 --- a/block/blk.h +++ b/block/blk.h @@ -32,6 +32,8 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect); void blk_queue_congestion_threshold(struct request_queue *q); +int blk_dev_init(void); + /* * Return the threshold (number of used requests) at which the queue is * considered to be congested. 
It include a little hysteresis to keep the diff --git a/block/bsg.c b/block/bsg.c index 8917c5174dc..f51172ed27c 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -37,7 +37,6 @@ struct bsg_device { struct list_head done_list; struct hlist_node dev_list; atomic_t ref_count; - int minor; int queued_cmds; int done_cmds; wait_queue_head_t wq_done; @@ -368,7 +367,7 @@ static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) spin_lock_irq(&bd->lock); if (bd->done_cmds) { - bc = list_entry(bd->done_list.next, struct bsg_command, list); + bc = list_first_entry(&bd->done_list, struct bsg_command, list); list_del(&bc->list); bd->done_cmds--; } @@ -468,8 +467,6 @@ static int bsg_complete_all_commands(struct bsg_device *bd) dprintk("%s: entered\n", bd->name); - set_bit(BSG_F_BLOCK, &bd->flags); - /* * wait for all commands to complete */ @@ -705,6 +702,7 @@ static struct bsg_device *bsg_alloc_device(void) static int bsg_put_device(struct bsg_device *bd) { int ret = 0; + struct device *dev = bd->queue->bsg_dev.dev; mutex_lock(&bsg_mutex); @@ -730,6 +728,7 @@ static int bsg_put_device(struct bsg_device *bd) kfree(bd); out: mutex_unlock(&bsg_mutex); + put_device(dev); return ret; } @@ -738,24 +737,28 @@ static struct bsg_device *bsg_add_device(struct inode *inode, struct file *file) { struct bsg_device *bd; + int ret; #ifdef BSG_DEBUG unsigned char buf[32]; #endif + ret = blk_get_queue(rq); + if (ret) + return ERR_PTR(-ENXIO); bd = bsg_alloc_device(); - if (!bd) + if (!bd) { + blk_put_queue(rq); return ERR_PTR(-ENOMEM); + } bd->queue = rq; - kobject_get(&rq->kobj); bsg_set_block(bd, file); atomic_set(&bd->ref_count, 1); - bd->minor = iminor(inode); mutex_lock(&bsg_mutex); - hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(bd->minor)); + hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); - strncpy(bd->name, rq->bsg_dev.class_dev->class_id, sizeof(bd->name) - 1); + strncpy(bd->name, rq->bsg_dev.class_dev->bus_id, sizeof(bd->name) - 1); dprintk("bound to <%s>, max queue %d\n", format_dev_t(buf, inode->i_rdev), bd->max_queue); @@ -763,23 +766,21 @@ static struct bsg_device *bsg_add_device(struct inode *inode, return bd; } -static struct bsg_device *__bsg_get_device(int minor) +static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) { - struct bsg_device *bd = NULL; + struct bsg_device *bd; struct hlist_node *entry; mutex_lock(&bsg_mutex); - hlist_for_each(entry, bsg_dev_idx_hash(minor)) { - bd = hlist_entry(entry, struct bsg_device, dev_list); - if (bd->minor == minor) { + hlist_for_each_entry(bd, entry, bsg_dev_idx_hash(minor), dev_list) { + if (bd->queue == q) { atomic_inc(&bd->ref_count); - break; + goto found; } - - bd = NULL; } - + bd = NULL; +found: mutex_unlock(&bsg_mutex); return bd; } @@ -789,21 +790,27 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) struct bsg_device *bd; struct bsg_class_device *bcd; - bd = __bsg_get_device(iminor(inode)); - if (bd) - return bd; - /* * find the class device */ mutex_lock(&bsg_mutex); bcd = idr_find(&bsg_minor_idr, iminor(inode)); + if (bcd) + get_device(bcd->dev); mutex_unlock(&bsg_mutex); if (!bcd) return ERR_PTR(-ENODEV); - return bsg_add_device(inode, bcd->queue, file); + bd = __bsg_get_device(iminor(inode), bcd->queue); + if (bd) + return bd; + + bd = bsg_add_device(inode, bcd->queue, file); + if (IS_ERR(bd)) + put_device(bcd->dev); + + return bd; } static int bsg_open(struct inode *inode, struct file *file) @@ -939,10 +946,9 @@ void bsg_unregister_queue(struct request_queue *q) 
mutex_lock(&bsg_mutex); idr_remove(&bsg_minor_idr, bcd->minor); sysfs_remove_link(&q->kobj, "bsg"); - class_device_unregister(bcd->class_dev); + device_unregister(bcd->class_dev); put_device(bcd->dev); bcd->class_dev = NULL; - bcd->dev = NULL; mutex_unlock(&bsg_mutex); } EXPORT_SYMBOL_GPL(bsg_unregister_queue); @@ -953,7 +959,7 @@ int bsg_register_queue(struct request_queue *q, struct device *gdev, struct bsg_class_device *bcd; dev_t dev; int ret, minor; - struct class_device *class_dev = NULL; + struct device *class_dev = NULL; const char *devname; if (name) @@ -992,8 +998,7 @@ int bsg_register_queue(struct request_queue *q, struct device *gdev, bcd->queue = q; bcd->dev = get_device(gdev); dev = MKDEV(bsg_major, bcd->minor); - class_dev = class_device_create(bsg_class, NULL, dev, gdev, "%s", - devname); + class_dev = device_create(bsg_class, gdev, dev, "%s", devname); if (IS_ERR(class_dev)) { ret = PTR_ERR(class_dev); goto put_dev; @@ -1010,7 +1015,7 @@ int bsg_register_queue(struct request_queue *q, struct device *gdev, return 0; unregister_class_dev: - class_device_unregister(class_dev); + device_unregister(class_dev); put_dev: put_device(gdev); remove_idr: diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ca198e61fa6..f4e1006c253 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1143,43 +1143,37 @@ static void cfq_put_queue(struct cfq_queue *cfqq) } /* - * Call func for each cic attached to this ioc. Returns number of cic's seen. + * Call func for each cic attached to this ioc. */ -#define CIC_GANG_NR 16 -static unsigned int +static void call_for_each_cic(struct io_context *ioc, void (*func)(struct io_context *, struct cfq_io_context *)) { - struct cfq_io_context *cics[CIC_GANG_NR]; - unsigned long index = 0; - unsigned int called = 0; - int nr; + struct cfq_io_context *cic; + struct hlist_node *n; rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) + func(ioc, cic); + rcu_read_unlock(); +} - do { - int i; - - /* - * Perhaps there's a better way - this just gang lookups from - * 0 to the end, restarting after each CIC_GANG_NR from the - * last key + 1. - */ - nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics, - index, CIC_GANG_NR); - if (!nr) - break; +static void cfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; - called += nr; - index = 1 + (unsigned long) cics[nr - 1]->key; + cic = container_of(head, struct cfq_io_context, rcu_head); - for (i = 0; i < nr; i++) - func(ioc, cics[i]); - } while (nr == CIC_GANG_NR); + kmem_cache_free(cfq_ioc_pool, cic); + elv_ioc_count_dec(ioc_count); - rcu_read_unlock(); + if (ioc_gone && !elv_ioc_count_read(ioc_count)) + complete(ioc_gone); +} - return called; +static void cfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, cfq_cic_free_rcu); } static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) @@ -1190,26 +1184,21 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) spin_lock_irqsave(&ioc->lock, flags); radix_tree_delete(&ioc->radix_root, cic->dead_key); + hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); - kmem_cache_free(cfq_ioc_pool, cic); + cfq_cic_free(cic); } static void cfq_free_io_context(struct io_context *ioc) { - int freed; - /* - * ioc->refcount is zero here, so no more cic's are allowed to be - * linked into this ioc. So it should be ok to iterate over the known - * list, we will see all cic's since no new ones are added. 
+ * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. */ - freed = call_for_each_cic(ioc, cic_free_func); - - elv_ioc_count_mod(ioc_count, -freed); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); + call_for_each_cic(ioc, cic_free_func); } static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) @@ -1225,6 +1214,8 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) static void __cfq_exit_single_io_context(struct cfq_data *cfqd, struct cfq_io_context *cic) { + struct io_context *ioc = cic->ioc; + list_del_init(&cic->queue_list); /* @@ -1234,6 +1225,9 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, cic->dead_key = (unsigned long) cic->key; cic->key = NULL; + if (ioc->ioc_data == cic) + rcu_assign_pointer(ioc->ioc_data, NULL); + if (cic->cfqq[ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); cic->cfqq[ASYNC] = NULL; @@ -1266,7 +1260,6 @@ static void cfq_exit_single_io_context(struct io_context *ioc, */ static void cfq_exit_io_context(struct io_context *ioc) { - rcu_assign_pointer(ioc->ioc_data, NULL); call_for_each_cic(ioc, cfq_exit_single_io_context); } @@ -1280,6 +1273,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) if (cic) { cic->last_end_request = jiffies; INIT_LIST_HEAD(&cic->queue_list); + INIT_HLIST_NODE(&cic->cic_list); cic->dtor = cfq_free_io_context; cic->exit = cfq_exit_io_context; elv_ioc_count_inc(ioc_count); @@ -1475,15 +1469,6 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, return cfqq; } -static void cfq_cic_free(struct cfq_io_context *cic) -{ - kmem_cache_free(cfq_ioc_pool, cic); - elv_ioc_count_dec(ioc_count); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); -} - /* * We drop cfq io contexts lazily, so we may find a dead one. 
*/ @@ -1497,10 +1482,10 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); - if (ioc->ioc_data == cic) - rcu_assign_pointer(ioc->ioc_data, NULL); + BUG_ON(ioc->ioc_data == cic); radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); + hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); cfq_cic_free(cic); @@ -1561,6 +1546,8 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); ret = radix_tree_insert(&ioc->radix_root, (unsigned long) cfqd, cic); + if (!ret) + hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); radix_tree_preload_end(); @@ -2152,7 +2139,7 @@ static int __init cfq_slab_setup(void) if (!cfq_pool) goto fail; - cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); + cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); if (!cfq_ioc_pool) goto fail; @@ -2300,7 +2287,6 @@ static void __exit cfq_exit(void) smp_wmb(); if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); - synchronize_rcu(); cfq_slab_kill(); } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index b73373216b0..c70d0b6f666 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -624,7 +624,6 @@ static int compat_blkdev_driver_ioctl(struct inode *inode, struct file *file, case HDIO_GET_IDENTITY: case HDIO_DRIVE_TASK: case HDIO_DRIVE_CMD: - case HDIO_SCAN_HWIF: /* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */ case 0x330: /* 0x02 -- Floppy ioctls */ diff --git a/block/elevator.c b/block/elevator.c index bafbae0344d..88318c38360 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -134,6 +134,21 @@ static struct elevator_type *elevator_get(const char *name) spin_lock(&elv_list_lock); e = elevator_find(name); + if (!e) { + char elv[ELV_NAME_MAX + strlen("-iosched")]; + + spin_unlock(&elv_list_lock); + + if (!strcmp(name, "anticipatory")) + sprintf(elv, "as-iosched"); + else + sprintf(elv, "%s-iosched", name); + + request_module(elv); + spin_lock(&elv_list_lock); + e = elevator_find(name); + } + if (e && !try_module_get(e->elevator_owner)) e = NULL; diff --git a/block/genhd.c b/block/genhd.c index 53f2238e69c..00da5219ee3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -17,11 +17,15 @@ #include <linux/buffer_head.h> #include <linux/mutex.h> +#include "blk.h" + static DEFINE_MUTEX(block_class_lock); #ifndef CONFIG_SYSFS_DEPRECATED struct kobject *block_depr; #endif +static struct device_type disk_type; + /* * Can be deleted altogether. Later. 
* @@ -346,8 +350,6 @@ const struct seq_operations partitions_op = { #endif -extern int blk_dev_init(void); - static struct kobject *base_probe(dev_t devt, int *part, void *data) { if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) @@ -358,7 +360,9 @@ static struct kobject *base_probe(dev_t devt, int *part, void *data) static int __init genhd_device_init(void) { - class_register(&block_class); + int error = class_register(&block_class); + if (unlikely(error)) + return error; bdev_map = kobj_map_init(base_probe, &block_class_lock); blk_dev_init(); @@ -502,7 +506,7 @@ struct class block_class = { .name = "block", }; -struct device_type disk_type = { +static struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, @@ -632,12 +636,14 @@ static void media_change_notify_thread(struct work_struct *work) put_device(gd->driverfs_dev); } +#if 0 void genhd_media_change_notify(struct gendisk *disk) { get_device(disk->driverfs_dev); schedule_work(&disk->async_notify); } EXPORT_SYMBOL_GPL(genhd_media_change_notify); +#endif /* 0 */ dev_t blk_lookup_devt(const char *name) { diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 9675b34638d..a2c3a936ebf 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -529,6 +529,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; rq->data_len = 0; + rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; |
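
The blk-settings.c hunks in this merge add blk_queue_dma_pad() and change blk_queue_dma_drain() to take a dma_drain_needed_fn callback, which blk-merge.c then invokes as q->dma_drain_needed(rq). The following is a minimal, hypothetical driver-side sketch of wiring up those two hooks; the function names, the 4-byte pad mask, the drain-buffer size, and the drain heuristic are illustrative assumptions, not part of this merge.

#include <linux/blkdev.h>

#define MY_DRAIN_SIZE 256	/* illustrative drain buffer size */

/*
 * Illustrative callback: ask for the drain buffer only when the
 * request length is not a multiple of 4 bytes.  The callback shape
 * (struct request * in, non-zero out) follows the q->dma_drain_needed(rq)
 * call site added in blk-merge.c.
 */
static int my_drain_needed(struct request *rq)
{
	return (rq->data_len & 3) != 0;
}

static int my_setup_queue(struct request_queue *q, void *drain_buf)
{
	/* Pad direct-IO transfers out to a 4-byte boundary. */
	blk_queue_dma_pad(q, 0x03);

	/* Append drain_buf whenever my_drain_needed() reports non-zero. */
	return blk_queue_dma_drain(q, my_drain_needed, drain_buf,
				   MY_DRAIN_SIZE);
}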