summaryrefslogtreecommitdiffstats
path: root/drivers/block/xen-blkfront.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--drivers/block/xen-blkfront.c433
1 files changed, 330 insertions, 103 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 82ed403147c..ac1b682edec 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -41,6 +41,7 @@
#include <linux/cdrom.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/smp_lock.h>
#include <linux/scatterlist.h>
#include <xen/xen.h>
@@ -48,6 +49,7 @@
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>
+#include <xen/platform_pci.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>
@@ -78,6 +80,7 @@ static const struct block_device_operations xlvbd_block_fops;
*/
struct blkfront_info
{
+ struct mutex mutex;
struct xenbus_device *xbdev;
struct gendisk *gd;
int vdevice;
@@ -94,16 +97,14 @@ struct blkfront_info
unsigned long shadow_free;
int feature_barrier;
int is_ready;
-
- /**
- * The number of people holding this device open. We won't allow a
- * hot-unplug unless this is 0.
- */
- int users;
};
static DEFINE_SPINLOCK(blkif_io_lock);
+static unsigned int nr_minors;
+static unsigned long *minors;
+static DEFINE_SPINLOCK(minor_lock);
+
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0
@@ -138,6 +139,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
info->shadow_free = id;
}
+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
+{
+ unsigned int end = minor + nr;
+ int rc;
+
+ if (end > nr_minors) {
+ unsigned long *bitmap, *old;
+
+ bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+ GFP_KERNEL);
+ if (bitmap == NULL)
+ return -ENOMEM;
+
+ spin_lock(&minor_lock);
+ if (end > nr_minors) {
+ old = minors;
+ memcpy(bitmap, minors,
+ BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
+ minors = bitmap;
+ nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
+ } else
+ old = bitmap;
+ spin_unlock(&minor_lock);
+ kfree(old);
+ }
+
+ spin_lock(&minor_lock);
+ if (find_next_bit(minors, end, minor) >= end) {
+ for (; minor < end; ++minor)
+ __set_bit(minor, minors);
+ rc = 0;
+ } else
+ rc = -EBUSY;
+ spin_unlock(&minor_lock);
+
+ return rc;
+}
+
+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
+{
+ unsigned int end = minor + nr;
+
+ BUG_ON(end > nr_minors);
+ spin_lock(&minor_lock);
+ for (; minor < end; ++minor)
+ __clear_bit(minor, minors);
+ spin_unlock(&minor_lock);
+}
+
static void blkif_restart_queue_callback(void *arg)
{
struct blkfront_info *info = (struct blkfront_info *)arg;
@@ -238,7 +288,7 @@ static int blkif_queue_request(struct request *req)
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
- if (blk_barrier_rq(req))
+ if (req->cmd_flags & REQ_HARDBARRIER)
ring_req->operation = BLKIF_OP_WRITE_BARRIER;
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -309,7 +359,7 @@ static void do_blkif_request(struct request_queue *rq)
blk_start_request(req);
- if (!blk_fs_request(req)) {
+ if (req->cmd_type != REQ_TYPE_FS) {
__blk_end_request_all(req, -EIO);
continue;
}
@@ -371,17 +421,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
static int xlvbd_barrier(struct blkfront_info *info)
{
int err;
+ const char *barrier;
- err = blk_queue_ordered(info->rq,
- info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
- NULL);
+ switch (info->feature_barrier) {
+ case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break;
+ case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break;
+ case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
+ default: return -EINVAL;
+ }
+
+ err = blk_queue_ordered(info->rq, info->feature_barrier);
if (err)
return err;
printk(KERN_INFO "blkfront: %s: barriers %s\n",
- info->gd->disk_name,
- info->feature_barrier ? "enabled" : "disabled");
+ info->gd->disk_name, barrier);
return 0;
}
@@ -417,9 +472,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if ((minor % nr_parts) == 0)
nr_minors = nr_parts;
+ err = xlbd_reserve_minors(minor, nr_minors);
+ if (err)
+ goto out;
+ err = -ENODEV;
+
gd = alloc_disk(nr_minors);
if (gd == NULL)
- goto out;
+ goto release;
offset = minor / nr_parts;
@@ -450,14 +510,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (xlvbd_init_blk_queue(gd, sector_size)) {
del_gendisk(gd);
- goto out;
+ goto release;
}
info->rq = gd->queue;
info->gd = gd;
- if (info->feature_barrier)
- xlvbd_barrier(info);
+ xlvbd_barrier(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -470,10 +529,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
return 0;
+ release:
+ xlbd_release_minors(minor, nr_minors);
out:
return err;
}
+static void xlvbd_release_gendisk(struct blkfront_info *info)
+{
+ unsigned int minor, nr_minors;
+ unsigned long flags;
+
+ if (info->rq == NULL)
+ return;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ /* No more blkif_request(). */
+ blk_stop_queue(info->rq);
+
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
+ del_gendisk(info->gd);
+
+ minor = info->gd->first_minor;
+ nr_minors = info->gd->minors;
+ xlbd_release_minors(minor, nr_minors);
+
+ blk_cleanup_queue(info->rq);
+ info->rq = NULL;
+
+ put_disk(info->gd);
+ info->gd = NULL;
+}
+
static void kick_pending_request_queues(struct blkfront_info *info)
{
if (!RING_FULL(&info->ring)) {
@@ -568,7 +662,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
info->gd->disk_name);
error = -EOPNOTSUPP;
- info->feature_barrier = 0;
+ info->feature_barrier = QUEUE_ORDERED_NONE;
xlvbd_barrier(info);
}
/* fall through */
@@ -651,7 +745,7 @@ fail:
/* Common code used when first setting up, and when resuming. */
-static int talk_to_backend(struct xenbus_device *dev,
+static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
{
const char *message = NULL;
@@ -711,7 +805,6 @@ again:
return err;
}
-
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffer for communication with the backend, and
@@ -737,12 +830,42 @@ static int blkfront_probe(struct xenbus_device *dev,
}
}
+ if (xen_hvm_domain()) {
+ char *type;
+ int len;
+ /* no unplug has been done: do not hook devices != xen vbds */
+ if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) {
+ int major;
+
+ if (!VDEV_IS_EXTENDED(vdevice))
+ major = BLKIF_MAJOR(vdevice);
+ else
+ major = XENVBD_MAJOR;
+
+ if (major != XENVBD_MAJOR) {
+ printk(KERN_INFO
+ "%s: HVM does not support vbd %d as xen block device\n",
+ __FUNCTION__, vdevice);
+ return -ENODEV;
+ }
+ }
+ /* do not create a PV cdrom device if we are an HVM guest */
+ type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
+ if (IS_ERR(type))
+ return -ENODEV;
+ if (strncmp(type, "cdrom", 5) == 0) {
+ kfree(type);
+ return -ENODEV;
+ }
+ kfree(type);
+ }
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
return -ENOMEM;
}
+ mutex_init(&info->mutex);
info->xbdev = dev;
info->vdevice = vdevice;
info->connected = BLKIF_STATE_DISCONNECTED;
@@ -756,7 +879,7 @@ static int blkfront_probe(struct xenbus_device *dev,
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
- err = talk_to_backend(dev, info);
+ err = talk_to_blkback(dev, info);
if (err) {
kfree(info);
dev_set_drvdata(&dev->dev, NULL);
@@ -851,13 +974,50 @@ static int blkfront_resume(struct xenbus_device *dev)
blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
- err = talk_to_backend(dev, info);
+ err = talk_to_blkback(dev, info);
if (info->connected == BLKIF_STATE_SUSPENDED && !err)
err = blkif_recover(info);
return err;
}
+static void
+blkfront_closing(struct blkfront_info *info)
+{
+ struct xenbus_device *xbdev = info->xbdev;
+ struct block_device *bdev = NULL;
+
+ mutex_lock(&info->mutex);
+
+ if (xbdev->state == XenbusStateClosing) {
+ mutex_unlock(&info->mutex);
+ return;
+ }
+
+ if (info->gd)
+ bdev = bdget_disk(info->gd, 0);
+
+ mutex_unlock(&info->mutex);
+
+ if (!bdev) {
+ xenbus_frontend_closed(xbdev);
+ return;
+ }
+
+ mutex_lock(&bdev->bd_mutex);
+
+ if (bdev->bd_openers) {
+ xenbus_dev_error(xbdev, -EBUSY,
+ "Device in use; refusing to close");
+ xenbus_switch_state(xbdev, XenbusStateClosing);
+ } else {
+ xlvbd_release_gendisk(info);
+ xenbus_frontend_closed(xbdev);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+}
/*
* Invoked when the backend is finally 'ready' (and has told produced
@@ -869,11 +1029,31 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int binfo;
int err;
-
- if ((info->connected == BLKIF_STATE_CONNECTED) ||
- (info->connected == BLKIF_STATE_SUSPENDED) )
+ int barrier;
+
+ switch (info->connected) {
+ case BLKIF_STATE_CONNECTED:
+ /*
+ * Potentially, the back-end may be signalling
+ * a capacity change; update the capacity.
+ */
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "sectors", "%Lu", &sectors);
+ if (XENBUS_EXIST_ERR(err))
+ return;
+ printk(KERN_INFO "Setting capacity to %Lu\n",
+ sectors);
+ set_capacity(info->gd, sectors);
+ revalidate_disk(info->gd);
+
+ /* fall through */
+ case BLKIF_STATE_SUSPENDED:
return;
+ default:
+ break;
+ }
+
dev_dbg(&info->xbdev->dev, "%s:%s.\n",
__func__, info->xbdev->otherend);
@@ -890,10 +1070,26 @@ static void blkfront_connect(struct blkfront_info *info)
}
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- "feature-barrier", "%lu", &info->feature_barrier,
+ "feature-barrier", "%lu", &barrier,
NULL);
+
+ /*
+ * If there's no "feature-barrier" defined, then it means
+ * we're dealing with a very old backend which writes
+ * synchronously; draining will do what needs to get done.
+ *
+ * If there are barriers, then we can do full queued writes
+ * with tagged barriers.
+ *
+ * If barriers are not supported, then there's no much we can
+ * do, so just set ordering to NONE.
+ */
if (err)
- info->feature_barrier = 0;
+ info->feature_barrier = QUEUE_ORDERED_DRAIN;
+ else if (barrier)
+ info->feature_barrier = QUEUE_ORDERED_TAG;
+ else
+ info->feature_barrier = QUEUE_ORDERED_NONE;
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {
@@ -916,52 +1112,14 @@ static void blkfront_connect(struct blkfront_info *info)
}
/**
- * Handle the change of state of the backend to Closing. We must delete our
- * device-layer structures now, to ensure that writes are flushed through to
- * the backend. Once is this done, we can switch to Closed in
- * acknowledgement.
- */
-static void blkfront_closing(struct xenbus_device *dev)
-{
- struct blkfront_info *info = dev_get_drvdata(&dev->dev);
- unsigned long flags;
-
- dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
-
- if (info->rq == NULL)
- goto out;
-
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- /* No more blkif_request(). */
- blk_stop_queue(info->rq);
-
- /* No more gnttab callback work. */
- gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- /* Flush gnttab callback work. Must be done with no locks held. */
- flush_scheduled_work();
-
- blk_cleanup_queue(info->rq);
- info->rq = NULL;
-
- del_gendisk(info->gd);
-
- out:
- xenbus_frontend_closed(dev);
-}
-
-/**
* Callback received when the backend's state changes.
*/
-static void backend_changed(struct xenbus_device *dev,
+static void blkback_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
struct blkfront_info *info = dev_get_drvdata(&dev->dev);
- struct block_device *bd;
- dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
+ dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
switch (backend_state) {
case XenbusStateInitialising:
@@ -976,35 +1134,56 @@ static void backend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
- if (info->gd == NULL) {
- xenbus_frontend_closed(dev);
- break;
- }
- bd = bdget_disk(info->gd, 0);
- if (bd == NULL)
- xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
-
- mutex_lock(&bd->bd_mutex);
- if (info->users > 0)
- xenbus_dev_error(dev, -EBUSY,
- "Device in use; refusing to close");
- else
- blkfront_closing(dev);
- mutex_unlock(&bd->bd_mutex);
- bdput(bd);
+ blkfront_closing(info);
break;
}
}
-static int blkfront_remove(struct xenbus_device *dev)
+static int blkfront_remove(struct xenbus_device *xbdev)
{
- struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+ struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
+ struct block_device *bdev = NULL;
+ struct gendisk *disk;
- dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
+ dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
blkif_free(info, 0);
- kfree(info);
+ mutex_lock(&info->mutex);
+
+ disk = info->gd;
+ if (disk)
+ bdev = bdget_disk(disk, 0);
+
+ info->xbdev = NULL;
+ mutex_unlock(&info->mutex);
+
+ if (!bdev) {
+ kfree(info);
+ return 0;
+ }
+
+ /*
+ * The xbdev was removed before we reached the Closed
+ * state. See if it's safe to remove the disk. If the bdev
+ * isn't closed yet, we let release take care of it.
+ */
+
+ mutex_lock(&bdev->bd_mutex);
+ info = disk->private_data;
+
+ dev_warn(disk_to_dev(disk),
+ "%s was hot-unplugged, %d stale handles\n",
+ xbdev->nodename, bdev->bd_openers);
+
+ if (info && !bdev->bd_openers) {
+ xlvbd_release_gendisk(info);
+ disk->private_data = NULL;
+ kfree(info);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
return 0;
}
@@ -1013,30 +1192,78 @@ static int blkfront_is_ready(struct xenbus_device *dev)
{
struct blkfront_info *info = dev_get_drvdata(&dev->dev);
- return info->is_ready;
+ return info->is_ready && info->xbdev;
}
static int blkif_open(struct block_device *bdev, fmode_t mode)
{
- struct blkfront_info *info = bdev->bd_disk->private_data;
- info->users++;
- return 0;
+ struct gendisk *disk = bdev->bd_disk;
+ struct blkfront_info *info;
+ int err = 0;
+
+ lock_kernel();
+
+ info = disk->private_data;
+ if (!info) {
+ /* xbdev gone */
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ mutex_lock(&info->mutex);
+
+ if (!info->gd)
+ /* xbdev is closed */
+ err = -ERESTARTSYS;
+
+ mutex_unlock(&info->mutex);
+
+out:
+ unlock_kernel();
+ return err;
}
static int blkif_release(struct gendisk *disk, fmode_t mode)
{
struct blkfront_info *info = disk->private_data;
- info->users--;
- if (info->users == 0) {
- /* Check whether we have been instructed to close. We will
- have ignored this request initially, as the device was
- still mounted. */
- struct xenbus_device *dev = info->xbdev;
- enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
-
- if (state == XenbusStateClosing && info->is_ready)
- blkfront_closing(dev);
+ struct block_device *bdev;
+ struct xenbus_device *xbdev;
+
+ lock_kernel();
+
+ bdev = bdget_disk(disk, 0);
+ bdput(bdev);
+
+ if (bdev->bd_openers)
+ goto out;
+
+ /*
+ * Check if we have been instructed to close. We will have
+ * deferred this request, because the bdev was still open.
+ */
+
+ mutex_lock(&info->mutex);
+ xbdev = info->xbdev;
+
+ if (xbdev && xbdev->state == XenbusStateClosing) {
+ /* pending switch to state closed */
+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+ xlvbd_release_gendisk(info);
+ xenbus_frontend_closed(info->xbdev);
+ }
+
+ mutex_unlock(&info->mutex);
+
+ if (!xbdev) {
+ /* sudden device removal */
+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+ xlvbd_release_gendisk(info);
+ disk->private_data = NULL;
+ kfree(info);
}
+
+out:
+ unlock_kernel();
return 0;
}
@@ -1046,7 +1273,7 @@ static const struct block_device_operations xlvbd_block_fops =
.open = blkif_open,
.release = blkif_release,
.getgeo = blkif_getgeo,
- .locked_ioctl = blkif_ioctl,
+ .ioctl = blkif_ioctl,
};
@@ -1062,7 +1289,7 @@ static struct xenbus_driver blkfront = {
.probe = blkfront_probe,
.remove = blkfront_remove,
.resume = blkfront_resume,
- .otherend_changed = backend_changed,
+ .otherend_changed = blkback_changed,
.is_ready = blkfront_is_ready,
};