diff options
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r-- | drivers/block/xen-blkfront.c | 433 |
1 files changed, 330 insertions, 103 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 82ed403147c..ac1b682edec 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -41,6 +41,7 @@ #include <linux/cdrom.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/scatterlist.h> #include <xen/xen.h> @@ -48,6 +49,7 @@ #include <xen/grant_table.h> #include <xen/events.h> #include <xen/page.h> +#include <xen/platform_pci.h> #include <xen/interface/grant_table.h> #include <xen/interface/io/blkif.h> @@ -78,6 +80,7 @@ static const struct block_device_operations xlvbd_block_fops; */ struct blkfront_info { + struct mutex mutex; struct xenbus_device *xbdev; struct gendisk *gd; int vdevice; @@ -94,16 +97,14 @@ struct blkfront_info unsigned long shadow_free; int feature_barrier; int is_ready; - - /** - * The number of people holding this device open. We won't allow a - * hot-unplug unless this is 0. - */ - int users; }; static DEFINE_SPINLOCK(blkif_io_lock); +static unsigned int nr_minors; +static unsigned long *minors; +static DEFINE_SPINLOCK(minor_lock); + #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) #define GRANT_INVALID_REF 0 @@ -138,6 +139,55 @@ static void add_id_to_freelist(struct blkfront_info *info, info->shadow_free = id; } +static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) +{ + unsigned int end = minor + nr; + int rc; + + if (end > nr_minors) { + unsigned long *bitmap, *old; + + bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), + GFP_KERNEL); + if (bitmap == NULL) + return -ENOMEM; + + spin_lock(&minor_lock); + if (end > nr_minors) { + old = minors; + memcpy(bitmap, minors, + BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); + minors = bitmap; + nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; + } else + old = bitmap; + spin_unlock(&minor_lock); + kfree(old); + } + + spin_lock(&minor_lock); + if (find_next_bit(minors, end, minor) >= end) { + for (; minor < end; ++minor) + __set_bit(minor, minors); + rc = 0; + } else + rc = -EBUSY; + spin_unlock(&minor_lock); + + return rc; +} + +static void xlbd_release_minors(unsigned int minor, unsigned int nr) +{ + unsigned int end = minor + nr; + + BUG_ON(end > nr_minors); + spin_lock(&minor_lock); + for (; minor < end; ++minor) + __clear_bit(minor, minors); + spin_unlock(&minor_lock); +} + static void blkif_restart_queue_callback(void *arg) { struct blkfront_info *info = (struct blkfront_info *)arg; @@ -238,7 +288,7 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (blk_barrier_rq(req)) + if (req->cmd_flags & REQ_HARDBARRIER) ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -309,7 +359,7 @@ static void do_blkif_request(struct request_queue *rq) blk_start_request(req); - if (!blk_fs_request(req)) { + if (req->cmd_type != REQ_TYPE_FS) { __blk_end_request_all(req, -EIO); continue; } @@ -371,17 +421,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) static int xlvbd_barrier(struct blkfront_info *info) { int err; + const char *barrier; - err = blk_queue_ordered(info->rq, - info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, - NULL); + switch (info->feature_barrier) { + case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; + case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; + case QUEUE_ORDERED_NONE: barrier = "disabled"; break; + default: return -EINVAL; + } + + err = blk_queue_ordered(info->rq, info->feature_barrier); if (err) return err; printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, - info->feature_barrier ? "enabled" : "disabled"); + info->gd->disk_name, barrier); return 0; } @@ -417,9 +472,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if ((minor % nr_parts) == 0) nr_minors = nr_parts; + err = xlbd_reserve_minors(minor, nr_minors); + if (err) + goto out; + err = -ENODEV; + gd = alloc_disk(nr_minors); if (gd == NULL) - goto out; + goto release; offset = minor / nr_parts; @@ -450,14 +510,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (xlvbd_init_blk_queue(gd, sector_size)) { del_gendisk(gd); - goto out; + goto release; } info->rq = gd->queue; info->gd = gd; - if (info->feature_barrier) - xlvbd_barrier(info); + xlvbd_barrier(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -470,10 +529,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, return 0; + release: + xlbd_release_minors(minor, nr_minors); out: return err; } +static void xlvbd_release_gendisk(struct blkfront_info *info) +{ + unsigned int minor, nr_minors; + unsigned long flags; + + if (info->rq == NULL) + return; + + spin_lock_irqsave(&blkif_io_lock, flags); + + /* No more blkif_request(). */ + blk_stop_queue(info->rq); + + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&info->callback); + spin_unlock_irqrestore(&blkif_io_lock, flags); + + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_scheduled_work(); + + del_gendisk(info->gd); + + minor = info->gd->first_minor; + nr_minors = info->gd->minors; + xlbd_release_minors(minor, nr_minors); + + blk_cleanup_queue(info->rq); + info->rq = NULL; + + put_disk(info->gd); + info->gd = NULL; +} + static void kick_pending_request_queues(struct blkfront_info *info) { if (!RING_FULL(&info->ring)) { @@ -568,7 +662,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; - info->feature_barrier = 0; + info->feature_barrier = QUEUE_ORDERED_NONE; xlvbd_barrier(info); } /* fall through */ @@ -651,7 +745,7 @@ fail: /* Common code used when first setting up, and when resuming. */ -static int talk_to_backend(struct xenbus_device *dev, +static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) { const char *message = NULL; @@ -711,7 +805,6 @@ again: return err; } - /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffer for communication with the backend, and @@ -737,12 +830,42 @@ static int blkfront_probe(struct xenbus_device *dev, } } + if (xen_hvm_domain()) { + char *type; + int len; + /* no unplug has been done: do not hook devices != xen vbds */ + if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { + int major; + + if (!VDEV_IS_EXTENDED(vdevice)) + major = BLKIF_MAJOR(vdevice); + else + major = XENVBD_MAJOR; + + if (major != XENVBD_MAJOR) { + printk(KERN_INFO + "%s: HVM does not support vbd %d as xen block device\n", + __FUNCTION__, vdevice); + return -ENODEV; + } + } + /* do not create a PV cdrom device if we are an HVM guest */ + type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); + if (IS_ERR(type)) + return -ENODEV; + if (strncmp(type, "cdrom", 5) == 0) { + kfree(type); + return -ENODEV; + } + kfree(type); + } info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); return -ENOMEM; } + mutex_init(&info->mutex); info->xbdev = dev; info->vdevice = vdevice; info->connected = BLKIF_STATE_DISCONNECTED; @@ -756,7 +879,7 @@ static int blkfront_probe(struct xenbus_device *dev, info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_backend(dev, info); + err = talk_to_blkback(dev, info); if (err) { kfree(info); dev_set_drvdata(&dev->dev, NULL); @@ -851,13 +974,50 @@ static int blkfront_resume(struct xenbus_device *dev) blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); - err = talk_to_backend(dev, info); + err = talk_to_blkback(dev, info); if (info->connected == BLKIF_STATE_SUSPENDED && !err) err = blkif_recover(info); return err; } +static void +blkfront_closing(struct blkfront_info *info) +{ + struct xenbus_device *xbdev = info->xbdev; + struct block_device *bdev = NULL; + + mutex_lock(&info->mutex); + + if (xbdev->state == XenbusStateClosing) { + mutex_unlock(&info->mutex); + return; + } + + if (info->gd) + bdev = bdget_disk(info->gd, 0); + + mutex_unlock(&info->mutex); + + if (!bdev) { + xenbus_frontend_closed(xbdev); + return; + } + + mutex_lock(&bdev->bd_mutex); + + if (bdev->bd_openers) { + xenbus_dev_error(xbdev, -EBUSY, + "Device in use; refusing to close"); + xenbus_switch_state(xbdev, XenbusStateClosing); + } else { + xlvbd_release_gendisk(info); + xenbus_frontend_closed(xbdev); + } + + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); +} /* * Invoked when the backend is finally 'ready' (and has told produced @@ -869,11 +1029,31 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; - - if ((info->connected == BLKIF_STATE_CONNECTED) || - (info->connected == BLKIF_STATE_SUSPENDED) ) + int barrier; + + switch (info->connected) { + case BLKIF_STATE_CONNECTED: + /* + * Potentially, the back-end may be signalling + * a capacity change; update the capacity. + */ + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "sectors", "%Lu", §ors); + if (XENBUS_EXIST_ERR(err)) + return; + printk(KERN_INFO "Setting capacity to %Lu\n", + sectors); + set_capacity(info->gd, sectors); + revalidate_disk(info->gd); + + /* fall through */ + case BLKIF_STATE_SUSPENDED: return; + default: + break; + } + dev_dbg(&info->xbdev->dev, "%s:%s.\n", __func__, info->xbdev->otherend); @@ -890,10 +1070,26 @@ static void blkfront_connect(struct blkfront_info *info) } err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%lu", &info->feature_barrier, + "feature-barrier", "%lu", &barrier, NULL); + + /* + * If there's no "feature-barrier" defined, then it means + * we're dealing with a very old backend which writes + * synchronously; draining will do what needs to get done. + * + * If there are barriers, then we can do full queued writes + * with tagged barriers. + * + * If barriers are not supported, then there's no much we can + * do, so just set ordering to NONE. + */ if (err) - info->feature_barrier = 0; + info->feature_barrier = QUEUE_ORDERED_DRAIN; + else if (barrier) + info->feature_barrier = QUEUE_ORDERED_TAG; + else + info->feature_barrier = QUEUE_ORDERED_NONE; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { @@ -916,52 +1112,14 @@ static void blkfront_connect(struct blkfront_info *info) } /** - * Handle the change of state of the backend to Closing. We must delete our - * device-layer structures now, to ensure that writes are flushed through to - * the backend. Once is this done, we can switch to Closed in - * acknowledgement. - */ -static void blkfront_closing(struct xenbus_device *dev) -{ - struct blkfront_info *info = dev_get_drvdata(&dev->dev); - unsigned long flags; - - dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename); - - if (info->rq == NULL) - goto out; - - spin_lock_irqsave(&blkif_io_lock, flags); - - /* No more blkif_request(). */ - blk_stop_queue(info->rq); - - /* No more gnttab callback work. */ - gnttab_cancel_free_callback(&info->callback); - spin_unlock_irqrestore(&blkif_io_lock, flags); - - /* Flush gnttab callback work. Must be done with no locks held. */ - flush_scheduled_work(); - - blk_cleanup_queue(info->rq); - info->rq = NULL; - - del_gendisk(info->gd); - - out: - xenbus_frontend_closed(dev); -} - -/** * Callback received when the backend's state changes. */ -static void backend_changed(struct xenbus_device *dev, +static void blkback_changed(struct xenbus_device *dev, enum xenbus_state backend_state) { struct blkfront_info *info = dev_get_drvdata(&dev->dev); - struct block_device *bd; - dev_dbg(&dev->dev, "blkfront:backend_changed.\n"); + dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); switch (backend_state) { case XenbusStateInitialising: @@ -976,35 +1134,56 @@ static void backend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - if (info->gd == NULL) { - xenbus_frontend_closed(dev); - break; - } - bd = bdget_disk(info->gd, 0); - if (bd == NULL) - xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); - - mutex_lock(&bd->bd_mutex); - if (info->users > 0) - xenbus_dev_error(dev, -EBUSY, - "Device in use; refusing to close"); - else - blkfront_closing(dev); - mutex_unlock(&bd->bd_mutex); - bdput(bd); + blkfront_closing(info); break; } } -static int blkfront_remove(struct xenbus_device *dev) +static int blkfront_remove(struct xenbus_device *xbdev) { - struct blkfront_info *info = dev_get_drvdata(&dev->dev); + struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); + struct block_device *bdev = NULL; + struct gendisk *disk; - dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename); + dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); blkif_free(info, 0); - kfree(info); + mutex_lock(&info->mutex); + + disk = info->gd; + if (disk) + bdev = bdget_disk(disk, 0); + + info->xbdev = NULL; + mutex_unlock(&info->mutex); + + if (!bdev) { + kfree(info); + return 0; + } + + /* + * The xbdev was removed before we reached the Closed + * state. See if it's safe to remove the disk. If the bdev + * isn't closed yet, we let release take care of it. + */ + + mutex_lock(&bdev->bd_mutex); + info = disk->private_data; + + dev_warn(disk_to_dev(disk), + "%s was hot-unplugged, %d stale handles\n", + xbdev->nodename, bdev->bd_openers); + + if (info && !bdev->bd_openers) { + xlvbd_release_gendisk(info); + disk->private_data = NULL; + kfree(info); + } + + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); return 0; } @@ -1013,30 +1192,78 @@ static int blkfront_is_ready(struct xenbus_device *dev) { struct blkfront_info *info = dev_get_drvdata(&dev->dev); - return info->is_ready; + return info->is_ready && info->xbdev; } static int blkif_open(struct block_device *bdev, fmode_t mode) { - struct blkfront_info *info = bdev->bd_disk->private_data; - info->users++; - return 0; + struct gendisk *disk = bdev->bd_disk; + struct blkfront_info *info; + int err = 0; + + lock_kernel(); + + info = disk->private_data; + if (!info) { + /* xbdev gone */ + err = -ERESTARTSYS; + goto out; + } + + mutex_lock(&info->mutex); + + if (!info->gd) + /* xbdev is closed */ + err = -ERESTARTSYS; + + mutex_unlock(&info->mutex); + +out: + unlock_kernel(); + return err; } static int blkif_release(struct gendisk *disk, fmode_t mode) { struct blkfront_info *info = disk->private_data; - info->users--; - if (info->users == 0) { - /* Check whether we have been instructed to close. We will - have ignored this request initially, as the device was - still mounted. */ - struct xenbus_device *dev = info->xbdev; - enum xenbus_state state = xenbus_read_driver_state(dev->otherend); - - if (state == XenbusStateClosing && info->is_ready) - blkfront_closing(dev); + struct block_device *bdev; + struct xenbus_device *xbdev; + + lock_kernel(); + + bdev = bdget_disk(disk, 0); + bdput(bdev); + + if (bdev->bd_openers) + goto out; + + /* + * Check if we have been instructed to close. We will have + * deferred this request, because the bdev was still open. + */ + + mutex_lock(&info->mutex); + xbdev = info->xbdev; + + if (xbdev && xbdev->state == XenbusStateClosing) { + /* pending switch to state closed */ + dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + xlvbd_release_gendisk(info); + xenbus_frontend_closed(info->xbdev); + } + + mutex_unlock(&info->mutex); + + if (!xbdev) { + /* sudden device removal */ + dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + xlvbd_release_gendisk(info); + disk->private_data = NULL; + kfree(info); } + +out: + unlock_kernel(); return 0; } @@ -1046,7 +1273,7 @@ static const struct block_device_operations xlvbd_block_fops = .open = blkif_open, .release = blkif_release, .getgeo = blkif_getgeo, - .locked_ioctl = blkif_ioctl, + .ioctl = blkif_ioctl, }; @@ -1062,7 +1289,7 @@ static struct xenbus_driver blkfront = { .probe = blkfront_probe, .remove = blkfront_remove, .resume = blkfront_resume, - .otherend_changed = backend_changed, + .otherend_changed = blkback_changed, .is_ready = blkfront_is_ready, }; |