diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 15 | ||||
-rw-r--r-- | drivers/block/aoe/aoe.h | 57 | ||||
-rw-r--r-- | drivers/block/aoe/aoeblk.c | 104 | ||||
-rw-r--r-- | drivers/block/aoe/aoechr.c | 7 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 717 | ||||
-rw-r--r-- | drivers/block/aoe/aoedev.c | 243 | ||||
-rw-r--r-- | drivers/block/aoe/aoemain.c | 2 | ||||
-rw-r--r-- | drivers/block/aoe/aoenet.c | 15 | ||||
-rw-r--r-- | drivers/block/cciss.c | 1 | ||||
-rw-r--r-- | drivers/block/floppy.c | 93 | ||||
-rw-r--r-- | drivers/block/loop.c | 17 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 57 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.h | 7 | ||||
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 18 | ||||
-rw-r--r-- | drivers/block/xen-blkback/common.h | 5 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 9 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 10 |
17 files changed, 967 insertions, 410 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f529407db93..824e09c4d0d 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -131,6 +131,7 @@ config BLK_CPQ_DA config BLK_CPQ_CISS_DA tristate "Compaq Smart Array 5xxx support" depends on PCI + select CHECK_SIGNATURE help This is the driver for Compaq Smart Array 5xxx controllers. Everyone using these boards should say Y here. @@ -166,8 +167,8 @@ config BLK_DEV_DAC960 module will be called DAC960. config BLK_DEV_UMEM - tristate "Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)" - depends on PCI && EXPERIMENTAL + tristate "Micro Memory MM5415 Battery Backed RAM support" + depends on PCI ---help--- Saying Y here will include support for the MM5415 family of battery backed (Non-volatile) RAM cards. @@ -430,8 +431,8 @@ config CDROM_PKTCDVD_BUFFERS a disc is opened for writing. config CDROM_PKTCDVD_WCACHE - bool "Enable write caching (EXPERIMENTAL)" - depends on CDROM_PKTCDVD && EXPERIMENTAL + bool "Enable write caching" + depends on CDROM_PKTCDVD help If enabled, write caching will be set for the CD-R/W device. For now this option is dangerous unless the CD-RW media is known good, as we @@ -508,8 +509,8 @@ config XEN_BLKDEV_BACKEND config VIRTIO_BLK - tristate "Virtio block driver (EXPERIMENTAL)" - depends on EXPERIMENTAL && VIRTIO + tristate "Virtio block driver" + depends on VIRTIO ---help--- This is the virtual block driver for virtio. It can be used with lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. @@ -528,7 +529,7 @@ config BLK_DEV_HD config BLK_DEV_RBD tristate "Rados block device (RBD)" - depends on INET && EXPERIMENTAL && BLOCK + depends on INET && BLOCK select CEPH_LIB select LIBCRC32C select CRYPTO_AES diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index d2ed7f18d1a..175649468c9 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "50" +#define VERSION "81" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -10,7 +10,7 @@ #define AOE_PARTITIONS (16) #endif -#define WHITESPACE " \t\v\f\n" +#define WHITESPACE " \t\v\f\n," enum { AOECMD_ATA, @@ -73,21 +73,29 @@ enum { DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */ - DEVFL_KICKME = (1<<4), /* slow polling network card catch */ - DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */ + DEVFL_GD_NOW = (1<<4), /* allocating gendisk */ + DEVFL_KICKME = (1<<5), /* slow polling network card catch */ + DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ + DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */ + DEVFL_FREED = (1<<8), /* device has been cleaned up */ }; enum { DEFAULTBCNT = 2 * 512, /* 2 sectors */ MIN_BUFS = 16, - NTARGETS = 8, + NTARGETS = 4, NAOEIFS = 8, NSKBPOOLMAX = 256, NFACTIVE = 61, TIMERTICK = HZ / 10, - MINTIMER = HZ >> 2, - MAXTIMER = HZ << 1, + RTTSCALE = 8, + RTTDSCALE = 3, + RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE, + RTTDEV_INIT = RTTAVG_INIT / 4, + + HARD_SCORN_SECS = 10, /* try another remote port after this */ + MAX_TAINT = 1000, /* cap on aoetgt taint */ }; struct buf { @@ -100,10 +108,17 @@ struct buf { struct request *rq; }; +enum frame_flags { + FFL_PROBE = 1, +}; + struct frame { struct list_head head; u32 tag; + struct timeval sent; /* high-res time packet was sent */ + u32 sent_jiffs; /* low-res jiffies-based sent time */ ulong waited; + ulong waited_total; struct aoetgt *t; /* parent target I belong to */ sector_t lba; struct sk_buff *skb; /* command skb freed on module exit */ @@ -112,6 +127,7 @@ struct frame { struct bio_vec *bv; ulong bcnt; ulong bv_off; + char flags; }; struct aoeif { @@ -122,28 +138,31 @@ struct aoeif { struct aoetgt { unsigned char addr[6]; - ushort nframes; + ushort nframes; /* cap on frames to use */ struct aoedev *d; /* parent device I belong to */ struct list_head ffree; /* list of free frames */ struct aoeif ifs[NAOEIFS]; struct aoeif *ifp; /* current aoeif in use */ - ushort nout; - ushort maxout; - ulong falloc; - ulong lastwadj; /* last window adjustment */ + ushort nout; /* number of AoE commands outstanding */ + ushort maxout; /* current value for max outstanding */ + ushort next_cwnd; /* incr maxout after decrementing to zero */ + ushort ssthresh; /* slow start threshold */ + ulong falloc; /* number of allocated frames */ + int taint; /* how much we want to avoid this aoetgt */ int minbcnt; int wpkts, rpkts; + char nout_probes; }; struct aoedev { struct aoedev *next; ulong sysminor; ulong aoemajor; + u32 rttavg; /* scaled AoE round trip time average */ + u32 rttdev; /* scaled round trip time mean deviation */ u16 aoeminor; u16 flags; u16 nopen; /* (bd_openers isn't available without sleeping) */ - u16 rttavg; /* round trip average of requests/responses */ - u16 mintimer; u16 fw_ver; /* version of blade's firmware */ u16 lasttag; /* last tag sent */ u16 useme; @@ -151,7 +170,7 @@ struct aoedev { struct work_struct work;/* disk create work struct */ struct gendisk *gd; struct request_queue *blkq; - struct hd_geometry geo; + struct hd_geometry geo; sector_t ssize; struct timer_list timer; spinlock_t lock; @@ -164,11 +183,12 @@ struct aoedev { } ip; ulong maxbcnt; struct list_head factive[NFACTIVE]; /* hash of active frames */ - struct aoetgt *targets[NTARGETS]; + struct list_head rexmitq; /* deferred retransmissions */ + struct aoetgt **targets; + ulong ntargets; /* number of allocated aoetgt pointers */ struct aoetgt **tgt; /* target in use when working */ - struct aoetgt *htgt; /* target needing rexmit assistance */ - ulong ntargets; ulong kicked; + char ident[512]; }; /* kthread tracking */ @@ -195,6 +215,7 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); struct sk_buff *aoecmd_ata_rsp(struct sk_buff *); void aoecmd_cfg_rsp(struct sk_buff *); void aoecmd_sleepwork(struct work_struct *); +void aoecmd_wreset(struct aoetgt *t); void aoecmd_cleanslate(struct aoedev *); void aoecmd_exit(void); int aoecmd_init(void); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 00dfc5008ad..a129f8c8073 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -16,11 +16,19 @@ #include <linux/netdevice.h> #include <linux/mutex.h> #include <linux/export.h> +#include <linux/moduleparam.h> +#include <scsi/sg.h> #include "aoe.h" static DEFINE_MUTEX(aoeblk_mutex); static struct kmem_cache *buf_pool_cache; +/* GPFS needs a larger value than the default. */ +static int aoe_maxsectors; +module_param(aoe_maxsectors, int, 0644); +MODULE_PARM_DESC(aoe_maxsectors, + "When nonzero, set the maximum number of sectors per I/O request"); + static ssize_t aoedisk_show_state(struct device *dev, struct device_attribute *attr, char *page) { @@ -59,7 +67,7 @@ static ssize_t aoedisk_show_netif(struct device *dev, nd = nds; ne = nd + ARRAY_SIZE(nds); t = d->targets; - te = t + NTARGETS; + te = t + d->ntargets; for (; t < te && *t; t++) { ifp = (*t)->ifs; e = ifp + NAOEIFS; @@ -91,6 +99,14 @@ static ssize_t aoedisk_show_fwver(struct device *dev, return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); } +static ssize_t aoedisk_show_payload(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct gendisk *disk = dev_to_disk(dev); + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); +} static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); @@ -99,12 +115,14 @@ static struct device_attribute dev_attr_firmware_version = { .attr = { .name = "firmware-version", .mode = S_IRUGO }, .show = aoedisk_show_fwver, }; +static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL); static struct attribute *aoe_attrs[] = { &dev_attr_state.attr, &dev_attr_mac.attr, &dev_attr_netif.attr, &dev_attr_firmware_version.attr, + &dev_attr_payload.attr, NULL, }; @@ -129,9 +147,18 @@ aoeblk_open(struct block_device *bdev, fmode_t mode) struct aoedev *d = bdev->bd_disk->private_data; ulong flags; + if (!virt_addr_valid(d)) { + pr_crit("aoe: invalid device pointer in %s\n", + __func__); + WARN_ON(1); + return -ENODEV; + } + if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL) + return -ENODEV; + mutex_lock(&aoeblk_mutex); spin_lock_irqsave(&d->lock, flags); - if (d->flags & DEVFL_UP) { + if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) { d->nopen++; spin_unlock_irqrestore(&d->lock, flags); mutex_unlock(&aoeblk_mutex); @@ -195,9 +222,38 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +static int +aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg) +{ + struct aoedev *d; + + if (!arg) + return -EINVAL; + + d = bdev->bd_disk->private_data; + if ((d->flags & DEVFL_UP) == 0) { + pr_err("aoe: disk not up\n"); + return -ENODEV; + } + + if (cmd == HDIO_GET_IDENTITY) { + if (!copy_to_user((void __user *) arg, &d->ident, + sizeof(d->ident))) + return 0; + return -EFAULT; + } + + /* udev calls scsi_id, which uses SG_IO, resulting in noise */ + if (cmd != SG_IO) + pr_info("aoe: unknown ioctl 0x%x\n", cmd); + + return -ENOTTY; +} + static const struct block_device_operations aoe_bdops = { .open = aoeblk_open, .release = aoeblk_release, + .ioctl = aoeblk_ioctl, .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; @@ -212,6 +268,18 @@ aoeblk_gdalloc(void *vp) struct request_queue *q; enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; ulong flags; + int late = 0; + + spin_lock_irqsave(&d->lock, flags); + if (d->flags & DEVFL_GDALLOC + && !(d->flags & DEVFL_TKILL) + && !(d->flags & DEVFL_GD_NOW)) + d->flags |= DEVFL_GD_NOW; + else + late = 1; + spin_unlock_irqrestore(&d->lock, flags); + if (late) + return; gd = alloc_disk(AOE_PARTITIONS); if (gd == NULL) { @@ -231,23 +299,24 @@ aoeblk_gdalloc(void *vp) if (q == NULL) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); - mempool_destroy(mp); - goto err_disk; + goto err_mempool; } - d->blkq = blk_alloc_queue(GFP_KERNEL); - if (!d->blkq) - goto err_mempool; - d->blkq->backing_dev_info.name = "aoe"; - if (bdi_init(&d->blkq->backing_dev_info)) - goto err_blkq; spin_lock_irqsave(&d->lock, flags); - blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS); + WARN_ON(!(d->flags & DEVFL_GD_NOW)); + WARN_ON(!(d->flags & DEVFL_GDALLOC)); + WARN_ON(d->flags & DEVFL_TKILL); + WARN_ON(d->gd); + WARN_ON(d->flags & DEVFL_UP); + blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); + q->backing_dev_info.name = "aoe"; q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; d->gd = gd; + if (aoe_maxsectors) + blk_queue_max_hw_sectors(q, aoe_maxsectors); gd->major = AOE_MAJOR; gd->first_minor = d->sysminor; gd->fops = &aoe_bdops; @@ -263,18 +332,21 @@ aoeblk_gdalloc(void *vp) add_disk(gd); aoedisk_add_sysfs(d); + + spin_lock_irqsave(&d->lock, flags); + WARN_ON(!(d->flags & DEVFL_GD_NOW)); + d->flags &= ~DEVFL_GD_NOW; + spin_unlock_irqrestore(&d->lock, flags); return; -err_blkq: - blk_cleanup_queue(d->blkq); - d->blkq = NULL; err_mempool: - mempool_destroy(d->bufpool); + mempool_destroy(mp); err_disk: put_disk(gd); err: spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_GDALLOC; + d->flags &= ~DEVFL_GD_NOW; + schedule_work(&d->work); spin_unlock_irqrestore(&d->lock, flags); } diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index ed57a890c64..42e67ad6bd2 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -39,6 +39,11 @@ struct ErrMsg { }; static DEFINE_MUTEX(aoechr_mutex); + +/* A ring buffer of error messages, to be read through + * "/dev/etherd/err". When no messages are present, + * readers will block waiting for messages to appear. + */ static struct ErrMsg emsgs[NMSG]; static int emsgs_head_idx, emsgs_tail_idx; static struct completion emsgs_comp; @@ -282,7 +287,7 @@ aoechr_init(void) int n, i; n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops); - if (n < 0) { + if (n < 0) { printk(KERN_ERR "aoe: can't register char device\n"); return n; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 3804a0af3ef..25ef5c014fc 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -22,6 +22,7 @@ #define MAXIOC (8192) /* default meant to avoid most soft lockups */ static void ktcomplete(struct frame *, struct sk_buff *); +static int count_targets(struct aoedev *d, int *untainted); static struct buf *nextbuf(struct aoedev *); @@ -29,7 +30,7 @@ static int aoe_deadsecs = 60 * 3; module_param(aoe_deadsecs, int, 0644); MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); -static int aoe_maxout = 16; +static int aoe_maxout = 64; module_param(aoe_maxout, int, 0644); MODULE_PARM_DESC(aoe_maxout, "Only aoe_maxout outstanding packets for every MAC on eX.Y."); @@ -43,6 +44,8 @@ static struct { spinlock_t lock; } iocq; +static struct page *empty_page; + static struct sk_buff * new_skb(ulong len) { @@ -59,6 +62,23 @@ new_skb(ulong len) } static struct frame * +getframe_deferred(struct aoedev *d, u32 tag) +{ + struct list_head *head, *pos, *nx; + struct frame *f; + + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) { + f = list_entry(pos, struct frame, head); + if (f->tag == tag) { + list_del(pos); + return f; + } + } + return NULL; +} + +static struct frame * getframe(struct aoedev *d, u32 tag) { struct frame *f; @@ -162,8 +182,10 @@ aoe_freetframe(struct frame *f) t = f->t; f->buf = NULL; + f->lba = 0; f->bv = NULL; f->r_skb = NULL; + f->flags = 0; list_add(&f->head, &t->ffree); } @@ -217,20 +239,25 @@ newframe(struct aoedev *d) struct frame *f; struct aoetgt *t, **tt; int totout = 0; + int use_tainted; + int has_untainted; - if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ + if (!d->targets || !d->targets[0]) { printk(KERN_ERR "aoe: NULL TARGETS!\n"); return NULL; } tt = d->tgt; /* last used target */ - for (;;) { + for (use_tainted = 0, has_untainted = 0;;) { tt++; - if (tt >= &d->targets[NTARGETS] || !*tt) + if (tt >= &d->targets[d->ntargets] || !*tt) tt = d->targets; t = *tt; - totout += t->nout; + if (!t->taint) { + has_untainted = 1; + totout += t->nout; + } if (t->nout < t->maxout - && t != d->htgt + && (use_tainted || !t->taint) && t->ifp->nd) { f = newtframe(d, t); if (f) { @@ -239,8 +266,12 @@ newframe(struct aoedev *d) return f; } } - if (tt == d->tgt) /* we've looped and found nada */ - break; + if (tt == d->tgt) { /* we've looped and found nada */ + if (!use_tainted && !has_untainted) + use_tainted = 1; + else + break; + } } if (totout == 0) { d->kicked++; @@ -277,21 +308,68 @@ fhash(struct frame *f) list_add_tail(&f->head, &d->factive[n]); } +static void +ata_rw_frameinit(struct frame *f) +{ + struct aoetgt *t; + struct aoe_hdr *h; + struct aoe_atahdr *ah; + struct sk_buff *skb; + char writebit, extbit; + + skb = f->skb; + h = (struct aoe_hdr *) skb_mac_header(skb); + ah = (struct aoe_atahdr *) (h + 1); + skb_put(skb, sizeof(*h) + sizeof(*ah)); + memset(h, 0, skb->len); + + writebit = 0x10; + extbit = 0x4; + + t = f->t; + f->tag = aoehdr_atainit(t->d, t, h); + fhash(f); + t->nout++; + f->waited = 0; + f->waited_total = 0; + if (f->buf) + f->lba = f->buf->sector; + + /* set up ata header */ + ah->scnt = f->bcnt >> 9; + put_lba(ah, f->lba); + if (t->d->flags & DEVFL_EXT) { + ah->aflags |= AOEAFL_EXT; + } else { + extbit = 0; + ah->lba3 &= 0x0f; + ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ + } + if (f->buf && bio_data_dir(f->buf->bio) == WRITE) { + skb_fillup(skb, f->bv, f->bv_off, f->bcnt); + ah->aflags |= AOEAFL_WRITE; + skb->len += f->bcnt; + skb->data_len = f->bcnt; + skb->truesize += f->bcnt; + t->wpkts++; + } else { + t->rpkts++; + writebit = 0; + } + + ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit; + skb->dev = t->ifp->nd; +} + static int aoecmd_ata_rw(struct aoedev *d) { struct frame *f; - struct aoe_hdr *h; - struct aoe_atahdr *ah; struct buf *buf; struct aoetgt *t; struct sk_buff *skb; struct sk_buff_head queue; ulong bcnt, fbcnt; - char writebit, extbit; - - writebit = 0x10; - extbit = 0x4; buf = nextbuf(d); if (buf == NULL) @@ -326,50 +404,18 @@ aoecmd_ata_rw(struct aoedev *d) } while (fbcnt); /* initialize the headers & frame */ - skb = f->skb; - h = (struct aoe_hdr *) skb_mac_header(skb); - ah = (struct aoe_atahdr *) (h+1); - skb_put(skb, sizeof *h + sizeof *ah); - memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, t, h); - fhash(f); - t->nout++; - f->waited = 0; f->buf = buf; f->bcnt = bcnt; - f->lba = buf->sector; - - /* set up ata header */ - ah->scnt = bcnt >> 9; - put_lba(ah, buf->sector); - if (d->flags & DEVFL_EXT) { - ah->aflags |= AOEAFL_EXT; - } else { - extbit = 0; - ah->lba3 &= 0x0f; - ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ - } - if (bio_data_dir(buf->bio) == WRITE) { - skb_fillup(skb, f->bv, f->bv_off, bcnt); - ah->aflags |= AOEAFL_WRITE; - skb->len += bcnt; - skb->data_len = bcnt; - skb->truesize += bcnt; - t->wpkts++; - } else { - t->rpkts++; - writebit = 0; - } - - ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit; + ata_rw_frameinit(f); /* mark all tracking fields and load out */ buf->nframesout += 1; buf->sector += bcnt >> 9; - skb->dev = t->ifp->nd; - skb = skb_clone(skb, GFP_ATOMIC); + skb = skb_clone(f->skb, GFP_ATOMIC); if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); @@ -442,11 +488,14 @@ resend(struct aoedev *d, struct frame *f) h = (struct aoe_hdr *) skb_mac_header(skb); ah = (struct aoe_atahdr *) (h+1); - snprintf(buf, sizeof buf, - "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", - "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n, - h->src, h->dst, t->nout); - aoechr_error(buf); + if (!(f->flags & FFL_PROBE)) { + snprintf(buf, sizeof(buf), + "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", + "retransmit", d->aoemajor, d->aoeminor, + f->tag, jiffies, n, + h->src, h->dst, t->nout); + aoechr_error(buf); + } f->tag = n; fhash(f); @@ -458,12 +507,46 @@ resend(struct aoedev *d, struct frame *f) skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); } static int +tsince_hr(struct frame *f) +{ + struct timeval now; + int n; + + do_gettimeofday(&now); + n = now.tv_usec - f->sent.tv_usec; + n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC; + + if (n < 0) + n = -n; + + /* For relatively long periods, use jiffies to avoid + * discrepancies caused by updates to the system time. + * + * On system with HZ of 1000, 32-bits is over 49 days + * worth of jiffies, or over 71 minutes worth of usecs. + * + * Jiffies overflow is handled by subtraction of unsigned ints: + * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe + * $3 = 4 + * (gdb) + */ + if (n > USEC_PER_SEC / 4) { + n = ((u32) jiffies) - f->sent_jiffs; + n *= USEC_PER_SEC / HZ; + } + + return n; +} + +static int tsince(u32 tag) { int n; @@ -472,7 +555,7 @@ tsince(u32 tag) n -= tag & 0xffff; if (n < 0) n += 1<<16; - return n; + return jiffies_to_usecs(n + 1); } static struct aoeif * @@ -503,70 +586,189 @@ ejectif(struct aoetgt *t, struct aoeif *ifp) dev_put(nd); } -static int -sthtith(struct aoedev *d) +static struct frame * +reassign_frame(struct frame *f) { - struct frame *f, *nf; - struct list_head *nx, *pos, *head; + struct frame *nf; struct sk_buff *skb; - struct aoetgt *ht = d->htgt; - int i; - for (i = 0; i < NFACTIVE; i++) { - head = &d->factive[i]; - list_for_each_safe(pos, nx, head) { - f = list_entry(pos, struct frame, head); - if (f->t != ht) - continue; + nf = newframe(f->t->d); + if (!nf) + return NULL; + if (nf->t == f->t) { + aoe_freetframe(nf); + return NULL; + } - nf = newframe(d); - if (!nf) - return 0; + skb = nf->skb; + nf->skb = f->skb; + nf->buf = f->buf; + nf->bcnt = f->bcnt; + nf->lba = f->lba; + nf->bv = f->bv; + nf->bv_off = f->bv_off; + nf->waited = 0; + nf->waited_total = f->waited_total; + nf->sent = f->sent; + nf->sent_jiffs = f->sent_jiffs; + f->skb = skb; + + return nf; +} - /* remove frame from active list */ - list_del(pos); +static void +probe(struct aoetgt *t) +{ + struct aoedev *d; + struct frame *f; + struct sk_buff *skb; + struct sk_buff_head queue; + size_t n, m; + int frag; - /* reassign all pertinent bits to new outbound frame */ - skb = nf->skb; - nf->skb = f->skb; - nf->buf = f->buf; - nf->bcnt = f->bcnt; - nf->lba = f->lba; - nf->bv = f->bv; - nf->bv_off = f->bv_off; - nf->waited = 0; - f->skb = skb; + d = t->d; + f = newtframe(d, t); + if (!f) { + pr_err("%s %pm for e%ld.%d: %s\n", + "aoe: cannot probe remote address", + t->addr, + (long) d->aoemajor, d->aoeminor, + "no frame available"); + return; + } + f->flags |= FFL_PROBE; + ifrotate(t); + f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT; + ata_rw_frameinit(f); + skb = f->skb; + for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) { + if (n < PAGE_SIZE) + m = n; + else + m = PAGE_SIZE; + skb_fill_page_desc(skb, frag, empty_page, 0, m); + } + skb->len += f->bcnt; + skb->data_len = f->bcnt; + skb->truesize += f->bcnt; + + skb = skb_clone(f->skb, GFP_ATOMIC); + if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; + __skb_queue_head_init(&queue); + __skb_queue_tail(&queue, skb); + aoenet_xmit(&queue); + } +} + +static long +rto(struct aoedev *d) +{ + long t; + + t = 2 * d->rttavg >> RTTSCALE; + t += 8 * d->rttdev >> RTTDSCALE; + if (t == 0) + t = 1; + + return t; +} + +static void +rexmit_deferred(struct aoedev *d) +{ + struct aoetgt *t; + struct frame *f; + struct frame *nf; + struct list_head *pos, *nx, *head; + int since; + int untainted; + + count_targets(d, &untainted); + + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) { + f = list_entry(pos, struct frame, head); + t = f->t; + if (t->taint) { + if (!(f->flags & FFL_PROBE)) { + nf = reassign_frame(f); + if (nf) { + if (t->nout_probes == 0 + && untainted > 0) { + probe(t); + t->nout_probes++; + } + list_replace(&f->head, &nf->head); + pos = &nf->head; + aoe_freetframe(f); + f = nf; + t = f->t; + } + } else if (untainted < 1) { + /* don't probe w/o other untainted aoetgts */ + goto stop_probe; + } else if (tsince_hr(f) < t->taint * rto(d)) { + /* reprobe slowly when taint is high */ + continue; + } + } else if (f->flags & FFL_PROBE) { +stop_probe: /* don't probe untainted aoetgts */ + list_del(pos); aoe_freetframe(f); - ht->nout--; - nf->t->nout++; - resend(d, nf); + /* leaving d->kicked, because this is routine */ + f->t->d->flags |= DEVFL_KICKME; + continue; } + if (t->nout >= t->maxout) + continue; + list_del(pos); + t->nout++; + if (f->flags & FFL_PROBE) + t->nout_probes++; + since = tsince_hr(f); + f->waited += since; + f->waited_total += since; + resend(d, f); } - /* We've cleaned up the outstanding so take away his - * interfaces so he won't be used. We should remove him from - * the target array here, but cleaning up a target is - * involved. PUNT! - */ - memset(ht->ifs, 0, sizeof ht->ifs); - d->htgt = NULL; - return 1; } -static inline unsigned char -ata_scnt(unsigned char *packet) { - struct aoe_hdr *h; - struct aoe_atahdr *ah; +/* An aoetgt accumulates demerits quickly, and successful + * probing redeems the aoetgt slowly. + */ +static void +scorn(struct aoetgt *t) +{ + int n; - h = (struct aoe_hdr *) packet; - ah = (struct aoe_atahdr *) (h+1); - return ah->scnt; + n = t->taint++; + t->taint += t->taint * 2; + if (n > t->taint) + t->taint = n; + if (t->taint > MAX_TAINT) + t->taint = MAX_TAINT; +} + +static int +count_targets(struct aoedev *d, int *untainted) +{ + int i, good; + + for (i = good = 0; i < d->ntargets && d->targets[i]; ++i) + if (d->targets[i]->taint == 0) + good++; + + if (untainted) + *untainted = good; + return i; } static void rexmit_timer(ulong vp) { struct aoedev *d; - struct aoetgt *t, **tt, **te; + struct aoetgt *t; struct aoeif *ifp; struct frame *f; struct list_head *head, *pos, *nx; @@ -574,15 +776,18 @@ rexmit_timer(ulong vp) register long timeout; ulong flags, n; int i; + int utgts; /* number of aoetgt descriptors (not slots) */ + int since; d = (struct aoedev *) vp; - /* timeout is always ~150% of the moving average */ - timeout = d->rttavg; - timeout += timeout >> 1; - spin_lock_irqsave(&d->lock, flags); + /* timeout based on observed timings and variations */ + timeout = rto(d); + + utgts = count_targets(d, NULL); + if (d->flags & DEVFL_TKILL) { spin_unlock_irqrestore(&d->lock, flags); return; @@ -593,67 +798,61 @@ rexmit_timer(ulong vp) head = &d->factive[i]; list_for_each_safe(pos, nx, head) { f = list_entry(pos, struct frame, head); - if (tsince(f->tag) < timeout) + if (tsince_hr(f) < timeout) break; /* end of expired frames */ /* move to flist for later processing */ list_move_tail(pos, &flist); } } - /* window check */ - tt = d->targets; - te = tt + d->ntargets; - for (; tt < te && (t = *tt); tt++) { - if (t->nout == t->maxout - && t->maxout < t->nframes - && (jiffies - t->lastwadj)/HZ > 10) { - t->maxout++; - t->lastwadj = jiffies; - } - } - - if (!list_empty(&flist)) { /* retransmissions necessary */ - n = d->rttavg <<= 1; - if (n > MAXTIMER) - d->rttavg = MAXTIMER; - } /* process expired frames */ while (!list_empty(&flist)) { pos = flist.next; f = list_entry(pos, struct frame, head); - n = f->waited += timeout; - n /= HZ; - if (n > aoe_deadsecs) { + since = tsince_hr(f); + n = f->waited_total + since; + n /= USEC_PER_SEC; + if (aoe_deadsecs + && n > aoe_deadsecs + && !(f->flags & FFL_PROBE)) { /* Waited too long. Device failure. * Hang all frames on first hash bucket for downdev * to clean up. */ list_splice(&flist, &d->factive[0]); aoedev_downdev(d); - break; + goto out; } - list_del(pos); t = f->t; - if (n > aoe_deadsecs/2) - d->htgt = t; /* see if another target can help */ - - if (t->nout == t->maxout) { - if (t->maxout > 1) - t->maxout--; - t->lastwadj = jiffies; + n = f->waited + since; + n /= USEC_PER_SEC; + if (aoe_deadsecs && utgts > 0 + && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS)) + scorn(t); /* avoid this target */ + + if (t->maxout != 1) { + t->ssthresh = t->maxout / 2; + t->maxout = 1; } - ifp = getif(t, f->skb->dev); - if (ifp && ++ifp->lost > (t->nframes << 1) - && (ifp != t->ifs || t->ifs[1].nd)) { - ejectif(t, ifp); - ifp = NULL; + if (f->flags & FFL_PROBE) { + t->nout_probes--; + } else { + ifp = getif(t, f->skb->dev); + if (ifp && ++ifp->lost > (t->nframes << 1) + && (ifp != t->ifs || t->ifs[1].nd)) { + ejectif(t, ifp); + ifp = NULL; + } } - resend(d, f); + list_move_tail(pos, &d->rexmitq); + t->nout--; } + rexmit_deferred(d); - if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) { +out: + if ((d->flags & DEVFL_KICKME) && d->blkq) { d->flags &= ~DEVFL_KICKME; d->blkq->request_fn(d->blkq); } @@ -774,8 +973,7 @@ nextbuf(struct aoedev *d) void aoecmd_work(struct aoedev *d) { - if (d->htgt && !sthtith(d)) - return; + rexmit_deferred(d); while (aoecmd_ata_rw(d)) ; } @@ -809,6 +1007,17 @@ aoecmd_sleepwork(struct work_struct *work) } static void +ata_ident_fixstring(u16 *id, int ns) +{ + u16 s; + + while (ns-- > 0) { + s = *id; + *id++ = s >> 8 | s << 8; + } +} + +static void ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) { u64 ssize; @@ -843,6 +1052,11 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) d->geo.sectors = get_unaligned_le16(&id[56 << 1]); } + ata_ident_fixstring((u16 *) &id[10<<1], 10); /* serial */ + ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */ + ata_ident_fixstring((u16 *) &id[27<<1], 20); /* model */ + memcpy(d->ident, id, sizeof(d->ident)); + if (d->ssize != ssize) printk(KERN_INFO "aoe: %pm e%ld.%d v%04x has %llu sectors\n", @@ -862,26 +1076,28 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) } static void -calc_rttavg(struct aoedev *d, int rtt) +calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt) { register long n; n = rtt; - if (n < 0) { - n = -rtt; - if (n < MINTIMER) - n = MINTIMER; - else if (n > MAXTIMER) - n = MAXTIMER; - d->mintimer += (n - d->mintimer) >> 1; - } else if (n < d->mintimer) - n = d->mintimer; - else if (n > MAXTIMER) - n = MAXTIMER; - - /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ - n -= d->rttavg; - d->rttavg += n >> 2; + + /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */ + n -= d->rttavg >> RTTSCALE; + d->rttavg += n; + if (n < 0) + n = -n; + n -= d->rttdev >> RTTDSCALE; + d->rttdev += n; + + if (!t || t->maxout >= t->nframes) + return; + if (t->maxout < t->ssthresh) + t->maxout += 1; + else if (t->nout == t->maxout && t->next_cwnd-- == 0) { + t->maxout += 1; + t->next_cwnd = t->maxout; + } } static struct aoetgt * @@ -890,7 +1106,7 @@ gettgt(struct aoedev *d, char *addr) struct aoetgt **t, **e; t = d->targets; - e = t + NTARGETS; + e = t + d->ntargets; for (; t < e && *t; t++) if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0) return *t; @@ -935,7 +1151,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) /* cf. http://lkml.org/lkml/2006/10/31/28 */ if (!fastfail) - q->request_fn(q); + __blk_run_queue(q); } static void @@ -966,19 +1182,22 @@ ktiocomplete(struct frame *f) struct aoeif *ifp; struct aoedev *d; long n; + int untainted; if (f == NULL) return; t = f->t; d = t->d; + skb = f->r_skb; + buf = f->buf; + if (f->flags & FFL_PROBE) + goto out; + if (!skb) /* just fail the buf. */ + goto noskb; hout = (struct aoe_hdr *) skb_mac_header(f->skb); ahout = (struct aoe_atahdr *) (hout+1); - buf = f->buf; - skb = f->r_skb; - if (skb == NULL) - goto noskb; /* just fail the buf. */ hin = (struct aoe_hdr *) skb->data; skb_pull(skb, sizeof(*hin)); @@ -988,9 +1207,9 @@ ktiocomplete(struct frame *f) pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n", ahout->cmdstat, ahin->cmdstat, d->aoemajor, d->aoeminor); -noskb: if (buf) +noskb: if (buf) clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); - goto badrsp; + goto out; } n = ahout->scnt << 9; @@ -998,8 +1217,10 @@ noskb: if (buf) case ATA_CMD_PIO_READ: case ATA_CMD_PIO_READ_EXT: if (skb->len < n) { - pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n", - skb->len, n); + pr_err("%s e%ld.%d. skb->len=%d need=%ld\n", + "aoe: runt data size in read from", + (long) d->aoemajor, d->aoeminor, + skb->len, n); clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); break; } @@ -1010,13 +1231,13 @@ noskb: if (buf) ifp = getif(t, skb->dev); if (ifp) ifp->lost = 0; - if (d->htgt == t) /* I'll help myself, thank you. */ - d->htgt = NULL; spin_unlock_irq(&d->lock); break; case ATA_CMD_ID_ATA: if (skb->len < 512) { - pr_info("aoe: runt data size in ataid. skb->len=%d\n", + pr_info("%s e%ld.%d. skb->len=%d need=512\n", + "aoe: runt data size in ataid from", + (long) d->aoemajor, d->aoeminor, skb->len); break; } @@ -1032,16 +1253,23 @@ noskb: if (buf) be16_to_cpu(get_unaligned(&hin->major)), hin->minor); } -badrsp: +out: spin_lock_irq(&d->lock); + if (t->taint > 0 + && --t->taint > 0 + && t->nout_probes == 0) { + count_targets(d, &untainted); + if (untainted > 0) { + probe(t); + t->nout_probes++; + } + } aoe_freetframe(f); if (buf && --buf->nframesout == 0 && buf->resid == 0) aoe_end_buf(d, buf); - aoecmd_work(d); - spin_unlock_irq(&d->lock); aoedev_put(d); dev_kfree_skb(skb); @@ -1141,7 +1369,6 @@ aoecmd_ata_rsp(struct sk_buff *skb) struct aoedev *d; struct aoe_hdr *h; struct frame *f; - struct aoetgt *t; u32 n; ulong flags; char ebuf[128]; @@ -1162,23 +1389,32 @@ aoecmd_ata_rsp(struct sk_buff *skb) n = be32_to_cpu(get_unaligned(&h->tag)); f = getframe(d, n); - if (f == NULL) { - calc_rttavg(d, -tsince(n)); - spin_unlock_irqrestore(&d->lock, flags); - aoedev_put(d); - snprintf(ebuf, sizeof ebuf, - "%15s e%d.%d tag=%08x@%08lx\n", - "unexpected rsp", - get_unaligned_be16(&h->major), - h->minor, - get_unaligned_be32(&h->tag), - jiffies); - aoechr_error(ebuf); - return skb; + if (f) { + calc_rttavg(d, f->t, tsince_hr(f)); + f->t->nout--; + if (f->flags & FFL_PROBE) + f->t->nout_probes--; + } else { + f = getframe_deferred(d, n); + if (f) { + calc_rttavg(d, NULL, tsince_hr(f)); + } else { + calc_rttavg(d, NULL, tsince(n)); + spin_unlock_irqrestore(&d->lock, flags); + aoedev_put(d); + snprintf(ebuf, sizeof(ebuf), + "%15s e%d.%d tag=%08x@%08lx s=%pm d=%pm\n", + "unexpected rsp", + get_unaligned_be16(&h->major), + h->minor, + get_unaligned_be32(&h->tag), + jiffies, + h->src, + h->dst); + aoechr_error(ebuf); + return skb; + } } - t = f->t; - calc_rttavg(d, tsince(f->tag)); - t->nout--; aoecmd_work(d); spin_unlock_irqrestore(&d->lock, flags); @@ -1201,7 +1437,7 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) aoecmd_cfg_pkts(aoemajor, aoeminor, &queue); aoenet_xmit(&queue); } - + struct sk_buff * aoecmd_ata_id(struct aoedev *d) { @@ -1227,6 +1463,7 @@ aoecmd_ata_id(struct aoedev *d) fhash(f); t->nout++; f->waited = 0; + f->waited_total = 0; /* set up ata header */ ah->scnt = 1; @@ -1235,41 +1472,69 @@ aoecmd_ata_id(struct aoedev *d) skb->dev = t->ifp->nd; - d->rttavg = MAXTIMER; + d->rttavg = RTTAVG_INIT; + d->rttdev = RTTDEV_INIT; d->timer.function = rexmit_timer; - return skb_clone(skb, GFP_ATOMIC); + skb = skb_clone(skb, GFP_ATOMIC); + if (skb) { + do_gettimeofday(&f->sent); + f->sent_jiffs = (u32) jiffies; + } + + return skb; } - + +static struct aoetgt ** +grow_targets(struct aoedev *d) +{ + ulong oldn, newn; + struct aoetgt **tt; + + oldn = d->ntargets; + newn = oldn * 2; + tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC); + if (!tt) + return NULL; + memmove(tt, d->targets, sizeof(*d->targets) * oldn); + d->tgt = tt + (d->tgt - d->targets); + kfree(d->targets); + d->targets = tt; + d->ntargets = newn; + + return &d->targets[oldn]; +} + static struct aoetgt * addtgt(struct aoedev *d, char *addr, ulong nframes) { struct aoetgt *t, **tt, **te; tt = d->targets; - te = tt + NTARGETS; + te = tt + d->ntargets; for (; tt < te && *tt; tt++) ; if (tt == te) { - printk(KERN_INFO - "aoe: device addtgt failure; too many targets\n"); - return NULL; + tt = grow_targets(d); + if (!tt) + goto nomem; } t = kzalloc(sizeof(*t), GFP_ATOMIC); - if (!t) { - printk(KERN_INFO "aoe: cannot allocate memory to add target\n"); - return NULL; - } - - d->ntargets++; + if (!t) + goto nomem; t->nframes = nframes; t->d = d; memcpy(t->addr, addr, sizeof t->addr); t->ifp = t->ifs; - t->maxout = t->nframes; + aoecmd_wreset(t); + t->maxout = t->nframes / 2; INIT_LIST_HEAD(&t->ffree); return *tt = t; + + nomem: + pr_info("aoe: cannot allocate memory to add target\n"); + return NULL; } static void @@ -1279,7 +1544,7 @@ setdbcnt(struct aoedev *d) int bcnt = 0; t = d->targets; - e = t + NTARGETS; + e = t + d->ntargets; for (; t < e && *t; t++) if (bcnt == 0 || bcnt > (*t)->minbcnt) bcnt = (*t)->minbcnt; @@ -1373,7 +1638,11 @@ aoecmd_cfg_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); t = gettgt(d, h->src); - if (!t) { + if (t) { + t->nframes = n; + if (n < t->maxout) + aoecmd_wreset(t); + } else { t = addtgt(d, h->src, n); if (!t) goto bail; @@ -1402,17 +1671,26 @@ bail: } void +aoecmd_wreset(struct aoetgt *t) +{ + t->maxout = 1; + t->ssthresh = t->nframes / 2; + t->next_cwnd = t->nframes; +} + +void aoecmd_cleanslate(struct aoedev *d) { struct aoetgt **t, **te; - d->mintimer = MINTIMER; + d->rttavg = RTTAVG_INIT; + d->rttdev = RTTDEV_INIT; d->maxbcnt = 0; t = d->targets; - te = t + NTARGETS; + te = t + d->ntargets; for (; t < te && *t; t++) - (*t)->maxout = (*t)->nframes; + aoecmd_wreset(*t); } void @@ -1460,6 +1738,14 @@ aoe_flush_iocq(void) int __init aoecmd_init(void) { + void *p; + + /* get_zeroed_page returns page with ref count 1 */ + p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + if (!p) + return -ENOMEM; + empty_page = virt_to_page(p); + INIT_LIST_HEAD(&iocq.head); spin_lock_init(&iocq.lock); init_waitqueue_head(&ktiowq); @@ -1475,4 +1761,7 @@ aoecmd_exit(void) { aoe_ktstop(&kts); aoe_flush_iocq(); + + free_page((unsigned long) page_address(empty_page)); + empty_page = NULL; } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 90e5b537f94..98f2965778b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -15,7 +15,6 @@ #include "aoe.h" static void dummy_timer(ulong); -static void aoedev_freedev(struct aoedev *); static void freetgt(struct aoedev *d, struct aoetgt *t); static void skbpoolfree(struct aoedev *d); @@ -69,25 +68,34 @@ minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin) NPERSHELF = 16, }; + if (aoemin >= NPERSHELF) { + pr_err("aoe: %s %d slots per shelf\n", + "static minor device numbers support only", + NPERSHELF); + error = -1; + goto out; + } + n = aoemaj * NPERSHELF + aoemin; - if (aoemin >= NPERSHELF || n >= N_DEVS) { + if (n >= N_DEVS) { pr_err("aoe: %s with e%ld.%d\n", "cannot use static minor device numbers", aoemaj, aoemin); error = -1; - } else { - spin_lock_irqsave(&used_minors_lock, flags); - if (test_bit(n, used_minors)) { - pr_err("aoe: %s %lu\n", - "existing device already has static minor number", - n); - error = -1; - } else - set_bit(n, used_minors); - spin_unlock_irqrestore(&used_minors_lock, flags); + goto out; } - *sysminor = n; + spin_lock_irqsave(&used_minors_lock, flags); + if (test_bit(n, used_minors)) { + pr_err("aoe: %s %lu\n", + "existing device already has static minor number", + n); + error = -1; + } else + set_bit(n, used_minors); + spin_unlock_irqrestore(&used_minors_lock, flags); + *sysminor = n * AOE_PARTITIONS; +out: return error; } @@ -170,41 +178,50 @@ aoe_failip(struct aoedev *d) aoe_end_request(d, rq, 0); } +static void +downdev_frame(struct list_head *pos) +{ + struct frame *f; + + f = list_entry(pos, struct frame, head); + list_del(pos); + if (f->buf) { + f->buf->nframesout--; + aoe_failbuf(f->t->d, f->buf); + } + aoe_freetframe(f); +} + void aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; - struct frame *f; struct list_head *head, *pos, *nx; struct request *rq; int i; d->flags &= ~DEVFL_UP; - /* clean out active buffers */ + /* clean out active and to-be-retransmitted buffers */ for (i = 0; i < NFACTIVE; i++) { head = &d->factive[i]; - list_for_each_safe(pos, nx, head) { - f = list_entry(pos, struct frame, head); - list_del(pos); - if (f->buf) { - f->buf->nframesout--; - aoe_failbuf(d, f->buf); - } - aoe_freetframe(f); - } + list_for_each_safe(pos, nx, head) + downdev_frame(pos); } + head = &d->rexmitq; + list_for_each_safe(pos, nx, head) + downdev_frame(pos); + /* reset window dressings */ tt = d->targets; - te = tt + NTARGETS; + te = tt + d->ntargets; for (; tt < te && (t = *tt); tt++) { - t->maxout = t->nframes; + aoecmd_wreset(t); t->nout = 0; } /* clean out the in-process request (if any) */ aoe_failip(d); - d->htgt = NULL; /* fast fail all pending I/O */ if (d->blkq) { @@ -218,12 +235,48 @@ aoedev_downdev(struct aoedev *d) set_capacity(d->gd, 0); } +/* return whether the user asked for this particular + * device to be flushed + */ +static int +user_req(char *s, size_t slen, struct aoedev *d) +{ + char *p; + size_t lim; + + if (!d->gd) + return 0; + p = strrchr(d->gd->disk_name, '/'); + if (!p) + p = d->gd->disk_name; + else + p += 1; + lim = sizeof(d->gd->disk_name); + lim -= p - d->gd->disk_name; + if (slen < lim) + lim = slen; + + return !strncmp(s, p, lim); +} + static void -aoedev_freedev(struct aoedev *d) +freedev(struct aoedev *d) { struct aoetgt **t, **e; + int freeing = 0; + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + if (d->flags & DEVFL_TKILL + && !(d->flags & DEVFL_FREEING)) { + d->flags |= DEVFL_FREEING; + freeing = 1; + } + spin_unlock_irqrestore(&d->lock, flags); + if (!freeing) + return; - cancel_work_sync(&d->work); + del_timer_sync(&d->timer); if (d->gd) { aoedisk_rm_sysfs(d); del_gendisk(d->gd); @@ -231,61 +284,113 @@ aoedev_freedev(struct aoedev *d) blk_cleanup_queue(d->blkq); } t = d->targets; - e = t + NTARGETS; + e = t + d->ntargets; for (; t < e && *t; t++) freetgt(d, *t); if (d->bufpool) mempool_destroy(d->bufpool); skbpoolfree(d); minor_free(d->sysminor); - kfree(d); + + spin_lock_irqsave(&d->lock, flags); + d->flags |= DEVFL_FREED; + spin_unlock_irqrestore(&d->lock, flags); } -int -aoedev_flush(const char __user *str, size_t cnt) +enum flush_parms { + NOT_EXITING = 0, + EXITING = 1, +}; + +static int +flush(const char __user *str, size_t cnt, int exiting) { ulong flags; struct aoedev *d, **dd; - struct aoedev *rmd = NULL; char buf[16]; int all = 0; + int specified = 0; /* flush a specific device */ + unsigned int skipflags; + + skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL; - if (cnt >= 3) { + if (!exiting && cnt >= 3) { if (cnt > sizeof buf) cnt = sizeof buf; if (copy_from_user(buf, str, cnt)) return -EFAULT; all = !strncmp(buf, "all", 3); + if (!all) + specified = 1; } + flush_scheduled_work(); + /* pass one: without sleeping, do aoedev_downdev */ spin_lock_irqsave(&devlist_lock, flags); - dd = &devlist; - while ((d = *dd)) { + for (d = devlist; d; d = d->next) { spin_lock(&d->lock); - if ((!all && (d->flags & DEVFL_UP)) - || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) + if (exiting) { + /* unconditionally take each device down */ + } else if (specified) { + if (!user_req(buf, cnt, d)) + goto cont; + } else if ((!all && (d->flags & DEVFL_UP)) + || d->flags & skipflags || d->nopen - || d->ref) { - spin_unlock(&d->lock); - dd = &d->next; - continue; - } - *dd = d->next; + || d->ref) + goto cont; + aoedev_downdev(d); d->flags |= DEVFL_TKILL; +cont: spin_unlock(&d->lock); - d->next = rmd; - rmd = d; } spin_unlock_irqrestore(&devlist_lock, flags); - while ((d = rmd)) { - rmd = d->next; - del_timer_sync(&d->timer); - aoedev_freedev(d); /* must be able to sleep */ + + /* pass two: call freedev, which might sleep, + * for aoedevs marked with DEVFL_TKILL + */ +restart: + spin_lock_irqsave(&devlist_lock, flags); + for (d = devlist; d; d = d->next) { + spin_lock(&d->lock); + if (d->flags & DEVFL_TKILL + && !(d->flags & DEVFL_FREEING)) { + spin_unlock(&d->lock); + spin_unlock_irqrestore(&devlist_lock, flags); + freedev(d); + goto restart; + } + spin_unlock(&d->lock); } + + /* pass three: remove aoedevs marked with DEVFL_FREED */ + for (dd = &devlist, d = *dd; d; d = *dd) { + struct aoedev *doomed = NULL; + + spin_lock(&d->lock); + if (d->flags & DEVFL_FREED) { + *dd = d->next; + doomed = d; + } else { + dd = &d->next; + } + spin_unlock(&d->lock); + if (doomed) + kfree(doomed->targets); + kfree(doomed); + } + spin_unlock_irqrestore(&devlist_lock, flags); + return 0; } +int +aoedev_flush(const char __user *str, size_t cnt) +{ + return flush(str, cnt, NOT_EXITING); +} + /* This has been confirmed to occur once with Tms=3*1000 due to the * driver changing link and not processing its transmit ring. The * problem is hard enough to solve by returning an error that I'm @@ -332,13 +437,20 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) struct aoedev *d; int i; ulong flags; - ulong sysminor; + ulong sysminor = 0; spin_lock_irqsave(&devlist_lock, flags); for (d=devlist; d; d=d->next) if (d->aoemajor == maj && d->aoeminor == min) { + spin_lock(&d->lock); + if (d->flags & DEVFL_TKILL) { + spin_unlock(&d->lock); + d = NULL; + goto out; + } d->ref++; + spin_unlock(&d->lock); break; } if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) @@ -346,6 +458,13 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) d = kcalloc(1, sizeof *d, GFP_ATOMIC); if (!d) goto out; + d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC); + if (!d->targets) { + kfree(d); + d = NULL; + goto out; + } + d->ntargets = NTARGETS; INIT_WORK(&d->work, aoecmd_sleepwork); spin_lock_init(&d->lock); skb_queue_head_init(&d->skbpool); @@ -359,10 +478,12 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) d->ref = 1; for (i = 0; i < NFACTIVE; i++) INIT_LIST_HEAD(&d->factive[i]); + INIT_LIST_HEAD(&d->rexmitq); d->sysminor = sysminor; d->aoemajor = maj; d->aoeminor = min; - d->mintimer = MINTIMER; + d->rttavg = RTTAVG_INIT; + d->rttdev = RTTDEV_INIT; d->next = devlist; devlist = d; out: @@ -396,21 +517,9 @@ freetgt(struct aoedev *d, struct aoetgt *t) void aoedev_exit(void) { - struct aoedev *d; - ulong flags; - + flush_scheduled_work(); aoe_flush_iocq(); - while ((d = devlist)) { - devlist = d->next; - - spin_lock_irqsave(&d->lock, flags); - aoedev_downdev(d); - d->flags |= DEVFL_TKILL; - spin_unlock_irqrestore(&d->lock, flags); - - del_timer_sync(&d->timer); - aoedev_freedev(d); - } + flush(NULL, 0, EXITING); } int __init diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index 04793c2c701..4b987c2fefb 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -105,7 +105,7 @@ aoe_init(void) aoechr_exit(); chr_fail: aoedev_exit(); - + printk(KERN_INFO "aoe: initialisation failure.\n"); return ret; } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 162c6471275..71d3ea8d300 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -31,7 +31,7 @@ enum { static char aoe_iflist[IFLISTSZ]; module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); -MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\""); +MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=dev1[,dev2...]"); static wait_queue_head_t txwq; static struct ktstate kts; @@ -52,13 +52,18 @@ static struct sk_buff_head skbtxq; /* enters with txlock held */ static int -tx(void) +tx(void) __must_hold(&txlock) { struct sk_buff *skb; + struct net_device *ifp; while ((skb = skb_dequeue(&skbtxq))) { spin_unlock_irq(&txlock); - dev_queue_xmit(skb); + ifp = skb->dev; + if (dev_queue_xmit(skb) == NET_XMIT_DROP && net_ratelimit()) + pr_warn("aoe: packet could not be sent on %s. %s\n", + ifp ? ifp->name : "netif", + "consider increasing tx_queue_len"); spin_lock_irq(&txlock); } return 0; @@ -119,8 +124,8 @@ aoenet_xmit(struct sk_buff_head *queue) } } -/* - * (1) len doesn't include the header by default. I want this. +/* + * (1) len doesn't include the header by default. I want this. */ static int aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e04c63ec775..6526157edaf 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -5198,7 +5198,6 @@ static void cciss_shutdown(struct pci_dev *pdev) return; } /* write all data in the battery backed cache to disk */ - memset(flush_buf, 0, 4); return_code = sendcmd_withirq(h, CCISS_CACHE_FLUSH, flush_buf, 4, 0, CTLR_LUNID, TYPE_CMD); kfree(flush_buf); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 17c675c5229..2ddd64a9ffd 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4109,12 +4109,19 @@ static struct platform_driver floppy_driver = { static struct platform_device floppy_device[N_DRIVE]; +static bool floppy_available(int drive) +{ + if (!(allowed_drive_mask & (1 << drive))) + return false; + if (fdc_state[FDC(drive)].version == FDC_NONE) + return false; + return true; +} + static struct kobject *floppy_find(dev_t dev, int *part, void *data) { int drive = (*part & 3) | ((*part & 0x80) >> 5); - if (drive >= N_DRIVE || - !(allowed_drive_mask & (1 << drive)) || - fdc_state[FDC(drive)].version == FDC_NONE) + if (drive >= N_DRIVE || !floppy_available(drive)) return NULL; if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) return NULL; @@ -4124,8 +4131,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) static int __init do_floppy_init(void) { - int i, unit, drive; - int err, dr; + int i, unit, drive, err; set_debugt(); interruptjiffies = resultjiffies = jiffies; @@ -4137,34 +4143,32 @@ static int __init do_floppy_init(void) raw_cmd = NULL; - for (dr = 0; dr < N_DRIVE; dr++) { - disks[dr] = alloc_disk(1); - if (!disks[dr]) { - err = -ENOMEM; - goto out_put_disk; - } + floppy_wq = alloc_ordered_workqueue("floppy", 0); + if (!floppy_wq) + return -ENOMEM; - floppy_wq = alloc_ordered_workqueue("floppy", 0); - if (!floppy_wq) { + for (drive = 0; drive < N_DRIVE; drive++) { + disks[drive] = alloc_disk(1); + if (!disks[drive]) { err = -ENOMEM; goto out_put_disk; } - disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock); - if (!disks[dr]->queue) { + disks[drive]->queue = blk_init_queue(do_fd_request, &floppy_lock); + if (!disks[drive]->queue) { err = -ENOMEM; - goto out_destroy_workq; + goto out_put_disk; } - blk_queue_max_hw_sectors(disks[dr]->queue, 64); - disks[dr]->major = FLOPPY_MAJOR; - disks[dr]->first_minor = TOMINOR(dr); - disks[dr]->fops = &floppy_fops; - sprintf(disks[dr]->disk_name, "fd%d", dr); + blk_queue_max_hw_sectors(disks[drive]->queue, 64); + disks[drive]->major = FLOPPY_MAJOR; + disks[drive]->first_minor = TOMINOR(drive); + disks[drive]->fops = &floppy_fops; + sprintf(disks[drive]->disk_name, "fd%d", drive); - init_timer(&motor_off_timer[dr]); - motor_off_timer[dr].data = dr; - motor_off_timer[dr].function = motor_off_callback; + init_timer(&motor_off_timer[drive]); + motor_off_timer[drive].data = drive; + motor_off_timer[drive].function = motor_off_callback; } err = register_blkdev(FLOPPY_MAJOR, "fd"); @@ -4282,9 +4286,7 @@ static int __init do_floppy_init(void) } for (drive = 0; drive < N_DRIVE; drive++) { - if (!(allowed_drive_mask & (1 << drive))) - continue; - if (fdc_state[FDC(drive)].version == FDC_NONE) + if (!floppy_available(drive)) continue; floppy_device[drive].name = floppy_device_name; @@ -4293,7 +4295,7 @@ static int __init do_floppy_init(void) err = platform_device_register(&floppy_device[drive]); if (err) - goto out_release_dma; + goto out_remove_drives; err = device_create_file(&floppy_device[drive].dev, &dev_attr_cmos); @@ -4311,28 +4313,33 @@ static int __init do_floppy_init(void) out_unreg_platform_dev: platform_device_unregister(&floppy_device[drive]); +out_remove_drives: + while (drive--) { + if (floppy_available(drive)) { + del_gendisk(disks[drive]); + device_remove_file(&floppy_device[drive].dev, &dev_attr_cmos); + platform_device_unregister(&floppy_device[drive]); + } + } out_release_dma: if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); out_unreg_region: blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); platform_driver_unregister(&floppy_driver); -out_destroy_workq: - destroy_workqueue(floppy_wq); out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: - while (dr--) { - del_timer_sync(&motor_off_timer[dr]); - if (disks[dr]->queue) { - blk_cleanup_queue(disks[dr]->queue); - /* - * put_disk() is not paired with add_disk() and - * will put queue reference one extra time. fix it. - */ - disks[dr]->queue = NULL; + destroy_workqueue(floppy_wq); + for (drive = 0; drive < N_DRIVE; drive++) { + if (!disks[drive]) + break; + if (disks[drive]->queue) { + del_timer_sync(&motor_off_timer[drive]); + blk_cleanup_queue(disks[drive]->queue); + disks[drive]->queue = NULL; } - put_disk(disks[dr]); + put_disk(disks[drive]); } return err; } @@ -4548,11 +4555,12 @@ static void __exit floppy_module_exit(void) unregister_blkdev(FLOPPY_MAJOR, "fd"); platform_driver_unregister(&floppy_driver); + destroy_workqueue(floppy_wq); + for (drive = 0; drive < N_DRIVE; drive++) { del_timer_sync(&motor_off_timer[drive]); - if ((allowed_drive_mask & (1 << drive)) && - fdc_state[FDC(drive)].version != FDC_NONE) { + if (floppy_available(drive)) { del_gendisk(disks[drive]); device_remove_file(&floppy_device[drive].dev, &dev_attr_cmos); platform_device_unregister(&floppy_device[drive]); @@ -4572,7 +4580,6 @@ static void __exit floppy_module_exit(void) cancel_delayed_work_sync(&fd_timeout); cancel_delayed_work_sync(&fd_timer); - destroy_workqueue(floppy_wq); if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 800aec7927d..ae125127062 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -985,8 +985,21 @@ static int loop_clr_fd(struct loop_device *lo) if (lo->lo_state != Lo_bound) return -ENXIO; - if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ - return -EBUSY; + /* + * If we've explicitly asked to tear down the loop device, + * and it has an elevated reference count, set it for auto-teardown when + * the last reference goes away. This stops $!~#$@ udev from + * preventing teardown because it decided that it needs to run blkid on + * the loopback device whenever they appear. xfstests is notorious for + * failing tests because blkid via udev races with a losetup + * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d + * command to fail with EBUSY. + */ + if (lo->lo_refcnt > 1) { + lo->lo_flags |= LO_FLAGS_AUTOCLEAR; + mutex_unlock(&lo->lo_ctl_mutex); + return 0; + } if (filp == NULL) return -EINVAL; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index f946d31d691..3fd10099045 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -559,7 +559,7 @@ static void mtip_timeout_function(unsigned long int data) struct mtip_cmd *command; int tag, cmdto_cnt = 0; unsigned int bit, group; - unsigned int num_command_slots = port->dd->slot_groups * 32; + unsigned int num_command_slots; unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; if (unlikely(!port)) @@ -572,6 +572,7 @@ static void mtip_timeout_function(unsigned long int data) } /* clear the tag accumulator */ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); + num_command_slots = port->dd->slot_groups * 32; for (tag = 0; tag < num_command_slots; tag++) { /* @@ -625,12 +626,13 @@ static void mtip_timeout_function(unsigned long int data) } } - if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + if (cmdto_cnt) { print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); - - mtip_restart_port(port); + if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + mtip_restart_port(port); + wake_up_interruptible(&port->svc_wait); + } clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); } if (port->ic_pause_timer) { @@ -2035,8 +2037,9 @@ static unsigned int implicit_sector(unsigned char command, } return rv; } - -static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout) +static void mtip_set_timeout(struct driver_data *dd, + struct host_to_dev_fis *fis, + unsigned int *timeout, u8 erasemode) { switch (fis->command) { case ATA_CMD_DOWNLOAD_MICRO: @@ -2044,7 +2047,10 @@ static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout) break; case ATA_CMD_SEC_ERASE_UNIT: case 0xFC: - *timeout = 240000; /* 4 minutes */ + if (erasemode) + *timeout = ((*(dd->port->identify + 90) * 2) * 60000); + else + *timeout = ((*(dd->port->identify + 89) * 2) * 60000); break; case ATA_CMD_STANDBYNOW1: *timeout = 120000; /* 2 minutes */ @@ -2087,6 +2093,7 @@ static int exec_drive_taskfile(struct driver_data *dd, unsigned int transfer_size; unsigned long task_file_data; int intotal = outtotal + req_task->out_size; + int erasemode = 0; taskout = req_task->out_size; taskin = req_task->in_size; @@ -2212,7 +2219,13 @@ static int exec_drive_taskfile(struct driver_data *dd, fis.lba_hi, fis.device); - mtip_set_timeout(&fis, &timeout); + /* check for erase mode support during secure erase.*/ + if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf && + (outbuf[0] & MTIP_SEC_ERASE_MODE)) { + erasemode = 1; + } + + mtip_set_timeout(dd, &fis, &timeout, erasemode); /* Determine the correct transfer size.*/ if (force_single_sector) @@ -2428,7 +2441,7 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * return value * None */ -static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, +static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, int nsect, int nents, int tag, void *callback, void *data, int dir) { @@ -2436,6 +2449,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, struct mtip_port *port = dd->port; struct mtip_cmd *command = &port->commands[tag]; int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + u64 start = sector; /* Map the scatter list for DMA access */ nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); @@ -2454,8 +2468,12 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, fis->opts = 1 << 7; fis->command = (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE); - *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF); - *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF); + fis->lba_low = start & 0xFF; + fis->lba_mid = (start >> 8) & 0xFF; + fis->lba_hi = (start >> 16) & 0xFF; + fis->lba_low_ex = (start >> 24) & 0xFF; + fis->lba_mid_ex = (start >> 32) & 0xFF; + fis->lba_hi_ex = (start >> 40) & 0xFF; fis->device = 1 << 6; fis->features = nsect & 0xFF; fis->features_ex = (nsect >> 8) & 0xFF; @@ -3870,7 +3888,12 @@ static int mtip_block_remove(struct driver_data *dd) * Delete our gendisk structure. This also removes the device * from /dev */ - del_gendisk(dd->disk); + if (dd->disk) { + if (dd->disk->queue) + del_gendisk(dd->disk); + else + put_disk(dd->disk); + } spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, dd->index); @@ -3904,7 +3927,13 @@ static int mtip_block_shutdown(struct driver_data *dd) "Shutting down %s ...\n", dd->disk->disk_name); /* Delete our gendisk structure, and cleanup the blk queue. */ - del_gendisk(dd->disk); + if (dd->disk) { + if (dd->disk->queue) + del_gendisk(dd->disk); + else + put_disk(dd->disk); + } + spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, dd->index); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 18627a1d04c..b1742640556 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -33,6 +33,9 @@ /* offset of Device Control register in PCIe extended capabilites space */ #define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48 +/* check for erase mode support during secure erase */ +#define MTIP_SEC_ERASE_MODE 0x2 + /* # of times to retry timed out/failed IOs */ #define MTIP_MAX_RETRIES 2 @@ -152,14 +155,14 @@ enum { MTIP_DDF_REBUILD_FAILED_BIT = 8, }; -__packed struct smart_attr{ +struct smart_attr { u8 attr_id; u16 flags; u8 cur; u8 worst; u32 data; u8 res[3]; -}; +} __packed; /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 74374fb762a..5ac841ff6cc 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -161,10 +161,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); -#define foreach_grant(pos, rbtree, node) \ - for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ +#define foreach_grant_safe(pos, n, rbtree, node) \ + for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ + (n) = rb_next(&(pos)->node); \ &(pos)->node != NULL; \ - (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) + (pos) = container_of(n, typeof(*(pos)), node), \ + (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) static void add_persistent_gnt(struct rb_root *root, @@ -217,10 +219,11 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt; + struct rb_node *n; int ret = 0; int segs_to_unmap = 0; - foreach_grant(persistent_gnt, root, node) { + foreach_grant_safe(persistent_gnt, n, root, node) { BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); gnttab_set_unmap_op(&unmap[segs_to_unmap], @@ -230,9 +233,6 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) persistent_gnt->handle); pages[segs_to_unmap] = persistent_gnt->page; - rb_erase(&persistent_gnt->node, root); - kfree(persistent_gnt); - num--; if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { @@ -241,6 +241,10 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) BUG_ON(ret); segs_to_unmap = 0; } + + rb_erase(&persistent_gnt->node, root); + kfree(persistent_gnt); + num--; } BUG_ON(num != 0); } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index ae7951f0e26..6072390c7f5 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -159,9 +159,8 @@ struct xen_vbd { struct block_device *bdev; /* Cached size parameter. */ sector_t size; - bool flush_support; - bool discard_secure; - + unsigned int flush_support:1; + unsigned int discard_secure:1; unsigned int feature_gnt_persistent:1; unsigned int overflow_max_grants:1; }; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index a03ecbb0044..63980722db4 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -105,11 +105,10 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; - blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); + blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) return ERR_PTR(-ENOMEM); - memset(blkif, 0, sizeof(*blkif)); blkif->domid = domid; spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); @@ -197,7 +196,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) } } -void xen_blkif_free(struct xen_blkif *blkif) +static void xen_blkif_free(struct xen_blkif *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); @@ -258,7 +257,7 @@ static struct attribute_group xen_vbdstat_group = { VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); VBD_SHOW(mode, "%s\n", be->mode); -int xenvbd_sysfs_addif(struct xenbus_device *dev) +static int xenvbd_sysfs_addif(struct xenbus_device *dev) { int error; @@ -282,7 +281,7 @@ fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); return error; } -void xenvbd_sysfs_delif(struct xenbus_device *dev) +static void xenvbd_sysfs_delif(struct xenbus_device *dev) { sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); device_remove_file(&dev->dev, &dev_attr_mode); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 96e9b00db08..11043c18ac5 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; struct grant *persistent_gnt; + struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry(persistent_gnt, all_gnts, node) { + llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); kfree(persistent_gnt); @@ -835,7 +836,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, struct blkif_response *bret) { - int i; + int i = 0; struct bio_vec *bvec; struct req_iterator iter; unsigned long flags; @@ -852,7 +853,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, */ rq_for_each_segment(bvec, s->request, iter) { BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); - i = offset >> PAGE_SHIFT; + if (bvec->bv_offset < offset) + i++; BUG_ON(i >= s->req.u.rw.nr_segments); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); @@ -861,7 +863,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, bvec->bv_len); bvec_kunmap_irq(bvec_data, &flags); kunmap_atomic(shared_data); - offset += bvec->bv_len; + offset = bvec->bv_offset + bvec->bv_len; } } /* Add the persistent grant into the list of free grants */ |