summary | refs | log | tree | commit | diff | stats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/Kconfig 15
-rw-r--r-- drivers/block/aoe/aoe.h 57
-rw-r--r-- drivers/block/aoe/aoeblk.c 104
-rw-r--r-- drivers/block/aoe/aoechr.c 7
-rw-r--r-- drivers/block/aoe/aoecmd.c 717
-rw-r--r-- drivers/block/aoe/aoedev.c 243
-rw-r--r-- drivers/block/aoe/aoemain.c 2
-rw-r--r-- drivers/block/aoe/aoenet.c 15
-rw-r--r-- drivers/block/cciss.c 1
-rw-r--r-- drivers/block/floppy.c 93
-rw-r--r-- drivers/block/loop.c 17
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c 57
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.h 7
-rw-r--r-- drivers/block/xen-blkback/blkback.c 18
-rw-r--r-- drivers/block/xen-blkback/common.h 5
-rw-r--r-- drivers/block/xen-blkback/xenbus.c 9
-rw-r--r-- drivers/block/xen-blkfront.c 10
17 files changed, 967 insertions, 410 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index f529407db93..824e09c4d0d 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -131,6 +131,7 @@ config BLK_CPQ_DA
config BLK_CPQ_CISS_DA
tristate "Compaq Smart Array 5xxx support"
depends on PCI
+ select CHECK_SIGNATURE
help
This is the driver for Compaq Smart Array 5xxx controllers.
Everyone using these boards should say Y here.
@@ -166,8 +167,8 @@ config BLK_DEV_DAC960
module will be called DAC960.
config BLK_DEV_UMEM
- tristate "Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)"
- depends on PCI && EXPERIMENTAL
+ tristate "Micro Memory MM5415 Battery Backed RAM support"
+ depends on PCI
---help---
Saying Y here will include support for the MM5415 family of
battery backed (Non-volatile) RAM cards.
@@ -430,8 +431,8 @@ config CDROM_PKTCDVD_BUFFERS
a disc is opened for writing.
config CDROM_PKTCDVD_WCACHE
- bool "Enable write caching (EXPERIMENTAL)"
- depends on CDROM_PKTCDVD && EXPERIMENTAL
+ bool "Enable write caching"
+ depends on CDROM_PKTCDVD
help
If enabled, write caching will be set for the CD-R/W device. For now
this option is dangerous unless the CD-RW media is known good, as we
@@ -508,8 +509,8 @@ config XEN_BLKDEV_BACKEND
config VIRTIO_BLK
- tristate "Virtio block driver (EXPERIMENTAL)"
- depends on EXPERIMENTAL && VIRTIO
+ tristate "Virtio block driver"
+ depends on VIRTIO
---help---
This is the virtual block driver for virtio. It can be used with
lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
@@ -528,7 +529,7 @@ config BLK_DEV_HD
config BLK_DEV_RBD
tristate "Rados block device (RBD)"
- depends on INET && EXPERIMENTAL && BLOCK
+ depends on INET && BLOCK
select CEPH_LIB
select LIBCRC32C
select CRYPTO_AES
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index d2ed7f18d1a..175649468c9 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
-#define VERSION "50"
+#define VERSION "81"
#define AOE_MAJOR 152
#define DEVICE_NAME "aoe"
@@ -10,7 +10,7 @@
#define AOE_PARTITIONS (16)
#endif
-#define WHITESPACE " \t\v\f\n"
+#define WHITESPACE " \t\v\f\n,"
enum {
AOECMD_ATA,
@@ -73,21 +73,29 @@ enum {
DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */
DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */
- DEVFL_KICKME = (1<<4), /* slow polling network card catch */
- DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */
+ DEVFL_GD_NOW = (1<<4), /* allocating gendisk */
+ DEVFL_KICKME = (1<<5), /* slow polling network card catch */
+ DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
+ DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */
+ DEVFL_FREED = (1<<8), /* device has been cleaned up */
};
enum {
DEFAULTBCNT = 2 * 512, /* 2 sectors */
MIN_BUFS = 16,
- NTARGETS = 8,
+ NTARGETS = 4,
NAOEIFS = 8,
NSKBPOOLMAX = 256,
NFACTIVE = 61,
TIMERTICK = HZ / 10,
- MINTIMER = HZ >> 2,
- MAXTIMER = HZ << 1,
+ RTTSCALE = 8,
+ RTTDSCALE = 3,
+ RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
+ RTTDEV_INIT = RTTAVG_INIT / 4,
+
+ HARD_SCORN_SECS = 10, /* try another remote port after this */
+ MAX_TAINT = 1000, /* cap on aoetgt taint */
};
struct buf {
@@ -100,10 +108,17 @@ struct buf {
struct request *rq;
};
+enum frame_flags {
+ FFL_PROBE = 1,
+};
+
struct frame {
struct list_head head;
u32 tag;
+ struct timeval sent; /* high-res time packet was sent */
+ u32 sent_jiffs; /* low-res jiffies-based sent time */
ulong waited;
+ ulong waited_total;
struct aoetgt *t; /* parent target I belong to */
sector_t lba;
struct sk_buff *skb; /* command skb freed on module exit */
@@ -112,6 +127,7 @@ struct frame {
struct bio_vec *bv;
ulong bcnt;
ulong bv_off;
+ char flags;
};
struct aoeif {
@@ -122,28 +138,31 @@ struct aoeif {
struct aoetgt {
unsigned char addr[6];
- ushort nframes;
+ ushort nframes; /* cap on frames to use */
struct aoedev *d; /* parent device I belong to */
struct list_head ffree; /* list of free frames */
struct aoeif ifs[NAOEIFS];
struct aoeif *ifp; /* current aoeif in use */
- ushort nout;
- ushort maxout;
- ulong falloc;
- ulong lastwadj; /* last window adjustment */
+ ushort nout; /* number of AoE commands outstanding */
+ ushort maxout; /* current value for max outstanding */
+ ushort next_cwnd; /* incr maxout after decrementing to zero */
+ ushort ssthresh; /* slow start threshold */
+ ulong falloc; /* number of allocated frames */
+ int taint; /* how much we want to avoid this aoetgt */
int minbcnt;
int wpkts, rpkts;
+ char nout_probes;
};
struct aoedev {
struct aoedev *next;
ulong sysminor;
ulong aoemajor;
+ u32 rttavg; /* scaled AoE round trip time average */
+ u32 rttdev; /* scaled round trip time mean deviation */
u16 aoeminor;
u16 flags;
u16 nopen; /* (bd_openers isn't available without sleeping) */
- u16 rttavg; /* round trip average of requests/responses */
- u16 mintimer;
u16 fw_ver; /* version of blade's firmware */
u16 lasttag; /* last tag sent */
u16 useme;
@@ -151,7 +170,7 @@ struct aoedev {
struct work_struct work;/* disk create work struct */
struct gendisk *gd;
struct request_queue *blkq;
- struct hd_geometry geo;
+ struct hd_geometry geo;
sector_t ssize;
struct timer_list timer;
spinlock_t lock;
@@ -164,11 +183,12 @@ struct aoedev {
} ip;
ulong maxbcnt;
struct list_head factive[NFACTIVE]; /* hash of active frames */
- struct aoetgt *targets[NTARGETS];
+ struct list_head rexmitq; /* deferred retransmissions */
+ struct aoetgt **targets;
+ ulong ntargets; /* number of allocated aoetgt pointers */
struct aoetgt **tgt; /* target in use when working */
- struct aoetgt *htgt; /* target needing rexmit assistance */
- ulong ntargets;
ulong kicked;
+ char ident[512];
};
/* kthread tracking */
@@ -195,6 +215,7 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
struct sk_buff *aoecmd_ata_rsp(struct sk_buff *);
void aoecmd_cfg_rsp(struct sk_buff *);
void aoecmd_sleepwork(struct work_struct *);
+void aoecmd_wreset(struct aoetgt *t);
void aoecmd_cleanslate(struct aoedev *);
void aoecmd_exit(void);
int aoecmd_init(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 00dfc5008ad..a129f8c8073 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -16,11 +16,19 @@
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>
+#include <linux/moduleparam.h>
+#include <scsi/sg.h>
#include "aoe.h"
static DEFINE_MUTEX(aoeblk_mutex);
static struct kmem_cache *buf_pool_cache;
+/* GPFS needs a larger value than the default. */
+static int aoe_maxsectors;
+module_param(aoe_maxsectors, int, 0644);
+MODULE_PARM_DESC(aoe_maxsectors,
+ "When nonzero, set the maximum number of sectors per I/O request");
+
static ssize_t aoedisk_show_state(struct device *dev,
struct device_attribute *attr, char *page)
{
@@ -59,7 +67,7 @@ static ssize_t aoedisk_show_netif(struct device *dev,
nd = nds;
ne = nd + ARRAY_SIZE(nds);
t = d->targets;
- te = t + NTARGETS;
+ te = t + d->ntargets;
for (; t < te && *t; t++) {
ifp = (*t)->ifs;
e = ifp + NAOEIFS;
@@ -91,6 +99,14 @@ static ssize_t aoedisk_show_fwver(struct device *dev,
return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
}
+static ssize_t aoedisk_show_payload(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct aoedev *d = disk->private_data;
+
+ return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
+}
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
@@ -99,12 +115,14 @@ static struct device_attribute dev_attr_firmware_version = {
.attr = { .name = "firmware-version", .mode = S_IRUGO },
.show = aoedisk_show_fwver,
};
+static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
static struct attribute *aoe_attrs[] = {
&dev_attr_state.attr,
&dev_attr_mac.attr,
&dev_attr_netif.attr,
&dev_attr_firmware_version.attr,
+ &dev_attr_payload.attr,
NULL,
};
@@ -129,9 +147,18 @@ aoeblk_open(struct block_device *bdev, fmode_t mode)
struct aoedev *d = bdev->bd_disk->private_data;
ulong flags;
+ if (!virt_addr_valid(d)) {
+ pr_crit("aoe: invalid device pointer in %s\n",
+ __func__);
+ WARN_ON(1);
+ return -ENODEV;
+ }
+ if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
+ return -ENODEV;
+
mutex_lock(&aoeblk_mutex);
spin_lock_irqsave(&d->lock, flags);
- if (d->flags & DEVFL_UP) {
+ if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
d->nopen++;
spin_unlock_irqrestore(&d->lock, flags);
mutex_unlock(&aoeblk_mutex);
@@ -195,9 +222,38 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
+static int
+aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
+{
+ struct aoedev *d;
+
+ if (!arg)
+ return -EINVAL;
+
+ d = bdev->bd_disk->private_data;
+ if ((d->flags & DEVFL_UP) == 0) {
+ pr_err("aoe: disk not up\n");
+ return -ENODEV;
+ }
+
+ if (cmd == HDIO_GET_IDENTITY) {
+ if (!copy_to_user((void __user *) arg, &d->ident,
+ sizeof(d->ident)))
+ return 0;
+ return -EFAULT;
+ }
+
+ /* udev calls scsi_id, which uses SG_IO, resulting in noise */
+ if (cmd != SG_IO)
+ pr_info("aoe: unknown ioctl 0x%x\n", cmd);
+
+ return -ENOTTY;
+}
+
static const struct block_device_operations aoe_bdops = {
.open = aoeblk_open,
.release = aoeblk_release,
+ .ioctl = aoeblk_ioctl,
.getgeo = aoeblk_getgeo,
.owner = THIS_MODULE,
};
@@ -212,6 +268,18 @@ aoeblk_gdalloc(void *vp)
struct request_queue *q;
enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
ulong flags;
+ int late = 0;
+
+ spin_lock_irqsave(&d->lock, flags);
+ if (d->flags & DEVFL_GDALLOC
+ && !(d->flags & DEVFL_TKILL)
+ && !(d->flags & DEVFL_GD_NOW))
+ d->flags |= DEVFL_GD_NOW;
+ else
+ late = 1;
+ spin_unlock_irqrestore(&d->lock, flags);
+ if (late)
+ return;
gd = alloc_disk(AOE_PARTITIONS);
if (gd == NULL) {
@@ -231,23 +299,24 @@ aoeblk_gdalloc(void *vp)
if (q == NULL) {
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
d->aoemajor, d->aoeminor);
- mempool_destroy(mp);
- goto err_disk;
+ goto err_mempool;
}
- d->blkq = blk_alloc_queue(GFP_KERNEL);
- if (!d->blkq)
- goto err_mempool;
- d->blkq->backing_dev_info.name = "aoe";
- if (bdi_init(&d->blkq->backing_dev_info))
- goto err_blkq;
spin_lock_irqsave(&d->lock, flags);
- blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS);
+ WARN_ON(!(d->flags & DEVFL_GD_NOW));
+ WARN_ON(!(d->flags & DEVFL_GDALLOC));
+ WARN_ON(d->flags & DEVFL_TKILL);
+ WARN_ON(d->gd);
+ WARN_ON(d->flags & DEVFL_UP);
+ blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
+ q->backing_dev_info.name = "aoe";
q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
d->bufpool = mp;
d->blkq = gd->queue = q;
q->queuedata = d;
d->gd = gd;
+ if (aoe_maxsectors)
+ blk_queue_max_hw_sectors(q, aoe_maxsectors);
gd->major = AOE_MAJOR;
gd->first_minor = d->sysminor;
gd->fops = &aoe_bdops;
@@ -263,18 +332,21 @@ aoeblk_gdalloc(void *vp)
add_disk(gd);
aoedisk_add_sysfs(d);
+
+ spin_lock_irqsave(&d->lock, flags);
+ WARN_ON(!(d->flags & DEVFL_GD_NOW));
+ d->flags &= ~DEVFL_GD_NOW;
+ spin_unlock_irqrestore(&d->lock, flags);
return;
-err_blkq:
- blk_cleanup_queue(d->blkq);
- d->blkq = NULL;
err_mempool:
- mempool_destroy(d->bufpool);
+ mempool_destroy(mp);
err_disk:
put_disk(gd);
err:
spin_lock_irqsave(&d->lock, flags);
- d->flags &= ~DEVFL_GDALLOC;
+ d->flags &= ~DEVFL_GD_NOW;
+ schedule_work(&d->work);
spin_unlock_irqrestore(&d->lock, flags);
}
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index ed57a890c64..42e67ad6bd2 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -39,6 +39,11 @@ struct ErrMsg {
};
static DEFINE_MUTEX(aoechr_mutex);
+
+/* A ring buffer of error messages, to be read through
+ * "/dev/etherd/err". When no messages are present,
+ * readers will block waiting for messages to appear.
+ */
static struct ErrMsg emsgs[NMSG];
static int emsgs_head_idx, emsgs_tail_idx;
static struct completion emsgs_comp;
@@ -282,7 +287,7 @@ aoechr_init(void)
int n, i;
n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
- if (n < 0) {
+ if (n < 0) {
printk(KERN_ERR "aoe: can't register char device\n");
return n;
}
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3804a0af3ef..25ef5c014fc 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -22,6 +22,7 @@
#define MAXIOC (8192) /* default meant to avoid most soft lockups */
static void ktcomplete(struct frame *, struct sk_buff *);
+static int count_targets(struct aoedev *d, int *untainted);
static struct buf *nextbuf(struct aoedev *);
@@ -29,7 +30,7 @@ static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
-static int aoe_maxout = 16;
+static int aoe_maxout = 64;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
@@ -43,6 +44,8 @@ static struct {
spinlock_t lock;
} iocq;
+static struct page *empty_page;
+
static struct sk_buff *
new_skb(ulong len)
{
@@ -59,6 +62,23 @@ new_skb(ulong len)
}
static struct frame *
+getframe_deferred(struct aoedev *d, u32 tag)
+{
+ struct list_head *head, *pos, *nx;
+ struct frame *f;
+
+ head = &d->rexmitq;
+ list_for_each_safe(pos, nx, head) {
+ f = list_entry(pos, struct frame, head);
+ if (f->tag == tag) {
+ list_del(pos);
+ return f;
+ }
+ }
+ return NULL;
+}
+
+static struct frame *
getframe(struct aoedev *d, u32 tag)
{
struct frame *f;
@@ -162,8 +182,10 @@ aoe_freetframe(struct frame *f)
t = f->t;
f->buf = NULL;
+ f->lba = 0;
f->bv = NULL;
f->r_skb = NULL;
+ f->flags = 0;
list_add(&f->head, &t->ffree);
}
@@ -217,20 +239,25 @@ newframe(struct aoedev *d)
struct frame *f;
struct aoetgt *t, **tt;
int totout = 0;
+ int use_tainted;
+ int has_untainted;
- if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
+ if (!d->targets || !d->targets[0]) {
printk(KERN_ERR "aoe: NULL TARGETS!\n");
return NULL;
}
tt = d->tgt; /* last used target */
- for (;;) {
+ for (use_tainted = 0, has_untainted = 0;;) {
tt++;
- if (tt >= &d->targets[NTARGETS] || !*tt)
+ if (tt >= &d->targets[d->ntargets] || !*tt)
tt = d->targets;
t = *tt;
- totout += t->nout;
+ if (!t->taint) {
+ has_untainted = 1;
+ totout += t->nout;
+ }
if (t->nout < t->maxout
- && t != d->htgt
+ && (use_tainted || !t->taint)
&& t->ifp->nd) {
f = newtframe(d, t);
if (f) {
@@ -239,8 +266,12 @@ newframe(struct aoedev *d)
return f;
}
}
- if (tt == d->tgt) /* we've looped and found nada */
- break;
+ if (tt == d->tgt) { /* we've looped and found nada */
+ if (!use_tainted && !has_untainted)
+ use_tainted = 1;
+ else
+ break;
+ }
}
if (totout == 0) {
d->kicked++;
@@ -277,21 +308,68 @@ fhash(struct frame *f)
list_add_tail(&f->head, &d->factive[n]);
}
+static void
+ata_rw_frameinit(struct frame *f)
+{
+ struct aoetgt *t;
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+ struct sk_buff *skb;
+ char writebit, extbit;
+
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ ah = (struct aoe_atahdr *) (h + 1);
+ skb_put(skb, sizeof(*h) + sizeof(*ah));
+ memset(h, 0, skb->len);
+
+ writebit = 0x10;
+ extbit = 0x4;
+
+ t = f->t;
+ f->tag = aoehdr_atainit(t->d, t, h);
+ fhash(f);
+ t->nout++;
+ f->waited = 0;
+ f->waited_total = 0;
+ if (f->buf)
+ f->lba = f->buf->sector;
+
+ /* set up ata header */
+ ah->scnt = f->bcnt >> 9;
+ put_lba(ah, f->lba);
+ if (t->d->flags & DEVFL_EXT) {
+ ah->aflags |= AOEAFL_EXT;
+ } else {
+ extbit = 0;
+ ah->lba3 &= 0x0f;
+ ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
+ }
+ if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
+ skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+ ah->aflags |= AOEAFL_WRITE;
+ skb->len += f->bcnt;
+ skb->data_len = f->bcnt;
+ skb->truesize += f->bcnt;
+ t->wpkts++;
+ } else {
+ t->rpkts++;
+ writebit = 0;
+ }
+
+ ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+ skb->dev = t->ifp->nd;
+}
+
static int
aoecmd_ata_rw(struct aoedev *d)
{
struct frame *f;
- struct aoe_hdr *h;
- struct aoe_atahdr *ah;
struct buf *buf;
struct aoetgt *t;
struct sk_buff *skb;
struct sk_buff_head queue;
ulong bcnt, fbcnt;
- char writebit, extbit;
-
- writebit = 0x10;
- extbit = 0x4;
buf = nextbuf(d);
if (buf == NULL)
@@ -326,50 +404,18 @@ aoecmd_ata_rw(struct aoedev *d)
} while (fbcnt);
/* initialize the headers & frame */
- skb = f->skb;
- h = (struct aoe_hdr *) skb_mac_header(skb);
- ah = (struct aoe_atahdr *) (h+1);
- skb_put(skb, sizeof *h + sizeof *ah);
- memset(h, 0, skb->len);
- f->tag = aoehdr_atainit(d, t, h);
- fhash(f);
- t->nout++;
- f->waited = 0;
f->buf = buf;
f->bcnt = bcnt;
- f->lba = buf->sector;
-
- /* set up ata header */
- ah->scnt = bcnt >> 9;
- put_lba(ah, buf->sector);
- if (d->flags & DEVFL_EXT) {
- ah->aflags |= AOEAFL_EXT;
- } else {
- extbit = 0;
- ah->lba3 &= 0x0f;
- ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
- }
- if (bio_data_dir(buf->bio) == WRITE) {
- skb_fillup(skb, f->bv, f->bv_off, bcnt);
- ah->aflags |= AOEAFL_WRITE;
- skb->len += bcnt;
- skb->data_len = bcnt;
- skb->truesize += bcnt;
- t->wpkts++;
- } else {
- t->rpkts++;
- writebit = 0;
- }
-
- ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+ ata_rw_frameinit(f);
/* mark all tracking fields and load out */
buf->nframesout += 1;
buf->sector += bcnt >> 9;
- skb->dev = t->ifp->nd;
- skb = skb_clone(skb, GFP_ATOMIC);
+ skb = skb_clone(f->skb, GFP_ATOMIC);
if (skb) {
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
__skb_queue_head_init(&queue);
__skb_queue_tail(&queue, skb);
aoenet_xmit(&queue);
@@ -442,11 +488,14 @@ resend(struct aoedev *d, struct frame *f)
h = (struct aoe_hdr *) skb_mac_header(skb);
ah = (struct aoe_atahdr *) (h+1);
- snprintf(buf, sizeof buf,
- "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
- "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
- h->src, h->dst, t->nout);
- aoechr_error(buf);
+ if (!(f->flags & FFL_PROBE)) {
+ snprintf(buf, sizeof(buf),
+ "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
+ "retransmit", d->aoemajor, d->aoeminor,
+ f->tag, jiffies, n,
+ h->src, h->dst, t->nout);
+ aoechr_error(buf);
+ }
f->tag = n;
fhash(f);
@@ -458,12 +507,46 @@ resend(struct aoedev *d, struct frame *f)
skb = skb_clone(skb, GFP_ATOMIC);
if (skb == NULL)
return;
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
__skb_queue_head_init(&queue);
__skb_queue_tail(&queue, skb);
aoenet_xmit(&queue);
}
static int
+tsince_hr(struct frame *f)
+{
+ struct timeval now;
+ int n;
+
+ do_gettimeofday(&now);
+ n = now.tv_usec - f->sent.tv_usec;
+ n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
+
+ if (n < 0)
+ n = -n;
+
+ /* For relatively long periods, use jiffies to avoid
+ * discrepancies caused by updates to the system time.
+ *
+ * On system with HZ of 1000, 32-bits is over 49 days
+ * worth of jiffies, or over 71 minutes worth of usecs.
+ *
+ * Jiffies overflow is handled by subtraction of unsigned ints:
+ * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
+ * $3 = 4
+ * (gdb)
+ */
+ if (n > USEC_PER_SEC / 4) {
+ n = ((u32) jiffies) - f->sent_jiffs;
+ n *= USEC_PER_SEC / HZ;
+ }
+
+ return n;
+}
+
+static int
tsince(u32 tag)
{
int n;
@@ -472,7 +555,7 @@ tsince(u32 tag)
n -= tag & 0xffff;
if (n < 0)
n += 1<<16;
- return n;
+ return jiffies_to_usecs(n + 1);
}
static struct aoeif *
@@ -503,70 +586,189 @@ ejectif(struct aoetgt *t, struct aoeif *ifp)
dev_put(nd);
}
-static int
-sthtith(struct aoedev *d)
+static struct frame *
+reassign_frame(struct frame *f)
{
- struct frame *f, *nf;
- struct list_head *nx, *pos, *head;
+ struct frame *nf;
struct sk_buff *skb;
- struct aoetgt *ht = d->htgt;
- int i;
- for (i = 0; i < NFACTIVE; i++) {
- head = &d->factive[i];
- list_for_each_safe(pos, nx, head) {
- f = list_entry(pos, struct frame, head);
- if (f->t != ht)
- continue;
+ nf = newframe(f->t->d);
+ if (!nf)
+ return NULL;
+ if (nf->t == f->t) {
+ aoe_freetframe(nf);
+ return NULL;
+ }
- nf = newframe(d);
- if (!nf)
- return 0;
+ skb = nf->skb;
+ nf->skb = f->skb;
+ nf->buf = f->buf;
+ nf->bcnt = f->bcnt;
+ nf->lba = f->lba;
+ nf->bv = f->bv;
+ nf->bv_off = f->bv_off;
+ nf->waited = 0;
+ nf->waited_total = f->waited_total;
+ nf->sent = f->sent;
+ nf->sent_jiffs = f->sent_jiffs;
+ f->skb = skb;
+
+ return nf;
+}
- /* remove frame from active list */
- list_del(pos);
+static void
+probe(struct aoetgt *t)
+{
+ struct aoedev *d;
+ struct frame *f;
+ struct sk_buff *skb;
+ struct sk_buff_head queue;
+ size_t n, m;
+ int frag;
- /* reassign all pertinent bits to new outbound frame */
- skb = nf->skb;
- nf->skb = f->skb;
- nf->buf = f->buf;
- nf->bcnt = f->bcnt;
- nf->lba = f->lba;
- nf->bv = f->bv;
- nf->bv_off = f->bv_off;
- nf->waited = 0;
- f->skb = skb;
+ d = t->d;
+ f = newtframe(d, t);
+ if (!f) {
+ pr_err("%s %pm for e%ld.%d: %s\n",
+ "aoe: cannot probe remote address",
+ t->addr,
+ (long) d->aoemajor, d->aoeminor,
+ "no frame available");
+ return;
+ }
+ f->flags |= FFL_PROBE;
+ ifrotate(t);
+ f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
+ ata_rw_frameinit(f);
+ skb = f->skb;
+ for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
+ if (n < PAGE_SIZE)
+ m = n;
+ else
+ m = PAGE_SIZE;
+ skb_fill_page_desc(skb, frag, empty_page, 0, m);
+ }
+ skb->len += f->bcnt;
+ skb->data_len = f->bcnt;
+ skb->truesize += f->bcnt;
+
+ skb = skb_clone(f->skb, GFP_ATOMIC);
+ if (skb) {
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
+ __skb_queue_head_init(&queue);
+ __skb_queue_tail(&queue, skb);
+ aoenet_xmit(&queue);
+ }
+}
+
+static long
+rto(struct aoedev *d)
+{
+ long t;
+
+ t = 2 * d->rttavg >> RTTSCALE;
+ t += 8 * d->rttdev >> RTTDSCALE;
+ if (t == 0)
+ t = 1;
+
+ return t;
+}
+
+static void
+rexmit_deferred(struct aoedev *d)
+{
+ struct aoetgt *t;
+ struct frame *f;
+ struct frame *nf;
+ struct list_head *pos, *nx, *head;
+ int since;
+ int untainted;
+
+ count_targets(d, &untainted);
+
+ head = &d->rexmitq;
+ list_for_each_safe(pos, nx, head) {
+ f = list_entry(pos, struct frame, head);
+ t = f->t;
+ if (t->taint) {
+ if (!(f->flags & FFL_PROBE)) {
+ nf = reassign_frame(f);
+ if (nf) {
+ if (t->nout_probes == 0
+ && untainted > 0) {
+ probe(t);
+ t->nout_probes++;
+ }
+ list_replace(&f->head, &nf->head);
+ pos = &nf->head;
+ aoe_freetframe(f);
+ f = nf;
+ t = f->t;
+ }
+ } else if (untainted < 1) {
+ /* don't probe w/o other untainted aoetgts */
+ goto stop_probe;
+ } else if (tsince_hr(f) < t->taint * rto(d)) {
+ /* reprobe slowly when taint is high */
+ continue;
+ }
+ } else if (f->flags & FFL_PROBE) {
+stop_probe: /* don't probe untainted aoetgts */
+ list_del(pos);
aoe_freetframe(f);
- ht->nout--;
- nf->t->nout++;
- resend(d, nf);
+ /* leaving d->kicked, because this is routine */
+ f->t->d->flags |= DEVFL_KICKME;
+ continue;
}
+ if (t->nout >= t->maxout)
+ continue;
+ list_del(pos);
+ t->nout++;
+ if (f->flags & FFL_PROBE)
+ t->nout_probes++;
+ since = tsince_hr(f);
+ f->waited += since;
+ f->waited_total += since;
+ resend(d, f);
}
- /* We've cleaned up the outstanding so take away his
- * interfaces so he won't be used. We should remove him from
- * the target array here, but cleaning up a target is
- * involved. PUNT!
- */
- memset(ht->ifs, 0, sizeof ht->ifs);
- d->htgt = NULL;
- return 1;
}
-static inline unsigned char
-ata_scnt(unsigned char *packet) {
- struct aoe_hdr *h;
- struct aoe_atahdr *ah;
+/* An aoetgt accumulates demerits quickly, and successful
+ * probing redeems the aoetgt slowly.
+ */
+static void
+scorn(struct aoetgt *t)
+{
+ int n;
- h = (struct aoe_hdr *) packet;
- ah = (struct aoe_atahdr *) (h+1);
- return ah->scnt;
+ n = t->taint++;
+ t->taint += t->taint * 2;
+ if (n > t->taint)
+ t->taint = n;
+ if (t->taint > MAX_TAINT)
+ t->taint = MAX_TAINT;
+}
+
+static int
+count_targets(struct aoedev *d, int *untainted)
+{
+ int i, good;
+
+ for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
+ if (d->targets[i]->taint == 0)
+ good++;
+
+ if (untainted)
+ *untainted = good;
+ return i;
}
static void
rexmit_timer(ulong vp)
{
struct aoedev *d;
- struct aoetgt *t, **tt, **te;
+ struct aoetgt *t;
struct aoeif *ifp;
struct frame *f;
struct list_head *head, *pos, *nx;
@@ -574,15 +776,18 @@ rexmit_timer(ulong vp)
register long timeout;
ulong flags, n;
int i;
+ int utgts; /* number of aoetgt descriptors (not slots) */
+ int since;
d = (struct aoedev *) vp;
- /* timeout is always ~150% of the moving average */
- timeout = d->rttavg;
- timeout += timeout >> 1;
-
spin_lock_irqsave(&d->lock, flags);
+ /* timeout based on observed timings and variations */
+ timeout = rto(d);
+
+ utgts = count_targets(d, NULL);
+
if (d->flags & DEVFL_TKILL) {
spin_unlock_irqrestore(&d->lock, flags);
return;
@@ -593,67 +798,61 @@ rexmit_timer(ulong vp)
head = &d->factive[i];
list_for_each_safe(pos, nx, head) {
f = list_entry(pos, struct frame, head);
- if (tsince(f->tag) < timeout)
+ if (tsince_hr(f) < timeout)
break; /* end of expired frames */
/* move to flist for later processing */
list_move_tail(pos, &flist);
}
}
- /* window check */
- tt = d->targets;
- te = tt + d->ntargets;
- for (; tt < te && (t = *tt); tt++) {
- if (t->nout == t->maxout
- && t->maxout < t->nframes
- && (jiffies - t->lastwadj)/HZ > 10) {
- t->maxout++;
- t->lastwadj = jiffies;
- }
- }
-
- if (!list_empty(&flist)) { /* retransmissions necessary */
- n = d->rttavg <<= 1;
- if (n > MAXTIMER)
- d->rttavg = MAXTIMER;
- }
/* process expired frames */
while (!list_empty(&flist)) {
pos = flist.next;
f = list_entry(pos, struct frame, head);
- n = f->waited += timeout;
- n /= HZ;
- if (n > aoe_deadsecs) {
+ since = tsince_hr(f);
+ n = f->waited_total + since;
+ n /= USEC_PER_SEC;
+ if (aoe_deadsecs
+ && n > aoe_deadsecs
+ && !(f->flags & FFL_PROBE)) {
/* Waited too long. Device failure.
* Hang all frames on first hash bucket for downdev
* to clean up.
*/
list_splice(&flist, &d->factive[0]);
aoedev_downdev(d);
- break;
+ goto out;
}
- list_del(pos);
t = f->t;
- if (n > aoe_deadsecs/2)
- d->htgt = t; /* see if another target can help */
-
- if (t->nout == t->maxout) {
- if (t->maxout > 1)
- t->maxout--;
- t->lastwadj = jiffies;
+ n = f->waited + since;
+ n /= USEC_PER_SEC;
+ if (aoe_deadsecs && utgts > 0
+ && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
+ scorn(t); /* avoid this target */
+
+ if (t->maxout != 1) {
+ t->ssthresh = t->maxout / 2;
+ t->maxout = 1;
}
- ifp = getif(t, f->skb->dev);
- if (ifp && ++ifp->lost > (t->nframes << 1)
- && (ifp != t->ifs || t->ifs[1].nd)) {
- ejectif(t, ifp);
- ifp = NULL;
+ if (f->flags & FFL_PROBE) {
+ t->nout_probes--;
+ } else {
+ ifp = getif(t, f->skb->dev);
+ if (ifp && ++ifp->lost > (t->nframes << 1)
+ && (ifp != t->ifs || t->ifs[1].nd)) {
+ ejectif(t, ifp);
+ ifp = NULL;
+ }
}
- resend(d, f);
+ list_move_tail(pos, &d->rexmitq);
+ t->nout--;
}
+ rexmit_deferred(d);
- if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
+out:
+ if ((d->flags & DEVFL_KICKME) && d->blkq) {
d->flags &= ~DEVFL_KICKME;
d->blkq->request_fn(d->blkq);
}
@@ -774,8 +973,7 @@ nextbuf(struct aoedev *d)
void
aoecmd_work(struct aoedev *d)
{
- if (d->htgt && !sthtith(d))
- return;
+ rexmit_deferred(d);
while (aoecmd_ata_rw(d))
;
}
@@ -809,6 +1007,17 @@ aoecmd_sleepwork(struct work_struct *work)
}
static void
+ata_ident_fixstring(u16 *id, int ns)
+{
+ u16 s;
+
+ while (ns-- > 0) {
+ s = *id;
+ *id++ = s >> 8 | s << 8;
+ }
+}
+
+static void
ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
{
u64 ssize;
@@ -843,6 +1052,11 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
}
+ ata_ident_fixstring((u16 *) &id[10<<1], 10); /* serial */
+ ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */
+ ata_ident_fixstring((u16 *) &id[27<<1], 20); /* model */
+ memcpy(d->ident, id, sizeof(d->ident));
+
if (d->ssize != ssize)
printk(KERN_INFO
"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
@@ -862,26 +1076,28 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
}
static void
-calc_rttavg(struct aoedev *d, int rtt)
+calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
{
register long n;
n = rtt;
- if (n < 0) {
- n = -rtt;
- if (n < MINTIMER)
- n = MINTIMER;
- else if (n > MAXTIMER)
- n = MAXTIMER;
- d->mintimer += (n - d->mintimer) >> 1;
- } else if (n < d->mintimer)
- n = d->mintimer;
- else if (n > MAXTIMER)
- n = MAXTIMER;
-
- /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
- n -= d->rttavg;
- d->rttavg += n >> 2;
+
+ /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
+ n -= d->rttavg >> RTTSCALE;
+ d->rttavg += n;
+ if (n < 0)
+ n = -n;
+ n -= d->rttdev >> RTTDSCALE;
+ d->rttdev += n;
+
+ if (!t || t->maxout >= t->nframes)
+ return;
+ if (t->maxout < t->ssthresh)
+ t->maxout += 1;
+ else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
+ t->maxout += 1;
+ t->next_cwnd = t->maxout;
+ }
}
static struct aoetgt *
@@ -890,7 +1106,7 @@ gettgt(struct aoedev *d, char *addr)
struct aoetgt **t, **e;
t = d->targets;
- e = t + NTARGETS;
+ e = t + d->ntargets;
for (; t < e && *t; t++)
if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
return *t;
@@ -935,7 +1151,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
/* cf. http://lkml.org/lkml/2006/10/31/28 */
if (!fastfail)
- q->request_fn(q);
+ __blk_run_queue(q);
}
static void
@@ -966,19 +1182,22 @@ ktiocomplete(struct frame *f)
struct aoeif *ifp;
struct aoedev *d;
long n;
+ int untainted;
if (f == NULL)
return;
t = f->t;
d = t->d;
+ skb = f->r_skb;
+ buf = f->buf;
+ if (f->flags & FFL_PROBE)
+ goto out;
+ if (!skb) /* just fail the buf. */
+ goto noskb;
hout = (struct aoe_hdr *) skb_mac_header(f->skb);
ahout = (struct aoe_atahdr *) (hout+1);
- buf = f->buf;
- skb = f->r_skb;
- if (skb == NULL)
- goto noskb; /* just fail the buf. */
hin = (struct aoe_hdr *) skb->data;
skb_pull(skb, sizeof(*hin));
@@ -988,9 +1207,9 @@ ktiocomplete(struct frame *f)
pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
ahout->cmdstat, ahin->cmdstat,
d->aoemajor, d->aoeminor);
-noskb: if (buf)
+noskb: if (buf)
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
- goto badrsp;
+ goto out;
}
n = ahout->scnt << 9;
@@ -998,8 +1217,10 @@ noskb: if (buf)
case ATA_CMD_PIO_READ:
case ATA_CMD_PIO_READ_EXT:
if (skb->len < n) {
- pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n",
- skb->len, n);
+ pr_err("%s e%ld.%d. skb->len=%d need=%ld\n",
+ "aoe: runt data size in read from",
+ (long) d->aoemajor, d->aoeminor,
+ skb->len, n);
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
break;
}
@@ -1010,13 +1231,13 @@ noskb: if (buf)
ifp = getif(t, skb->dev);
if (ifp)
ifp->lost = 0;
- if (d->htgt == t) /* I'll help myself, thank you. */
- d->htgt = NULL;
spin_unlock_irq(&d->lock);
break;
case ATA_CMD_ID_ATA:
if (skb->len < 512) {
- pr_info("aoe: runt data size in ataid. skb->len=%d\n",
+ pr_info("%s e%ld.%d. skb->len=%d need=512\n",
+ "aoe: runt data size in ataid from",
+ (long) d->aoemajor, d->aoeminor,
skb->len);
break;
}
@@ -1032,16 +1253,23 @@ noskb: if (buf)
be16_to_cpu(get_unaligned(&hin->major)),
hin->minor);
}
-badrsp:
+out:
spin_lock_irq(&d->lock);
+ if (t->taint > 0
+ && --t->taint > 0
+ && t->nout_probes == 0) {
+ count_targets(d, &untainted);
+ if (untainted > 0) {
+ probe(t);
+ t->nout_probes++;
+ }
+ }
aoe_freetframe(f);
if (buf && --buf->nframesout == 0 && buf->resid == 0)
aoe_end_buf(d, buf);
- aoecmd_work(d);
-
spin_unlock_irq(&d->lock);
aoedev_put(d);
dev_kfree_skb(skb);
@@ -1141,7 +1369,6 @@ aoecmd_ata_rsp(struct sk_buff *skb)
struct aoedev *d;
struct aoe_hdr *h;
struct frame *f;
- struct aoetgt *t;
u32 n;
ulong flags;
char ebuf[128];
@@ -1162,23 +1389,32 @@ aoecmd_ata_rsp(struct sk_buff *skb)
n = be32_to_cpu(get_unaligned(&h->tag));
f = getframe(d, n);
- if (f == NULL) {
- calc_rttavg(d, -tsince(n));
- spin_unlock_irqrestore(&d->lock, flags);
- aoedev_put(d);
- snprintf(ebuf, sizeof ebuf,
- "%15s e%d.%d tag=%08x@%08lx\n",
- "unexpected rsp",
- get_unaligned_be16(&h->major),
- h->minor,
- get_unaligned_be32(&h->tag),
- jiffies);
- aoechr_error(ebuf);
- return skb;
+ if (f) {
+ calc_rttavg(d, f->t, tsince_hr(f));
+ f->t->nout--;
+ if (f->flags & FFL_PROBE)
+ f->t->nout_probes--;
+ } else {
+ f = getframe_deferred(d, n);
+ if (f) {
+ calc_rttavg(d, NULL, tsince_hr(f));
+ } else {
+ calc_rttavg(d, NULL, tsince(n));
+ spin_unlock_irqrestore(&d->lock, flags);
+ aoedev_put(d);
+ snprintf(ebuf, sizeof(ebuf),
+ "%15s e%d.%d tag=%08x@%08lx s=%pm d=%pm\n",
+ "unexpected rsp",
+ get_unaligned_be16(&h->major),
+ h->minor,
+ get_unaligned_be32(&h->tag),
+ jiffies,
+ h->src,
+ h->dst);
+ aoechr_error(ebuf);
+ return skb;
+ }
}
- t = f->t;
- calc_rttavg(d, tsince(f->tag));
- t->nout--;
aoecmd_work(d);
spin_unlock_irqrestore(&d->lock, flags);
@@ -1201,7 +1437,7 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
aoenet_xmit(&queue);
}
-
+
struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
@@ -1227,6 +1463,7 @@ aoecmd_ata_id(struct aoedev *d)
fhash(f);
t->nout++;
f->waited = 0;
+ f->waited_total = 0;
/* set up ata header */
ah->scnt = 1;
@@ -1235,41 +1472,69 @@ aoecmd_ata_id(struct aoedev *d)
skb->dev = t->ifp->nd;
- d->rttavg = MAXTIMER;
+ d->rttavg = RTTAVG_INIT;
+ d->rttdev = RTTDEV_INIT;
d->timer.function = rexmit_timer;
- return skb_clone(skb, GFP_ATOMIC);
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (skb) {
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
+ }
+
+ return skb;
}
-
+
+static struct aoetgt **
+grow_targets(struct aoedev *d)
+{
+ ulong oldn, newn;
+ struct aoetgt **tt;
+
+ oldn = d->ntargets;
+ newn = oldn * 2;
+ tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
+ if (!tt)
+ return NULL;
+ memmove(tt, d->targets, sizeof(*d->targets) * oldn);
+ d->tgt = tt + (d->tgt - d->targets);
+ kfree(d->targets);
+ d->targets = tt;
+ d->ntargets = newn;
+
+ return &d->targets[oldn];
+}
+
static struct aoetgt *
addtgt(struct aoedev *d, char *addr, ulong nframes)
{
struct aoetgt *t, **tt, **te;
tt = d->targets;
- te = tt + NTARGETS;
+ te = tt + d->ntargets;
for (; tt < te && *tt; tt++)
;
if (tt == te) {
- printk(KERN_INFO
- "aoe: device addtgt failure; too many targets\n");
- return NULL;
+ tt = grow_targets(d);
+ if (!tt)
+ goto nomem;
}
t = kzalloc(sizeof(*t), GFP_ATOMIC);
- if (!t) {
- printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
- return NULL;
- }
-
- d->ntargets++;
+ if (!t)
+ goto nomem;
t->nframes = nframes;
t->d = d;
memcpy(t->addr, addr, sizeof t->addr);
t->ifp = t->ifs;
- t->maxout = t->nframes;
+ aoecmd_wreset(t);
+ t->maxout = t->nframes / 2;
INIT_LIST_HEAD(&t->ffree);
return *tt = t;
+
+ nomem:
+ pr_info("aoe: cannot allocate memory to add target\n");
+ return NULL;
}
static void
@@ -1279,7 +1544,7 @@ setdbcnt(struct aoedev *d)
int bcnt = 0;
t = d->targets;
- e = t + NTARGETS;
+ e = t + d->ntargets;
for (; t < e && *t; t++)
if (bcnt == 0 || bcnt > (*t)->minbcnt)
bcnt = (*t)->minbcnt;
@@ -1373,7 +1638,11 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
spin_lock_irqsave(&d->lock, flags);
t = gettgt(d, h->src);
- if (!t) {
+ if (t) {
+ t->nframes = n;
+ if (n < t->maxout)
+ aoecmd_wreset(t);
+ } else {
t = addtgt(d, h->src, n);
if (!t)
goto bail;
@@ -1402,17 +1671,26 @@ bail:
}
void
+aoecmd_wreset(struct aoetgt *t)
+{
+ t->maxout = 1;
+ t->ssthresh = t->nframes / 2;
+ t->next_cwnd = t->nframes;
+}
+
+void
aoecmd_cleanslate(struct aoedev *d)
{
struct aoetgt **t, **te;
- d->mintimer = MINTIMER;
+ d->rttavg = RTTAVG_INIT;
+ d->rttdev = RTTDEV_INIT;
d->maxbcnt = 0;
t = d->targets;
- te = t + NTARGETS;
+ te = t + d->ntargets;
for (; t < te && *t; t++)
- (*t)->maxout = (*t)->nframes;
+ aoecmd_wreset(*t);
}
void
@@ -1460,6 +1738,14 @@ aoe_flush_iocq(void)
int __init
aoecmd_init(void)
{
+ void *p;
+
+ /* get_zeroed_page returns page with ref count 1 */
+ p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+ if (!p)
+ return -ENOMEM;
+ empty_page = virt_to_page(p);
+
INIT_LIST_HEAD(&iocq.head);
spin_lock_init(&iocq.lock);
init_waitqueue_head(&ktiowq);
@@ -1475,4 +1761,7 @@ aoecmd_exit(void)
{
aoe_ktstop(&kts);
aoe_flush_iocq();
+
+ free_page((unsigned long) page_address(empty_page));
+ empty_page = NULL;
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 90e5b537f94..98f2965778b 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -15,7 +15,6 @@
#include "aoe.h"
static void dummy_timer(ulong);
-static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
@@ -69,25 +68,34 @@ minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
NPERSHELF = 16,
};
+ if (aoemin >= NPERSHELF) {
+ pr_err("aoe: %s %d slots per shelf\n",
+ "static minor device numbers support only",
+ NPERSHELF);
+ error = -1;
+ goto out;
+ }
+
n = aoemaj * NPERSHELF + aoemin;
- if (aoemin >= NPERSHELF || n >= N_DEVS) {
+ if (n >= N_DEVS) {
pr_err("aoe: %s with e%ld.%d\n",
"cannot use static minor device numbers",
aoemaj, aoemin);
error = -1;
- } else {
- spin_lock_irqsave(&used_minors_lock, flags);
- if (test_bit(n, used_minors)) {
- pr_err("aoe: %s %lu\n",
- "existing device already has static minor number",
- n);
- error = -1;
- } else
- set_bit(n, used_minors);
- spin_unlock_irqrestore(&used_minors_lock, flags);
+ goto out;
}
- *sysminor = n;
+ spin_lock_irqsave(&used_minors_lock, flags);
+ if (test_bit(n, used_minors)) {
+ pr_err("aoe: %s %lu\n",
+ "existing device already has static minor number",
+ n);
+ error = -1;
+ } else
+ set_bit(n, used_minors);
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+ *sysminor = n * AOE_PARTITIONS;
+out:
return error;
}
@@ -170,41 +178,50 @@ aoe_failip(struct aoedev *d)
aoe_end_request(d, rq, 0);
}
+static void
+downdev_frame(struct list_head *pos)
+{
+ struct frame *f;
+
+ f = list_entry(pos, struct frame, head);
+ list_del(pos);
+ if (f->buf) {
+ f->buf->nframesout--;
+ aoe_failbuf(f->t->d, f->buf);
+ }
+ aoe_freetframe(f);
+}
+
void
aoedev_downdev(struct aoedev *d)
{
struct aoetgt *t, **tt, **te;
- struct frame *f;
struct list_head *head, *pos, *nx;
struct request *rq;
int i;
d->flags &= ~DEVFL_UP;
- /* clean out active buffers */
+ /* clean out active and to-be-retransmitted buffers */
for (i = 0; i < NFACTIVE; i++) {
head = &d->factive[i];
- list_for_each_safe(pos, nx, head) {
- f = list_entry(pos, struct frame, head);
- list_del(pos);
- if (f->buf) {
- f->buf->nframesout--;
- aoe_failbuf(d, f->buf);
- }
- aoe_freetframe(f);
- }
+ list_for_each_safe(pos, nx, head)
+ downdev_frame(pos);
}
+ head = &d->rexmitq;
+ list_for_each_safe(pos, nx, head)
+ downdev_frame(pos);
+
/* reset window dressings */
tt = d->targets;
- te = tt + NTARGETS;
+ te = tt + d->ntargets;
for (; tt < te && (t = *tt); tt++) {
- t->maxout = t->nframes;
+ aoecmd_wreset(t);
t->nout = 0;
}
/* clean out the in-process request (if any) */
aoe_failip(d);
- d->htgt = NULL;
/* fast fail all pending I/O */
if (d->blkq) {
@@ -218,12 +235,48 @@ aoedev_downdev(struct aoedev *d)
set_capacity(d->gd, 0);
}
+/* return whether the user asked for this particular
+ * device to be flushed
+ */
+static int
+user_req(char *s, size_t slen, struct aoedev *d)
+{
+ char *p;
+ size_t lim;
+
+ if (!d->gd)
+ return 0;
+ p = strrchr(d->gd->disk_name, '/');
+ if (!p)
+ p = d->gd->disk_name;
+ else
+ p += 1;
+ lim = sizeof(d->gd->disk_name);
+ lim -= p - d->gd->disk_name;
+ if (slen < lim)
+ lim = slen;
+
+ return !strncmp(s, p, lim);
+}
+
static void
-aoedev_freedev(struct aoedev *d)
+freedev(struct aoedev *d)
{
struct aoetgt **t, **e;
+ int freeing = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->lock, flags);
+ if (d->flags & DEVFL_TKILL
+ && !(d->flags & DEVFL_FREEING)) {
+ d->flags |= DEVFL_FREEING;
+ freeing = 1;
+ }
+ spin_unlock_irqrestore(&d->lock, flags);
+ if (!freeing)
+ return;
- cancel_work_sync(&d->work);
+ del_timer_sync(&d->timer);
if (d->gd) {
aoedisk_rm_sysfs(d);
del_gendisk(d->gd);
@@ -231,61 +284,113 @@ aoedev_freedev(struct aoedev *d)
blk_cleanup_queue(d->blkq);
}
t = d->targets;
- e = t + NTARGETS;
+ e = t + d->ntargets;
for (; t < e && *t; t++)
freetgt(d, *t);
if (d->bufpool)
mempool_destroy(d->bufpool);
skbpoolfree(d);
minor_free(d->sysminor);
- kfree(d);
+
+ spin_lock_irqsave(&d->lock, flags);
+ d->flags |= DEVFL_FREED;
+ spin_unlock_irqrestore(&d->lock, flags);
}
-int
-aoedev_flush(const char __user *str, size_t cnt)
+enum flush_parms {
+ NOT_EXITING = 0,
+ EXITING = 1,
+};
+
+static int
+flush(const char __user *str, size_t cnt, int exiting)
{
ulong flags;
struct aoedev *d, **dd;
- struct aoedev *rmd = NULL;
char buf[16];
int all = 0;
+ int specified = 0; /* flush a specific device */
+ unsigned int skipflags;
+
+ skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;
- if (cnt >= 3) {
+ if (!exiting && cnt >= 3) {
if (cnt > sizeof buf)
cnt = sizeof buf;
if (copy_from_user(buf, str, cnt))
return -EFAULT;
all = !strncmp(buf, "all", 3);
+ if (!all)
+ specified = 1;
}
+ flush_scheduled_work();
+ /* pass one: without sleeping, do aoedev_downdev */
spin_lock_irqsave(&devlist_lock, flags);
- dd = &devlist;
- while ((d = *dd)) {
+ for (d = devlist; d; d = d->next) {
spin_lock(&d->lock);
- if ((!all && (d->flags & DEVFL_UP))
- || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+ if (exiting) {
+ /* unconditionally take each device down */
+ } else if (specified) {
+ if (!user_req(buf, cnt, d))
+ goto cont;
+ } else if ((!all && (d->flags & DEVFL_UP))
+ || d->flags & skipflags
|| d->nopen
- || d->ref) {
- spin_unlock(&d->lock);
- dd = &d->next;
- continue;
- }
- *dd = d->next;
+ || d->ref)
+ goto cont;
+
aoedev_downdev(d);
d->flags |= DEVFL_TKILL;
+cont:
spin_unlock(&d->lock);
- d->next = rmd;
- rmd = d;
}
spin_unlock_irqrestore(&devlist_lock, flags);
- while ((d = rmd)) {
- rmd = d->next;
- del_timer_sync(&d->timer);
- aoedev_freedev(d); /* must be able to sleep */
+
+ /* pass two: call freedev, which might sleep,
+ * for aoedevs marked with DEVFL_TKILL
+ */
+restart:
+ spin_lock_irqsave(&devlist_lock, flags);
+ for (d = devlist; d; d = d->next) {
+ spin_lock(&d->lock);
+ if (d->flags & DEVFL_TKILL
+ && !(d->flags & DEVFL_FREEING)) {
+ spin_unlock(&d->lock);
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ freedev(d);
+ goto restart;
+ }
+ spin_unlock(&d->lock);
}
+
+ /* pass three: remove aoedevs marked with DEVFL_FREED */
+ for (dd = &devlist, d = *dd; d; d = *dd) {
+ struct aoedev *doomed = NULL;
+
+ spin_lock(&d->lock);
+ if (d->flags & DEVFL_FREED) {
+ *dd = d->next;
+ doomed = d;
+ } else {
+ dd = &d->next;
+ }
+ spin_unlock(&d->lock);
+ if (doomed)
+ kfree(doomed->targets);
+ kfree(doomed);
+ }
+ spin_unlock_irqrestore(&devlist_lock, flags);
+
return 0;
}
+int
+aoedev_flush(const char __user *str, size_t cnt)
+{
+ return flush(str, cnt, NOT_EXITING);
+}
+
/* This has been confirmed to occur once with Tms=3*1000 due to the
* driver changing link and not processing its transmit ring. The
* problem is hard enough to solve by returning an error that I'm
@@ -332,13 +437,20 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
struct aoedev *d;
int i;
ulong flags;
- ulong sysminor;
+ ulong sysminor = 0;
spin_lock_irqsave(&devlist_lock, flags);
for (d=devlist; d; d=d->next)
if (d->aoemajor == maj && d->aoeminor == min) {
+ spin_lock(&d->lock);
+ if (d->flags & DEVFL_TKILL) {
+ spin_unlock(&d->lock);
+ d = NULL;
+ goto out;
+ }
d->ref++;
+ spin_unlock(&d->lock);
break;
}
if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
@@ -346,6 +458,13 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
d = kcalloc(1, sizeof *d, GFP_ATOMIC);
if (!d)
goto out;
+ d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
+ if (!d->targets) {
+ kfree(d);
+ d = NULL;
+ goto out;
+ }
+ d->ntargets = NTARGETS;
INIT_WORK(&d->work, aoecmd_sleepwork);
spin_lock_init(&d->lock);
skb_queue_head_init(&d->skbpool);
@@ -359,10 +478,12 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
d->ref = 1;
for (i = 0; i < NFACTIVE; i++)
INIT_LIST_HEAD(&d->factive[i]);
+ INIT_LIST_HEAD(&d->rexmitq);
d->sysminor = sysminor;
d->aoemajor = maj;
d->aoeminor = min;
- d->mintimer = MINTIMER;
+ d->rttavg = RTTAVG_INIT;
+ d->rttdev = RTTDEV_INIT;
d->next = devlist;
devlist = d;
out:
@@ -396,21 +517,9 @@ freetgt(struct aoedev *d, struct aoetgt *t)
void
aoedev_exit(void)
{
- struct aoedev *d;
- ulong flags;
-
+ flush_scheduled_work();
aoe_flush_iocq();
- while ((d = devlist)) {
- devlist = d->next;
-
- spin_lock_irqsave(&d->lock, flags);
- aoedev_downdev(d);
- d->flags |= DEVFL_TKILL;
- spin_unlock_irqrestore(&d->lock, flags);
-
- del_timer_sync(&d->timer);
- aoedev_freedev(d);
- }
+ flush(NULL, 0, EXITING);
}
int __init
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 04793c2c701..4b987c2fefb 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -105,7 +105,7 @@ aoe_init(void)
aoechr_exit();
chr_fail:
aoedev_exit();
-
+
printk(KERN_INFO "aoe: initialisation failure.\n");
return ret;
}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 162c6471275..71d3ea8d300 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -31,7 +31,7 @@ enum {
static char aoe_iflist[IFLISTSZ];
module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600);
-MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\"");
+MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=dev1[,dev2...]");
static wait_queue_head_t txwq;
static struct ktstate kts;
@@ -52,13 +52,18 @@ static struct sk_buff_head skbtxq;
/* enters with txlock held */
static int
-tx(void)
+tx(void) __must_hold(&txlock)
{
struct sk_buff *skb;
+ struct net_device *ifp;
while ((skb = skb_dequeue(&skbtxq))) {
spin_unlock_irq(&txlock);
- dev_queue_xmit(skb);
+ ifp = skb->dev;
+ if (dev_queue_xmit(skb) == NET_XMIT_DROP && net_ratelimit())
+ pr_warn("aoe: packet could not be sent on %s. %s\n",
+ ifp ? ifp->name : "netif",
+ "consider increasing tx_queue_len");
spin_lock_irq(&txlock);
}
return 0;
@@ -119,8 +124,8 @@ aoenet_xmit(struct sk_buff_head *queue)
}
}
-/*
- * (1) len doesn't include the header by default. I want this.
+/*
+ * (1) len doesn't include the header by default. I want this.
*/
static int
aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev)
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e04c63ec775..6526157edaf 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -5198,7 +5198,6 @@ static void cciss_shutdown(struct pci_dev *pdev)
return;
}
/* write all data in the battery backed cache to disk */
- memset(flush_buf, 0, 4);
return_code = sendcmd_withirq(h, CCISS_CACHE_FLUSH, flush_buf,
4, 0, CTLR_LUNID, TYPE_CMD);
kfree(flush_buf);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 17c675c5229..2ddd64a9ffd 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4109,12 +4109,19 @@ static struct platform_driver floppy_driver = {
static struct platform_device floppy_device[N_DRIVE];
+static bool floppy_available(int drive)
+{
+ if (!(allowed_drive_mask & (1 << drive)))
+ return false;
+ if (fdc_state[FDC(drive)].version == FDC_NONE)
+ return false;
+ return true;
+}
+
static struct kobject *floppy_find(dev_t dev, int *part, void *data)
{
int drive = (*part & 3) | ((*part & 0x80) >> 5);
- if (drive >= N_DRIVE ||
- !(allowed_drive_mask & (1 << drive)) ||
- fdc_state[FDC(drive)].version == FDC_NONE)
+ if (drive >= N_DRIVE || !floppy_available(drive))
return NULL;
if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
return NULL;
@@ -4124,8 +4131,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
static int __init do_floppy_init(void)
{
- int i, unit, drive;
- int err, dr;
+ int i, unit, drive, err;
set_debugt();
interruptjiffies = resultjiffies = jiffies;
@@ -4137,34 +4143,32 @@ static int __init do_floppy_init(void)
raw_cmd = NULL;
- for (dr = 0; dr < N_DRIVE; dr++) {
- disks[dr] = alloc_disk(1);
- if (!disks[dr]) {
- err = -ENOMEM;
- goto out_put_disk;
- }
+ floppy_wq = alloc_ordered_workqueue("floppy", 0);
+ if (!floppy_wq)
+ return -ENOMEM;
- floppy_wq = alloc_ordered_workqueue("floppy", 0);
- if (!floppy_wq) {
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ disks[drive] = alloc_disk(1);
+ if (!disks[drive]) {
err = -ENOMEM;
goto out_put_disk;
}
- disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
- if (!disks[dr]->queue) {
+ disks[drive]->queue = blk_init_queue(do_fd_request, &floppy_lock);
+ if (!disks[drive]->queue) {
err = -ENOMEM;
- goto out_destroy_workq;
+ goto out_put_disk;
}
- blk_queue_max_hw_sectors(disks[dr]->queue, 64);
- disks[dr]->major = FLOPPY_MAJOR;
- disks[dr]->first_minor = TOMINOR(dr);
- disks[dr]->fops = &floppy_fops;
- sprintf(disks[dr]->disk_name, "fd%d", dr);
+ blk_queue_max_hw_sectors(disks[drive]->queue, 64);
+ disks[drive]->major = FLOPPY_MAJOR;
+ disks[drive]->first_minor = TOMINOR(drive);
+ disks[drive]->fops = &floppy_fops;
+ sprintf(disks[drive]->disk_name, "fd%d", drive);
- init_timer(&motor_off_timer[dr]);
- motor_off_timer[dr].data = dr;
- motor_off_timer[dr].function = motor_off_callback;
+ init_timer(&motor_off_timer[drive]);
+ motor_off_timer[drive].data = drive;
+ motor_off_timer[drive].function = motor_off_callback;
}
err = register_blkdev(FLOPPY_MAJOR, "fd");
@@ -4282,9 +4286,7 @@ static int __init do_floppy_init(void)
}
for (drive = 0; drive < N_DRIVE; drive++) {
- if (!(allowed_drive_mask & (1 << drive)))
- continue;
- if (fdc_state[FDC(drive)].version == FDC_NONE)
+ if (!floppy_available(drive))
continue;
floppy_device[drive].name = floppy_device_name;
@@ -4293,7 +4295,7 @@ static int __init do_floppy_init(void)
err = platform_device_register(&floppy_device[drive]);
if (err)
- goto out_release_dma;
+ goto out_remove_drives;
err = device_create_file(&floppy_device[drive].dev,
&dev_attr_cmos);
@@ -4311,28 +4313,33 @@ static int __init do_floppy_init(void)
out_unreg_platform_dev:
platform_device_unregister(&floppy_device[drive]);
+out_remove_drives:
+ while (drive--) {
+ if (floppy_available(drive)) {
+ del_gendisk(disks[drive]);
+ device_remove_file(&floppy_device[drive].dev, &dev_attr_cmos);
+ platform_device_unregister(&floppy_device[drive]);
+ }
+ }
out_release_dma:
if (atomic_read(&usage_count))
floppy_release_irq_and_dma();
out_unreg_region:
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
platform_driver_unregister(&floppy_driver);
-out_destroy_workq:
- destroy_workqueue(floppy_wq);
out_unreg_blkdev:
unregister_blkdev(FLOPPY_MAJOR, "fd");
out_put_disk:
- while (dr--) {
- del_timer_sync(&motor_off_timer[dr]);
- if (disks[dr]->queue) {
- blk_cleanup_queue(disks[dr]->queue);
- /*
- * put_disk() is not paired with add_disk() and
- * will put queue reference one extra time. fix it.
- */
- disks[dr]->queue = NULL;
+ destroy_workqueue(floppy_wq);
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ if (!disks[drive])
+ break;
+ if (disks[drive]->queue) {
+ del_timer_sync(&motor_off_timer[drive]);
+ blk_cleanup_queue(disks[drive]->queue);
+ disks[drive]->queue = NULL;
}
- put_disk(disks[dr]);
+ put_disk(disks[drive]);
}
return err;
}
@@ -4548,11 +4555,12 @@ static void __exit floppy_module_exit(void)
unregister_blkdev(FLOPPY_MAJOR, "fd");
platform_driver_unregister(&floppy_driver);
+ destroy_workqueue(floppy_wq);
+
for (drive = 0; drive < N_DRIVE; drive++) {
del_timer_sync(&motor_off_timer[drive]);
- if ((allowed_drive_mask & (1 << drive)) &&
- fdc_state[FDC(drive)].version != FDC_NONE) {
+ if (floppy_available(drive)) {
del_gendisk(disks[drive]);
device_remove_file(&floppy_device[drive].dev, &dev_attr_cmos);
platform_device_unregister(&floppy_device[drive]);
@@ -4572,7 +4580,6 @@ static void __exit floppy_module_exit(void)
cancel_delayed_work_sync(&fd_timeout);
cancel_delayed_work_sync(&fd_timer);
- destroy_workqueue(floppy_wq);
if (atomic_read(&usage_count))
floppy_release_irq_and_dma();
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 800aec7927d..ae125127062 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -985,8 +985,21 @@ static int loop_clr_fd(struct loop_device *lo)
if (lo->lo_state != Lo_bound)
return -ENXIO;
- if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
- return -EBUSY;
+ /*
+ * If we've explicitly asked to tear down the loop device,
+ * and it has an elevated reference count, set it for auto-teardown when
+ * the last reference goes away. This stops $!~#$@ udev from
+ * preventing teardown because it decided that it needs to run blkid on
+ * the loopback device whenever they appear. xfstests is notorious for
+ * failing tests because blkid via udev races with a losetup
+ * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
+ * command to fail with EBUSY.
+ */
+ if (lo->lo_refcnt > 1) {
+ lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+ mutex_unlock(&lo->lo_ctl_mutex);
+ return 0;
+ }
if (filp == NULL)
return -EINVAL;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f946d31d691..3fd10099045 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -559,7 +559,7 @@ static void mtip_timeout_function(unsigned long int data)
struct mtip_cmd *command;
int tag, cmdto_cnt = 0;
unsigned int bit, group;
- unsigned int num_command_slots = port->dd->slot_groups * 32;
+ unsigned int num_command_slots;
unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
if (unlikely(!port))
@@ -572,6 +572,7 @@ static void mtip_timeout_function(unsigned long int data)
}
/* clear the tag accumulator */
memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
+ num_command_slots = port->dd->slot_groups * 32;
for (tag = 0; tag < num_command_slots; tag++) {
/*
@@ -625,12 +626,13 @@ static void mtip_timeout_function(unsigned long int data)
}
}
- if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
+ if (cmdto_cnt) {
print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
-
- mtip_restart_port(port);
+ if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
+ mtip_restart_port(port);
+ wake_up_interruptible(&port->svc_wait);
+ }
clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
- wake_up_interruptible(&port->svc_wait);
}
if (port->ic_pause_timer) {
@@ -2035,8 +2037,9 @@ static unsigned int implicit_sector(unsigned char command,
}
return rv;
}
-
-static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout)
+static void mtip_set_timeout(struct driver_data *dd,
+ struct host_to_dev_fis *fis,
+ unsigned int *timeout, u8 erasemode)
{
switch (fis->command) {
case ATA_CMD_DOWNLOAD_MICRO:
@@ -2044,7 +2047,10 @@ static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout)
break;
case ATA_CMD_SEC_ERASE_UNIT:
case 0xFC:
- *timeout = 240000; /* 4 minutes */
+ if (erasemode)
+ *timeout = ((*(dd->port->identify + 90) * 2) * 60000);
+ else
+ *timeout = ((*(dd->port->identify + 89) * 2) * 60000);
break;
case ATA_CMD_STANDBYNOW1:
*timeout = 120000; /* 2 minutes */
@@ -2087,6 +2093,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
unsigned int transfer_size;
unsigned long task_file_data;
int intotal = outtotal + req_task->out_size;
+ int erasemode = 0;
taskout = req_task->out_size;
taskin = req_task->in_size;
@@ -2212,7 +2219,13 @@ static int exec_drive_taskfile(struct driver_data *dd,
fis.lba_hi,
fis.device);
- mtip_set_timeout(&fis, &timeout);
+ /* check for erase mode support during secure erase.*/
+ if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf &&
+ (outbuf[0] & MTIP_SEC_ERASE_MODE)) {
+ erasemode = 1;
+ }
+
+ mtip_set_timeout(dd, &fis, &timeout, erasemode);
/* Determine the correct transfer size.*/
if (force_single_sector)
@@ -2428,7 +2441,7 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
* return value
* None
*/
-static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
+static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
int nsect, int nents, int tag, void *callback,
void *data, int dir)
{
@@ -2436,6 +2449,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
struct mtip_port *port = dd->port;
struct mtip_cmd *command = &port->commands[tag];
int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ u64 start = sector;
/* Map the scatter list for DMA access */
nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
@@ -2454,8 +2468,12 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start,
fis->opts = 1 << 7;
fis->command =
(dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
- *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF);
- *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF);
+ fis->lba_low = start & 0xFF;
+ fis->lba_mid = (start >> 8) & 0xFF;
+ fis->lba_hi = (start >> 16) & 0xFF;
+ fis->lba_low_ex = (start >> 24) & 0xFF;
+ fis->lba_mid_ex = (start >> 32) & 0xFF;
+ fis->lba_hi_ex = (start >> 40) & 0xFF;
fis->device = 1 << 6;
fis->features = nsect & 0xFF;
fis->features_ex = (nsect >> 8) & 0xFF;
@@ -3870,7 +3888,12 @@ static int mtip_block_remove(struct driver_data *dd)
* Delete our gendisk structure. This also removes the device
* from /dev
*/
- del_gendisk(dd->disk);
+ if (dd->disk) {
+ if (dd->disk->queue)
+ del_gendisk(dd->disk);
+ else
+ put_disk(dd->disk);
+ }
spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index);
@@ -3904,7 +3927,13 @@ static int mtip_block_shutdown(struct driver_data *dd)
"Shutting down %s ...\n", dd->disk->disk_name);
/* Delete our gendisk structure, and cleanup the blk queue. */
- del_gendisk(dd->disk);
+ if (dd->disk) {
+ if (dd->disk->queue)
+ del_gendisk(dd->disk);
+ else
+ put_disk(dd->disk);
+ }
+
spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 18627a1d04c..b1742640556 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -33,6 +33,9 @@
/* offset of Device Control register in PCIe extended capabilites space */
#define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48
+/* check for erase mode support during secure erase */
+#define MTIP_SEC_ERASE_MODE 0x2
+
/* # of times to retry timed out/failed IOs */
#define MTIP_MAX_RETRIES 2
@@ -152,14 +155,14 @@ enum {
MTIP_DDF_REBUILD_FAILED_BIT = 8,
};
-__packed struct smart_attr{
+struct smart_attr {
u8 attr_id;
u16 flags;
u8 cur;
u8 worst;
u32 data;
u8 res[3];
-};
+} __packed;
/* Register Frame Information Structure (FIS), host to device. */
struct host_to_dev_fis {
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 74374fb762a..5ac841ff6cc 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -161,10 +161,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
static void make_response(struct xen_blkif *blkif, u64 id,
unsigned short op, int st);
-#define foreach_grant(pos, rbtree, node) \
- for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+#define foreach_grant_safe(pos, n, rbtree, node) \
+ for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
+ (n) = rb_next(&(pos)->node); \
&(pos)->node != NULL; \
- (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+ (pos) = container_of(n, typeof(*(pos)), node), \
+ (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
static void add_persistent_gnt(struct rb_root *root,
@@ -217,10 +219,11 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
+ struct rb_node *n;
int ret = 0;
int segs_to_unmap = 0;
- foreach_grant(persistent_gnt, root, node) {
+ foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle ==
BLKBACK_INVALID_HANDLE);
gnttab_set_unmap_op(&unmap[segs_to_unmap],
@@ -230,9 +233,6 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
persistent_gnt->handle);
pages[segs_to_unmap] = persistent_gnt->page;
- rb_erase(&persistent_gnt->node, root);
- kfree(persistent_gnt);
- num--;
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
!rb_next(&persistent_gnt->node)) {
@@ -241,6 +241,10 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
BUG_ON(ret);
segs_to_unmap = 0;
}
+
+ rb_erase(&persistent_gnt->node, root);
+ kfree(persistent_gnt);
+ num--;
}
BUG_ON(num != 0);
}
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index ae7951f0e26..6072390c7f5 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -159,9 +159,8 @@ struct xen_vbd {
struct block_device *bdev;
/* Cached size parameter. */
sector_t size;
- bool flush_support;
- bool discard_secure;
-
+ unsigned int flush_support:1;
+ unsigned int discard_secure:1;
unsigned int feature_gnt_persistent:1;
unsigned int overflow_max_grants:1;
};
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index a03ecbb0044..63980722db4 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -105,11 +105,10 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
- blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL);
+ blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
if (!blkif)
return ERR_PTR(-ENOMEM);
- memset(blkif, 0, sizeof(*blkif));
blkif->domid = domid;
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
@@ -197,7 +196,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
}
}
-void xen_blkif_free(struct xen_blkif *blkif)
+static void xen_blkif_free(struct xen_blkif *blkif)
{
if (!atomic_dec_and_test(&blkif->refcnt))
BUG();
@@ -258,7 +257,7 @@ static struct attribute_group xen_vbdstat_group = {
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
-int xenvbd_sysfs_addif(struct xenbus_device *dev)
+static int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
int error;
@@ -282,7 +281,7 @@ fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
return error;
}
-void xenvbd_sysfs_delif(struct xenbus_device *dev)
+static void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
device_remove_file(&dev->dev, &dev_attr_mode);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 96e9b00db08..11043c18ac5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
{
struct llist_node *all_gnts;
struct grant *persistent_gnt;
+ struct llist_node *n;
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
@@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
/* Remove all persistent grants */
if (info->persistent_gnts_c) {
all_gnts = llist_del_all(&info->persistent_gnts);
- llist_for_each_entry(persistent_gnt, all_gnts, node) {
+ llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(pfn_to_page(persistent_gnt->pfn));
kfree(persistent_gnt);
@@ -835,7 +836,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
struct blkif_response *bret)
{
- int i;
+ int i = 0;
struct bio_vec *bvec;
struct req_iterator iter;
unsigned long flags;
@@ -852,7 +853,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
*/
rq_for_each_segment(bvec, s->request, iter) {
BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
- i = offset >> PAGE_SHIFT;
+ if (bvec->bv_offset < offset)
+ i++;
BUG_ON(i >= s->req.u.rw.nr_segments);
shared_data = kmap_atomic(
pfn_to_page(s->grants_used[i]->pfn));
@@ -861,7 +863,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
bvec->bv_len);
bvec_kunmap_irq(bvec_data, &flags);
kunmap_atomic(shared_data);
- offset += bvec->bv_len;
+ offset = bvec->bv_offset + bvec->bv_len;
}
}
/* Add the persistent grant into the list of free grants */