summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/Kconfig | 2
-rw-r--r-- drivers/block/Makefile | 1
-rw-r--r-- drivers/block/aoe/aoe.h | 10
-rw-r--r-- drivers/block/aoe/aoecmd.c | 153
-rw-r--r-- drivers/block/brd.c | 16
-rw-r--r-- drivers/block/cciss.c | 4
-rw-r--r-- drivers/block/drbd/drbd_actlog.c | 2
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c | 2
-rw-r--r-- drivers/block/drbd/drbd_main.c | 27
-rw-r--r-- drivers/block/drbd/drbd_receiver.c | 19
-rw-r--r-- drivers/block/drbd/drbd_req.c | 6
-rw-r--r-- drivers/block/drbd/drbd_req.h | 2
-rw-r--r-- drivers/block/drbd/drbd_worker.c | 8
-rw-r--r-- drivers/block/floppy.c | 52
-rw-r--r-- drivers/block/loop.c | 29
-rw-r--r-- drivers/block/mg_disk.c | 2
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c | 270
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.h | 14
-rw-r--r-- drivers/block/nbd.c | 14
-rw-r--r-- drivers/block/null_blk.c | 5
-rw-r--r-- drivers/block/nvme-core.c | 752
-rw-r--r-- drivers/block/nvme-scsi.c | 147
-rw-r--r-- drivers/block/paride/pg.c | 2
-rw-r--r-- drivers/block/pktcdvd.c | 186
-rw-r--r-- drivers/block/ps3disk.c | 17
-rw-r--r-- drivers/block/ps3vram.c | 12
-rw-r--r-- drivers/block/rbd.c | 394
-rw-r--r-- drivers/block/rsxx/dev.c | 6
-rw-r--r-- drivers/block/rsxx/dma.c | 15
-rw-r--r-- drivers/block/sx8.c | 16
-rw-r--r-- drivers/block/umem.c | 53
-rw-r--r-- drivers/block/xen-blkback/blkback.c | 2
-rw-r--r-- drivers/block/xen-blkfront.c | 2
-rw-r--r-- drivers/block/zram/Kconfig | 24
-rw-r--r-- drivers/block/zram/Makefile | 3
-rw-r--r-- drivers/block/zram/zram_drv.c | 958
-rw-r--r-- drivers/block/zram/zram_drv.h | 109
37 files changed, 2500 insertions, 836 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 9ffa90c6201..014a1cfc41c 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -108,6 +108,8 @@ source "drivers/block/paride/Kconfig"
source "drivers/block/mtip32xx/Kconfig"
+source "drivers/block/zram/Kconfig"
+
config BLK_CPQ_DA
tristate "Compaq SMART2 support"
depends on PCI && VIRT_TO_BUS && 0
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 816d979c326..02b688d1438 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
+obj-$(CONFIG_ZRAM) += zram/
nvme-y := nvme-core.o nvme-scsi.o
skd-y := skd_main.o
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 14a9d191231..9220f8e833d 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,11 +100,8 @@ enum {
struct buf {
ulong nframesout;
- ulong resid;
- ulong bv_resid;
- sector_t sector;
struct bio *bio;
- struct bio_vec *bv;
+ struct bvec_iter iter;
struct request *rq;
};
@@ -120,13 +117,10 @@ struct frame {
ulong waited;
ulong waited_total;
struct aoetgt *t; /* parent target I belong to */
- sector_t lba;
struct sk_buff *skb; /* command skb freed on module exit */
struct sk_buff *r_skb; /* response skb for async processing */
struct buf *buf;
- struct bio_vec *bv;
- ulong bcnt;
- ulong bv_off;
+ struct bvec_iter iter;
char flags;
};
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d2515435e23..8184451b57c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -196,8 +196,7 @@ aoe_freetframe(struct frame *f)
t = f->t;
f->buf = NULL;
- f->lba = 0;
- f->bv = NULL;
+ memset(&f->iter, 0, sizeof(f->iter));
f->r_skb = NULL;
f->flags = 0;
list_add(&f->head, &t->ffree);
@@ -295,21 +294,14 @@ newframe(struct aoedev *d)
}
static void
-skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
+skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
{
int frag = 0;
- ulong fcnt;
-loop:
- fcnt = bv->bv_len - (off - bv->bv_offset);
- if (fcnt > cnt)
- fcnt = cnt;
- skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
- cnt -= fcnt;
- if (cnt <= 0)
- return;
- bv++;
- off = bv->bv_offset;
- goto loop;
+ struct bio_vec bv;
+
+ __bio_for_each_segment(bv, bio, iter, iter)
+ skb_fill_page_desc(skb, frag++, bv.bv_page,
+ bv.bv_offset, bv.bv_len);
}
static void
@@ -346,12 +338,10 @@ ata_rw_frameinit(struct frame *f)
t->nout++;
f->waited = 0;
f->waited_total = 0;
- if (f->buf)
- f->lba = f->buf->sector;
/* set up ata header */
- ah->scnt = f->bcnt >> 9;
- put_lba(ah, f->lba);
+ ah->scnt = f->iter.bi_size >> 9;
+ put_lba(ah, f->iter.bi_sector);
if (t->d->flags & DEVFL_EXT) {
ah->aflags |= AOEAFL_EXT;
} else {
@@ -360,11 +350,11 @@ ata_rw_frameinit(struct frame *f)
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
}
if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
- skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+ skb_fillup(skb, f->buf->bio, f->iter);
ah->aflags |= AOEAFL_WRITE;
- skb->len += f->bcnt;
- skb->data_len = f->bcnt;
- skb->truesize += f->bcnt;
+ skb->len += f->iter.bi_size;
+ skb->data_len = f->iter.bi_size;
+ skb->truesize += f->iter.bi_size;
t->wpkts++;
} else {
t->rpkts++;
@@ -382,7 +372,6 @@ aoecmd_ata_rw(struct aoedev *d)
struct buf *buf;
struct sk_buff *skb;
struct sk_buff_head queue;
- ulong bcnt, fbcnt;
buf = nextbuf(d);
if (buf == NULL)
@@ -390,39 +379,22 @@ aoecmd_ata_rw(struct aoedev *d)
f = newframe(d);
if (f == NULL)
return 0;
- bcnt = d->maxbcnt;
- if (bcnt == 0)
- bcnt = DEFAULTBCNT;
- if (bcnt > buf->resid)
- bcnt = buf->resid;
- fbcnt = bcnt;
- f->bv = buf->bv;
- f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
- do {
- if (fbcnt < buf->bv_resid) {
- buf->bv_resid -= fbcnt;
- buf->resid -= fbcnt;
- break;
- }
- fbcnt -= buf->bv_resid;
- buf->resid -= buf->bv_resid;
- if (buf->resid == 0) {
- d->ip.buf = NULL;
- break;
- }
- buf->bv++;
- buf->bv_resid = buf->bv->bv_len;
- WARN_ON(buf->bv_resid == 0);
- } while (fbcnt);
/* initialize the headers & frame */
f->buf = buf;
- f->bcnt = bcnt;
- ata_rw_frameinit(f);
+ f->iter = buf->iter;
+ f->iter.bi_size = min_t(unsigned long,
+ d->maxbcnt ?: DEFAULTBCNT,
+ f->iter.bi_size);
+ bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
+
+ if (!buf->iter.bi_size)
+ d->ip.buf = NULL;
/* mark all tracking fields and load out */
buf->nframesout += 1;
- buf->sector += bcnt >> 9;
+
+ ata_rw_frameinit(f);
skb = skb_clone(f->skb, GFP_ATOMIC);
if (skb) {
@@ -613,10 +585,7 @@ reassign_frame(struct frame *f)
skb = nf->skb;
nf->skb = f->skb;
nf->buf = f->buf;
- nf->bcnt = f->bcnt;
- nf->lba = f->lba;
- nf->bv = f->bv;
- nf->bv_off = f->bv_off;
+ nf->iter = f->iter;
nf->waited = 0;
nf->waited_total = f->waited_total;
nf->sent = f->sent;
@@ -648,19 +617,19 @@ probe(struct aoetgt *t)
}
f->flags |= FFL_PROBE;
ifrotate(t);
- f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
+ f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
ata_rw_frameinit(f);
skb = f->skb;
- for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
+ for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
if (n < PAGE_SIZE)
m = n;
else
m = PAGE_SIZE;
skb_fill_page_desc(skb, frag, empty_page, 0, m);
}
- skb->len += f->bcnt;
- skb->data_len = f->bcnt;
- skb->truesize += f->bcnt;
+ skb->len += f->iter.bi_size;
+ skb->data_len = f->iter.bi_size;
+ skb->truesize += f->iter.bi_size;
skb = skb_clone(f->skb, GFP_ATOMIC);
if (skb) {
@@ -897,15 +866,15 @@ rqbiocnt(struct request *r)
static void
bio_pageinc(struct bio *bio)
{
- struct bio_vec *bv;
+ struct bio_vec bv;
struct page *page;
- int i;
+ struct bvec_iter iter;
- bio_for_each_segment(bv, bio, i) {
+ bio_for_each_segment(bv, bio, iter) {
/* Non-zero page count for non-head members of
* compound pages is no longer allowed by the kernel.
*/
- page = compound_trans_head(bv->bv_page);
+ page = compound_trans_head(bv.bv_page);
atomic_inc(&page->_count);
}
}
@@ -913,12 +882,12 @@ bio_pageinc(struct bio *bio)
static void
bio_pagedec(struct bio *bio)
{
- struct bio_vec *bv;
struct page *page;
- int i;
+ struct bio_vec bv;
+ struct bvec_iter iter;
- bio_for_each_segment(bv, bio, i) {
- page = compound_trans_head(bv->bv_page);
+ bio_for_each_segment(bv, bio, iter) {
+ page = compound_trans_head(bv.bv_page);
atomic_dec(&page->_count);
}
}
@@ -929,12 +898,8 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
memset(buf, 0, sizeof(*buf));
buf->rq = rq;
buf->bio = bio;
- buf->resid = bio->bi_size;
- buf->sector = bio->bi_sector;
+ buf->iter = bio->bi_iter;
bio_pageinc(bio);
- buf->bv = bio_iovec(bio);
- buf->bv_resid = buf->bv->bv_len;
- WARN_ON(buf->bv_resid == 0);
}
static struct buf *
@@ -1119,24 +1084,18 @@ gettgt(struct aoedev *d, char *addr)
}
static void
-bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt)
+bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
{
- ulong fcnt;
- char *p;
int soff = 0;
-loop:
- fcnt = bv->bv_len - (off - bv->bv_offset);
- if (fcnt > cnt)
- fcnt = cnt;
- p = page_address(bv->bv_page) + off;
- skb_copy_bits(skb, soff, p, fcnt);
- soff += fcnt;
- cnt -= fcnt;
- if (cnt <= 0)
- return;
- bv++;
- off = bv->bv_offset;
- goto loop;
+ struct bio_vec bv;
+
+ iter.bi_size = cnt;
+
+ __bio_for_each_segment(bv, bio, iter, iter) {
+ char *p = page_address(bv.bv_page) + bv.bv_offset;
+ skb_copy_bits(skb, soff, p, bv.bv_len);
+ soff += bv.bv_len;
+ }
}
void
@@ -1152,7 +1111,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
do {
bio = rq->bio;
bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
- } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size));
+ } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
/* cf. http://lkml.org/lkml/2006/10/31/28 */
if (!fastfail)
@@ -1229,7 +1188,15 @@ noskb: if (buf)
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
break;
}
- bvcpy(f->bv, f->bv_off, skb, n);
+ if (n > f->iter.bi_size) {
+ pr_err_ratelimited("%s e%ld.%d. bytes=%ld need=%u\n",
+ "aoe: too-large data size in read from",
+ (long) d->aoemajor, d->aoeminor,
+ n, f->iter.bi_size);
+ clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+ break;
+ }
+ bvcpy(skb, f->buf->bio, f->iter, n);
case ATA_CMD_PIO_WRITE:
case ATA_CMD_PIO_WRITE_EXT:
spin_lock_irq(&d->lock);
@@ -1272,7 +1239,7 @@ out:
aoe_freetframe(f);
- if (buf && --buf->nframesout == 0 && buf->resid == 0)
+ if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
aoe_end_buf(d, buf);
spin_unlock_irq(&d->lock);
@@ -1727,7 +1694,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
{
if (buf == NULL)
return;
- buf->resid = 0;
+ buf->iter.bi_size = 0;
clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
if (buf->nframesout == 0)
aoe_end_buf(d, buf);
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index d91f1a56e86..e73b85cf075 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -328,18 +328,18 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
struct block_device *bdev = bio->bi_bdev;
struct brd_device *brd = bdev->bd_disk->private_data;
int rw;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
sector_t sector;
- int i;
+ struct bvec_iter iter;
int err = -EIO;
- sector = bio->bi_sector;
+ sector = bio->bi_iter.bi_sector;
if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
goto out;
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
err = 0;
- discard_from_brd(brd, sector, bio->bi_size);
+ discard_from_brd(brd, sector, bio->bi_iter.bi_size);
goto out;
}
@@ -347,10 +347,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
if (rw == READA)
rw = READ;
- bio_for_each_segment(bvec, bio, i) {
- unsigned int len = bvec->bv_len;
- err = brd_do_bvec(brd, bvec->bv_page, len,
- bvec->bv_offset, rw, sector);
+ bio_for_each_segment(bvec, bio, iter) {
+ unsigned int len = bvec.bv_len;
+ err = brd_do_bvec(brd, bvec.bv_page, len,
+ bvec.bv_offset, rw, sector);
if (err)
break;
sector += len >> SECTOR_SHIFT;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b35fc4f5237..036e8ab86c7 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -5004,7 +5004,7 @@ reinit_after_soft_reset:
i = alloc_cciss_hba(pdev);
if (i < 0)
- return -1;
+ return -ENOMEM;
h = hba[i];
h->pdev = pdev;
@@ -5205,7 +5205,7 @@ clean_no_release_regions:
*/
pci_set_drvdata(pdev, NULL);
free_hba(h);
- return -1;
+ return -ENODEV;
}
static void cciss_shutdown(struct pci_dev *pdev)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 28c73ca320a..a9b13f2cc42 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -159,7 +159,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
bio = bio_alloc_drbd(GFP_NOIO);
bio->bi_bdev = bdev->md_bdev;
- bio->bi_sector = sector;
+ bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, page, size, 0) != size)
goto out;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b12c11ec4bd..597f111df67 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1028,7 +1028,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
} else
page = b->bm_pages[page_nr];
bio->bi_bdev = mdev->ldev->md_bdev;
- bio->bi_sector = on_disk_sector;
+ bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
bio_add_page(bio, page, len, 0);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9e3818b1bc8..929468e1512 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1537,15 +1537,17 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{
- struct bio_vec *bvec;
- int i;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+
/* hint all but last page with MSG_MORE */
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment(bvec, bio, iter) {
int err;
- err = _drbd_no_send_page(mdev, bvec->bv_page,
- bvec->bv_offset, bvec->bv_len,
- i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+ err = _drbd_no_send_page(mdev, bvec.bv_page,
+ bvec.bv_offset, bvec.bv_len,
+ bio_iter_last(bvec, iter)
+ ? 0 : MSG_MORE);
if (err)
return err;
}
@@ -1554,15 +1556,16 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{
- struct bio_vec *bvec;
- int i;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+
/* hint all but last page with MSG_MORE */
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment(bvec, bio, iter) {
int err;
- err = _drbd_send_page(mdev, bvec->bv_page,
- bvec->bv_offset, bvec->bv_len,
- i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+ err = _drbd_send_page(mdev, bvec.bv_page,
+ bvec.bv_offset, bvec.bv_len,
+ bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
if (err)
return err;
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6fa6673b36b..d073305ffd5 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1333,7 +1333,7 @@ next_bio:
goto fail;
}
/* > peer_req->i.sector, unless this is the first bio */
- bio->bi_sector = sector;
+ bio->bi_iter.bi_sector = sector;
bio->bi_bdev = mdev->ldev->backing_bdev;
bio->bi_rw = rw;
bio->bi_private = peer_req;
@@ -1353,7 +1353,7 @@ next_bio:
dev_err(DEV,
"bio_add_page failed for len=%u, "
"bi_vcnt=0 (bi_sector=%llu)\n",
- len, (unsigned long long)bio->bi_sector);
+ len, (uint64_t)bio->bi_iter.bi_sector);
err = -ENOSPC;
goto fail;
}
@@ -1595,9 +1595,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
sector_t sector, int data_size)
{
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct bio *bio;
- int dgs, err, i, expect;
+ int dgs, err, expect;
void *dig_in = mdev->tconn->int_dig_in;
void *dig_vv = mdev->tconn->int_dig_vv;
@@ -1615,13 +1616,13 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
mdev->recv_cnt += data_size>>9;
bio = req->master_bio;
- D_ASSERT(sector == bio->bi_sector);
+ D_ASSERT(sector == bio->bi_iter.bi_sector);
- bio_for_each_segment(bvec, bio, i) {
- void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
- expect = min_t(int, data_size, bvec->bv_len);
+ bio_for_each_segment(bvec, bio, iter) {
+ void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+ expect = min_t(int, data_size, bvec.bv_len);
err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
- kunmap(bvec->bv_page);
+ kunmap(bvec.bv_page);
if (err)
return err;
data_size -= expect;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index fec7bef4499..104a040f24d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -77,8 +77,8 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
req->epoch = 0;
drbd_clear_interval(&req->i);
- req->i.sector = bio_src->bi_sector;
- req->i.size = bio_src->bi_size;
+ req->i.sector = bio_src->bi_iter.bi_sector;
+ req->i.size = bio_src->bi_iter.bi_size;
req->i.local = true;
req->i.waiting = false;
@@ -1280,7 +1280,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
/*
* what we "blindly" assume:
*/
- D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
+ D_ASSERT(IS_ALIGNED(bio->bi_iter.bi_size, 512));
inc_ap_bio(mdev);
__drbd_make_request(mdev, bio, start_time);
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 978cb1addc9..28e15d91197 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -269,7 +269,7 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
/* Short lived temporary struct on the stack.
* We could squirrel the error to be returned into
- * bio->bi_size, or similar. But that would be too ugly. */
+ * bio->bi_iter.bi_size, or similar. But that would be too ugly. */
struct bio_and_error {
struct bio *bio;
int error;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 891c0ecaa29..84d3175d493 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -313,8 +313,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
{
struct hash_desc desc;
struct scatterlist sg;
- struct bio_vec *bvec;
- int i;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
desc.tfm = tfm;
desc.flags = 0;
@@ -322,8 +322,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
sg_init_table(&sg, 1);
crypto_hash_init(&desc);
- bio_for_each_segment(bvec, bio, i) {
- sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+ bio_for_each_segment(bvec, bio, iter) {
+ sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
crypto_hash_update(&desc, &sg, sg.length);
}
crypto_hash_final(&desc, digest);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 000abe2f105..2023043ce7c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2351,7 +2351,7 @@ static void rw_interrupt(void)
/* Compute maximal contiguous buffer size. */
static int buffer_chain_size(void)
{
- struct bio_vec *bv;
+ struct bio_vec bv;
int size;
struct req_iterator iter;
char *base;
@@ -2360,10 +2360,10 @@ static int buffer_chain_size(void)
size = 0;
rq_for_each_segment(bv, current_req, iter) {
- if (page_address(bv->bv_page) + bv->bv_offset != base + size)
+ if (page_address(bv.bv_page) + bv.bv_offset != base + size)
break;
- size += bv->bv_len;
+ size += bv.bv_len;
}
return size >> 9;
@@ -2389,7 +2389,7 @@ static int transfer_size(int ssize, int max_sector, int max_size)
static void copy_buffer(int ssize, int max_sector, int max_sector_2)
{
int remaining; /* number of transferred 512-byte sectors */
- struct bio_vec *bv;
+ struct bio_vec bv;
char *buffer;
char *dma_buffer;
int size;
@@ -2427,10 +2427,10 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
if (!remaining)
break;
- size = bv->bv_len;
+ size = bv.bv_len;
SUPBOUND(size, remaining);
- buffer = page_address(bv->bv_page) + bv->bv_offset;
+ buffer = page_address(bv.bv_page) + bv.bv_offset;
if (dma_buffer + size >
floppy_track_buffer + (max_buffer_sectors << 10) ||
dma_buffer < floppy_track_buffer) {
@@ -3691,9 +3691,12 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (!(mode & FMODE_NDELAY)) {
if (mode & (FMODE_READ|FMODE_WRITE)) {
UDRS->last_checked = 0;
+ clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
check_disk_change(bdev);
if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
goto out;
+ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+ goto out;
}
res = -EROFS;
if ((mode & FMODE_WRITE) &&
@@ -3746,17 +3749,29 @@ static unsigned int floppy_check_events(struct gendisk *disk,
* a disk in the drive, and whether that disk is writable.
*/
-static void floppy_rb0_complete(struct bio *bio, int err)
+struct rb0_cbdata {
+ int drive;
+ struct completion complete;
+};
+
+static void floppy_rb0_cb(struct bio *bio, int err)
{
- complete((struct completion *)bio->bi_private);
+ struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
+ int drive = cbdata->drive; /* NOTE(review): presumably consumed by the UDRS macro below - confirm */
+
+ if (err) {
+ pr_info("floppy: error %d while reading block 0\n", err);
+ set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); /* tested by floppy_open() after check_disk_change() */
+ }
+ complete(&cbdata->complete); /* wakes the waiter in __floppy_read_block_0() */
}
-static int __floppy_read_block_0(struct block_device *bdev)
+static int __floppy_read_block_0(struct block_device *bdev, int drive)
{
struct bio bio;
struct bio_vec bio_vec;
- struct completion complete;
struct page *page;
+ struct rb0_cbdata cbdata;
size_t size;
page = alloc_page(GFP_NOIO);
@@ -3769,23 +3784,26 @@ static int __floppy_read_block_0(struct block_device *bdev)
if (!size)
size = 1024;
+ cbdata.drive = drive;
+
bio_init(&bio);
bio.bi_io_vec = &bio_vec;
bio_vec.bv_page = page;
bio_vec.bv_len = size;
bio_vec.bv_offset = 0;
bio.bi_vcnt = 1;
- bio.bi_size = size;
+ bio.bi_iter.bi_size = size;
bio.bi_bdev = bdev;
- bio.bi_sector = 0;
+ bio.bi_iter.bi_sector = 0;
bio.bi_flags = (1 << BIO_QUIET);
- init_completion(&complete);
- bio.bi_private = &complete;
- bio.bi_end_io = floppy_rb0_complete;
+ /* init before submit_bio(): floppy_rb0_cb() may complete() right away */
+ init_completion(&cbdata.complete);
+ bio.bi_private = &cbdata;
+ bio.bi_end_io = floppy_rb0_cb;
submit_bio(READ, &bio);
process_fd_request();
- wait_for_completion(&complete);
+ wait_for_completion(&cbdata.complete);
__free_page(page);
@@ -3827,7 +3845,7 @@ static int floppy_revalidate(struct gendisk *disk)
UDRS->generation++;
if (drive_no_geom(drive)) {
/* auto-sensing */
- res = __floppy_read_block_0(opened_bdev[drive]);
+ res = __floppy_read_block_0(opened_bdev[drive], drive);
} else {
if (cf)
poll_drive(false, FD_RAW_NEED_DISK);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c8dac730524..66e8c3b94ef 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -288,9 +288,10 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
{
int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
struct page *page);
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct page *page = NULL;
- int i, ret = 0;
+ int ret = 0;
if (lo->transfer != transfer_none) {
page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
@@ -302,11 +303,11 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
do_lo_send = do_lo_send_direct_write;
}
- bio_for_each_segment(bvec, bio, i) {
- ret = do_lo_send(lo, bvec, pos, page);
+ bio_for_each_segment(bvec, bio, iter) {
+ ret = do_lo_send(lo, &bvec, pos, page);
if (ret < 0)
break;
- pos += bvec->bv_len;
+ pos += bvec.bv_len;
}
if (page) {
kunmap(page);
@@ -392,20 +393,20 @@ do_lo_receive(struct loop_device *lo,
static int
lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
ssize_t s;
- int i;
- bio_for_each_segment(bvec, bio, i) {
- s = do_lo_receive(lo, bvec, bsize, pos);
+ bio_for_each_segment(bvec, bio, iter) {
+ s = do_lo_receive(lo, &bvec, bsize, pos);
if (s < 0)
return s;
- if (s != bvec->bv_len) {
+ if (s != bvec.bv_len) {
zero_fill_bio(bio);
break;
}
- pos += bvec->bv_len;
+ pos += bvec.bv_len;
}
return 0;
}
@@ -415,7 +416,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
loff_t pos;
int ret;
- pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
+ pos = ((loff_t) bio->bi_iter.bi_sector << 9) + lo->lo_offset;
if (bio_rw(bio) == WRITE) {
struct file *file = lo->lo_backing_file;
@@ -444,7 +445,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
goto out;
}
ret = file->f_op->fallocate(file, mode, pos,
- bio->bi_size);
+ bio->bi_iter.bi_size);
if (unlikely(ret && ret != -EINVAL &&
ret != -EOPNOTSUPP))
ret = -EIO;
@@ -798,7 +799,7 @@ static void loop_config_discard(struct loop_device *lo)
/*
* We use punch hole to reclaim the free space used by the
- * image a.k.a. discard. However we do support discard if
+ * image a.k.a. discard. However we do not support discard if
* encryption is enabled, because it may give an attacker
* useful information.
*/
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 7bc363f1ee8..eb59b124136 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -915,7 +915,7 @@ static int mg_probe(struct platform_device *plat_dev)
/* disk reset */
if (prv_data->dev_attr == MG_STORAGE_DEV) {
- /* If POR seq. not yet finised, wait */
+ /* If POR seq. not yet finished, wait */
err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
if (err)
goto probe_err_3b;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 050c71267f1..516026954be 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -41,10 +41,31 @@
#include "mtip32xx.h"
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
-#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
-#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
-#define HW_PORT_PRIV_DMA_SZ \
- (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
+
+/* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */
+#define AHCI_RX_FIS_SZ 0x100
+#define AHCI_RX_FIS_OFFSET 0x0
+#define AHCI_IDFY_SZ ATA_SECT_SIZE
+#define AHCI_IDFY_OFFSET 0x400
+#define AHCI_SECTBUF_SZ ATA_SECT_SIZE
+#define AHCI_SECTBUF_OFFSET 0x800
+#define AHCI_SMARTBUF_SZ ATA_SECT_SIZE
+#define AHCI_SMARTBUF_OFFSET 0xC00
+/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
+#define BLOCK_DMA_ALLOC_SZ 4096
+
+/* DMA region containing command table (should be 8192 bytes) */
+#define AHCI_CMD_SLOT_SZ sizeof(struct mtip_cmd_hdr)
+#define AHCI_CMD_TBL_SZ (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
+#define AHCI_CMD_TBL_OFFSET 0x0
+
+/* DMA region per command (contains header and SGL) */
+#define AHCI_CMD_TBL_HDR_SZ 0x80
+#define AHCI_CMD_TBL_HDR_OFFSET 0x0
+#define AHCI_CMD_TBL_SGL_SZ (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
+#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
+#define CMD_DMA_ALLOC_SZ (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
+
#define HOST_CAP_NZDMA (1 << 19)
#define HOST_HSORG 0xFC
@@ -899,8 +920,9 @@ static void mtip_handle_tfe(struct driver_data *dd)
fail_reason = "thermal shutdown";
}
if (buf[288] == 0xBF) {
+ set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
dev_info(&dd->pdev->dev,
- "Drive indicates rebuild has failed.\n");
+ "Drive indicates rebuild has failed. Secure erase required.\n");
fail_all_ncq_cmds = 1;
fail_reason = "rebuild failed";
}
@@ -1566,6 +1588,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
}
#endif
+ /* Check security locked state */
+ if (port->identify[128] & 0x4)
+ set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
+ else
+ clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
+
#ifdef MTIP_TRIM /* Disabling TRIM support temporarily */
/* Demux ID.DRAT & ID.RZAT to determine trim support */
if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
@@ -1887,6 +1915,10 @@ static void mtip_dump_identify(struct mtip_port *port)
strlcpy(cbuf, (char *)(port->identify+27), 41);
dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
+ dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
+ port->identify[128],
+ port->identify[128] & 0x4 ? "(LOCKED)" : "");
+
if (mtip_hw_get_capacity(port->dd, &sectors))
dev_info(&port->dd->pdev->dev,
"Capacity: %llu sectors (%llu MB)\n",
@@ -3313,6 +3345,118 @@ st_out:
}
/*
+ * DMA region teardown: frees block1, the command list and every
+ * per-command region whose pointer is non-NULL (pointers are not reset)
+ * @dd Pointer to driver_data structure
+ *
+ * return value
+ * None
+ */
+static void mtip_dma_free(struct driver_data *dd)
+{
+ int i;
+ struct mtip_port *port = dd->port;
+
+ if (port->block1)
+ dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+ port->block1, port->block1_dma);
+
+ if (port->command_list) {
+ dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
+ port->command_list, port->command_list_dma);
+ }
+
+ for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
+ if (port->commands[i].command)
+ dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+ port->commands[i].command,
+ port->commands[i].command_dma);
+ }
+}
+
+/*
+ * DMA region setup: allocates and zeroes block1, the command list and
+ * one command region per slot, then wires up the command headers
+ * @dd Pointer to driver_data structure
+ *
+ * return value (0 when every allocation succeeds)
+ * -ENOMEM Not enough free DMA region space to initialize driver
+ */
+static int mtip_dma_alloc(struct driver_data *dd)
+{
+ struct mtip_port *port = dd->port;
+ int i, rv = 0;
+ u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+ /* Allocate dma memory for RX Fis, Identify, Sector Buffer and SMART buffer */
+ port->block1 =
+ dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+ &port->block1_dma, GFP_KERNEL);
+ if (!port->block1)
+ return -ENOMEM;
+ memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
+
+ /* Allocate dma memory for command list */
+ port->command_list =
+ dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
+ &port->command_list_dma, GFP_KERNEL);
+ if (!port->command_list) {
+ dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+ port->block1, port->block1_dma);
+ port->block1 = NULL;
+ port->block1_dma = 0;
+ return -ENOMEM;
+ }
+ memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
+
+ /* Setup all pointers into first DMA region (block1) */
+ port->rxfis = port->block1 + AHCI_RX_FIS_OFFSET;
+ port->rxfis_dma = port->block1_dma + AHCI_RX_FIS_OFFSET;
+ port->identify = port->block1 + AHCI_IDFY_OFFSET;
+ port->identify_dma = port->block1_dma + AHCI_IDFY_OFFSET;
+ port->log_buf = port->block1 + AHCI_SECTBUF_OFFSET;
+ port->log_buf_dma = port->block1_dma + AHCI_SECTBUF_OFFSET;
+ port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET;
+ port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
+
+ /* Setup per command SGL DMA region */
+
+ /* Point the command headers at the command tables */
+ for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
+ port->commands[i].command =
+ dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+ &port->commands[i].command_dma, GFP_KERNEL);
+ if (!port->commands[i].command) {
+ rv = -ENOMEM;
+ mtip_dma_free(dd);
+ return rv;
+ }
+ memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
+
+ port->commands[i].command_header = port->command_list +
+ (sizeof(struct mtip_cmd_hdr) * i);
+ port->commands[i].command_header_dma =
+ dd->port->command_list_dma +
+ (sizeof(struct mtip_cmd_hdr) * i);
+
+ if (host_cap_64)
+ port->commands[i].command_header->ctbau =
+ __force_bit2int cpu_to_le32(
+ (port->commands[i].command_dma >> 16) >> 16);
+
+ port->commands[i].command_header->ctba =
+ __force_bit2int cpu_to_le32(
+ port->commands[i].command_dma & 0xFFFFFFFF);
+
+ sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
+
+ /* Mark command as currently inactive */
+ atomic_set(&dd->port->commands[i].active, 0);
+ }
+ return 0;
+}
+
+/*
* Called once for each card.
*
* @dd Pointer to the driver data structure.
@@ -3370,83 +3514,10 @@ static int mtip_hw_init(struct driver_data *dd)
dd->port->mmio = dd->mmio + PORT_OFFSET;
dd->port->dd = dd;
- /* Allocate memory for the command list. */
- dd->port->command_list =
- dmam_alloc_coherent(&dd->pdev->dev,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
- &dd->port->command_list_dma,
- GFP_KERNEL);
- if (!dd->port->command_list) {
- dev_err(&dd->pdev->dev,
- "Memory allocation: command list\n");
- rv = -ENOMEM;
+ /* DMA allocations */
+ rv = mtip_dma_alloc(dd);
+ if (rv < 0)
goto out1;
- }
-
- /* Clear the memory we have allocated. */
- memset(dd->port->command_list,
- 0,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
-
- /* Setup the addresse of the RX FIS. */
- dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
- dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
-
- /* Setup the address of the command tables. */
- dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
- dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
-
- /* Setup the address of the identify data. */
- dd->port->identify = dd->port->command_table +
- HW_CMD_TBL_AR_SZ;
- dd->port->identify_dma = dd->port->command_tbl_dma +
- HW_CMD_TBL_AR_SZ;
-
- /* Setup the address of the sector buffer - for some non-ncq cmds */
- dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
- dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
-
- /* Setup the address of the log buf - for read log command */
- dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE;
- dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
-
- /* Setup the address of the smart buf - for smart read data command */
- dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE;
- dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
-
-
- /* Point the command headers at the command tables. */
- for (i = 0; i < num_command_slots; i++) {
- dd->port->commands[i].command_header =
- dd->port->command_list +
- (sizeof(struct mtip_cmd_hdr) * i);
- dd->port->commands[i].command_header_dma =
- dd->port->command_list_dma +
- (sizeof(struct mtip_cmd_hdr) * i);
-
- dd->port->commands[i].command =
- dd->port->command_table + (HW_CMD_TBL_SZ * i);
- dd->port->commands[i].command_dma =
- dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
-
- if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
- dd->port->commands[i].command_header->ctbau =
- __force_bit2int cpu_to_le32(
- (dd->port->commands[i].command_dma >> 16) >> 16);
- dd->port->commands[i].command_header->ctba =
- __force_bit2int cpu_to_le32(
- dd->port->commands[i].command_dma & 0xFFFFFFFF);
-
- /*
- * If this is not done, a bug is reported by the stock
- * FC11 i386. Due to the fact that it has lots of kernel
- * debugging enabled.
- */
- sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
-
- /* Mark all commands as currently inactive.*/
- atomic_set(&dd->port->commands[i].active, 0);
- }
/* Setup the pointers to the extended s_active and CI registers. */
for (i = 0; i < dd->slot_groups; i++) {
@@ -3594,12 +3665,8 @@ out3:
out2:
mtip_deinit_port(dd->port);
+ mtip_dma_free(dd);
- /* Free the command/command header memory. */
- dmam_free_coherent(&dd->pdev->dev,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
- dd->port->command_list,
- dd->port->command_list_dma);
out1:
/* Free the memory allocated for the for structure. */
kfree(dd->port);
@@ -3622,7 +3689,8 @@ static int mtip_hw_exit(struct driver_data *dd)
* saves its state.
*/
if (!dd->sr) {
- if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
+ if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
+ !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
if (mtip_standby_immediate(dd->port))
dev_warn(&dd->pdev->dev,
"STANDBY IMMEDIATE failed\n");
@@ -3641,11 +3709,9 @@ static int mtip_hw_exit(struct driver_data *dd)
irq_set_affinity_hint(dd->pdev->irq, NULL);
devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
- /* Free the command/command header memory. */
- dmam_free_coherent(&dd->pdev->dev,
- HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
- dd->port->command_list,
- dd->port->command_list_dma);
+ /* Free dma regions */
+ mtip_dma_free(dd);
+
/* Free the memory allocated for the for structure. */
kfree(dd->port);
dd->port = NULL;
@@ -3962,8 +4028,9 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
{
struct driver_data *dd = queue->queuedata;
struct scatterlist *sg;
- struct bio_vec *bvec;
- int i, nents = 0;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+ int nents = 0;
int tag = 0, unaligned = 0;
if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
@@ -3993,7 +4060,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
}
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
- bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
+ bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector,
bio_sectors(bio)));
return;
}
@@ -4006,7 +4073,8 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
dd->unal_qdepth) {
- if (bio->bi_sector % 8 != 0) /* Unaligned on 4k boundaries */
+ if (bio->bi_iter.bi_sector % 8 != 0)
+ /* Unaligned on 4k boundaries */
unaligned = 1;
else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
unaligned = 1;
@@ -4025,17 +4093,17 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
}
/* Create the scatter list for this bio. */
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment(bvec, bio, iter) {
sg_set_page(&sg[nents],
- bvec->bv_page,
- bvec->bv_len,
- bvec->bv_offset);
+ bvec.bv_page,
+ bvec.bv_len,
+ bvec.bv_offset);
nents++;
}
/* Issue the read/write. */
mtip_hw_submit_io(dd,
- bio->bi_sector,
+ bio->bi_iter.bi_sector,
bio_sectors(bio),
nents,
tag,
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 9be7a1582ad..b52e9a6d6aa 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -69,7 +69,7 @@
* Maximum number of scatter gather entries
* a single command may have.
*/
-#define MTIP_MAX_SG 128
+#define MTIP_MAX_SG 504
/*
* Maximum number of slot groups (Command Issue & s_active registers)
@@ -92,7 +92,7 @@
/* Driver name and version strings */
#define MTIP_DRV_NAME "mtip32xx"
-#define MTIP_DRV_VERSION "1.2.6os3"
+#define MTIP_DRV_VERSION "1.3.0"
/* Maximum number of minor device numbers per device. */
#define MTIP_MAX_MINORS 16
@@ -391,15 +391,13 @@ struct mtip_port {
*/
dma_addr_t rxfis_dma;
/*
- * Pointer to the beginning of the command table memory as used
- * by the driver.
+ * Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
*/
- void *command_table;
+ void *block1;
/*
- * Pointer to the beginning of the command table memory as used
- * by the DMA.
+ * DMA address of region for RX Fis, Identify, RLE10, and SMART
*/
- dma_addr_t command_tbl_dma;
+ dma_addr_t block1_dma;
/*
* Pointer to the beginning of the identify data memory as used
* by the driver.
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2dc3b5153f0..55298db36b2 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -271,18 +271,18 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
if (nbd_cmd(req) == NBD_CMD_WRITE) {
struct req_iterator iter;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
/*
* we are really probing at internals to determine
* whether to set MSG_MORE or not...
*/
rq_for_each_segment(bvec, req, iter) {
flags = 0;
- if (!rq_iter_last(req, iter))
+ if (!rq_iter_last(bvec, iter))
flags = MSG_MORE;
dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
- nbd->disk->disk_name, req, bvec->bv_len);
- result = sock_send_bvec(nbd, bvec, flags);
+ nbd->disk->disk_name, req, bvec.bv_len);
+ result = sock_send_bvec(nbd, &bvec, flags);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
@@ -378,10 +378,10 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
nbd->disk->disk_name, req);
if (nbd_cmd(req) == NBD_CMD_READ) {
struct req_iterator iter;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
rq_for_each_segment(bvec, req, iter) {
- result = sock_recv_bvec(nbd, bvec);
+ result = sock_recv_bvec(nbd, &bvec);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
@@ -389,7 +389,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
return req;
}
dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
- nbd->disk->disk_name, req, bvec->bv_len);
+ nbd->disk->disk_name, req, bvec.bv_len);
}
}
return req;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 83a598ebb65..3107282a974 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -616,6 +616,11 @@ static int __init null_init(void)
irqmode = NULL_IRQ_NONE;
}
#endif
+ if (bs > PAGE_SIZE) {
+ pr_warn("null_blk: invalid block size\n");
+ pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
+ bs = PAGE_SIZE;
+ }
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 26d03fa0bf2..51824d1f23e 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -46,7 +46,6 @@
#define NVME_Q_DEPTH 1024
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
-#define NVME_MINORS 64
#define ADMIN_TIMEOUT (60 * HZ)
static int nvme_major;
@@ -58,6 +57,17 @@ module_param(use_threaded_interrupts, int, 0);
static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
+static struct workqueue_struct *nvme_workq;
+
+static void nvme_reset_failed_dev(struct work_struct *ws);
+
+struct async_cmd_info {
+ struct kthread_work work;
+ struct kthread_worker *worker;
+ u32 result;
+ int status;
+ void *ctx;
+};
/*
* An NVM Express queue. Each device has at least two (one for admin
@@ -66,6 +76,7 @@ static struct task_struct *nvme_thread;
struct nvme_queue {
struct device *q_dmadev;
struct nvme_dev *dev;
+ char irqname[24]; /* nvme4294967295q65535\0 */
spinlock_t q_lock;
struct nvme_command *sq_cmds;
volatile struct nvme_completion *cqes;
@@ -80,9 +91,11 @@ struct nvme_queue {
u16 sq_head;
u16 sq_tail;
u16 cq_head;
+ u16 qid;
u8 cq_phase;
u8 cqe_seen;
u8 q_suspended;
+ struct async_cmd_info cmdinfo;
unsigned long cmdid_data[];
};
@@ -97,6 +110,7 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
@@ -111,6 +125,7 @@ struct nvme_cmd_info {
nvme_completion_fn fn;
void *ctx;
unsigned long timeout;
+ int aborted;
};
static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
@@ -154,6 +169,7 @@ static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx,
info[cmdid].fn = handler;
info[cmdid].ctx = ctx;
info[cmdid].timeout = jiffies + timeout;
+ info[cmdid].aborted = 0;
return cmdid;
}
@@ -172,6 +188,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
#define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE)
#define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE)
#define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE)
+#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE)
static void special_completion(struct nvme_dev *dev, void *ctx,
struct nvme_completion *cqe)
@@ -180,6 +197,10 @@ static void special_completion(struct nvme_dev *dev, void *ctx,
return;
if (ctx == CMD_CTX_FLUSH)
return;
+ if (ctx == CMD_CTX_ABORT) {
+ ++dev->abort_limit;
+ return;
+ }
if (ctx == CMD_CTX_COMPLETED) {
dev_warn(&dev->pci_dev->dev,
"completed id %d twice on queue %d\n",
@@ -196,6 +217,15 @@ static void special_completion(struct nvme_dev *dev, void *ctx,
dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx);
}
+static void async_completion(struct nvme_dev *dev, void *ctx,
+ struct nvme_completion *cqe)
+{
+ struct async_cmd_info *cmdinfo = ctx;
+ cmdinfo->result = le32_to_cpup(&cqe->result);
+ cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
+ queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
+}
+
/*
* Called with local interrupts disabled and the q_lock held. May not sleep.
*/
@@ -441,104 +471,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
return total_len;
}
-struct nvme_bio_pair {
- struct bio b1, b2, *parent;
- struct bio_vec *bv1, *bv2;
- int err;
- atomic_t cnt;
-};
-
-static void nvme_bio_pair_endio(struct bio *bio, int err)
-{
- struct nvme_bio_pair *bp = bio->bi_private;
-
- if (err)
- bp->err = err;
-
- if (atomic_dec_and_test(&bp->cnt)) {
- bio_endio(bp->parent, bp->err);
- kfree(bp->bv1);
- kfree(bp->bv2);
- kfree(bp);
- }
-}
-
-static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
- int len, int offset)
-{
- struct nvme_bio_pair *bp;
-
- BUG_ON(len > bio->bi_size);
- BUG_ON(idx > bio->bi_vcnt);
-
- bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
- if (!bp)
- return NULL;
- bp->err = 0;
-
- bp->b1 = *bio;
- bp->b2 = *bio;
-
- bp->b1.bi_size = len;
- bp->b2.bi_size -= len;
- bp->b1.bi_vcnt = idx;
- bp->b2.bi_idx = idx;
- bp->b2.bi_sector += len >> 9;
-
- if (offset) {
- bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
- GFP_ATOMIC);
- if (!bp->bv1)
- goto split_fail_1;
-
- bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
- GFP_ATOMIC);
- if (!bp->bv2)
- goto split_fail_2;
-
- memcpy(bp->bv1, bio->bi_io_vec,
- bio->bi_max_vecs * sizeof(struct bio_vec));
- memcpy(bp->bv2, bio->bi_io_vec,
- bio->bi_max_vecs * sizeof(struct bio_vec));
-
- bp->b1.bi_io_vec = bp->bv1;
- bp->b2.bi_io_vec = bp->bv2;
- bp->b2.bi_io_vec[idx].bv_offset += offset;
- bp->b2.bi_io_vec[idx].bv_len -= offset;
- bp->b1.bi_io_vec[idx].bv_len = offset;
- bp->b1.bi_vcnt++;
- } else
- bp->bv1 = bp->bv2 = NULL;
-
- bp->b1.bi_private = bp;
- bp->b2.bi_private = bp;
-
- bp->b1.bi_end_io = nvme_bio_pair_endio;
- bp->b2.bi_end_io = nvme_bio_pair_endio;
-
- bp->parent = bio;
- atomic_set(&bp->cnt, 2);
-
- return bp;
-
- split_fail_2:
- kfree(bp->bv1);
- split_fail_1:
- kfree(bp);
- return NULL;
-}
-
static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
- int idx, int len, int offset)
+ int len)
{
- struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
- if (!bp)
+ struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
+ if (!split)
return -ENOMEM;
+ bio_chain(split, bio);
+
if (bio_list_empty(&nvmeq->sq_cong))
add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
- bio_list_add(&nvmeq->sq_cong, &bp->b1);
- bio_list_add(&nvmeq->sq_cong, &bp->b2);
+ bio_list_add(&nvmeq->sq_cong, split);
+ bio_list_add(&nvmeq->sq_cong, bio);
return 0;
}
@@ -550,41 +495,44 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
struct bio *bio, enum dma_data_direction dma_dir, int psegs)
{
- struct bio_vec *bvec, *bvprv = NULL;
+ struct bio_vec bvec, bvprv;
+ struct bvec_iter iter;
struct scatterlist *sg = NULL;
- int i, length = 0, nsegs = 0, split_len = bio->bi_size;
+ int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size;
+ int first = 1;
if (nvmeq->dev->stripe_size)
split_len = nvmeq->dev->stripe_size -
- ((bio->bi_sector << 9) & (nvmeq->dev->stripe_size - 1));
+ ((bio->bi_iter.bi_sector << 9) &
+ (nvmeq->dev->stripe_size - 1));
sg_init_table(iod->sg, psegs);
- bio_for_each_segment(bvec, bio, i) {
- if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
- sg->length += bvec->bv_len;
+ bio_for_each_segment(bvec, bio, iter) {
+ if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
+ sg->length += bvec.bv_len;
} else {
- if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
- return nvme_split_and_submit(bio, nvmeq, i,
- length, 0);
+ if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
+ return nvme_split_and_submit(bio, nvmeq,
+ length);
sg = sg ? sg + 1 : iod->sg;
- sg_set_page(sg, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset);
+ sg_set_page(sg, bvec.bv_page,
+ bvec.bv_len, bvec.bv_offset);
nsegs++;
}
- if (split_len - length < bvec->bv_len)
- return nvme_split_and_submit(bio, nvmeq, i, split_len,
- split_len - length);
- length += bvec->bv_len;
+ if (split_len - length < bvec.bv_len)
+ return nvme_split_and_submit(bio, nvmeq, split_len);
+ length += bvec.bv_len;
bvprv = bvec;
+ first = 0;
}
iod->nents = nsegs;
sg_mark_end(sg);
if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
return -ENOMEM;
- BUG_ON(length != bio->bi_size);
+ BUG_ON(length != bio->bi_iter.bi_size);
return length;
}
@@ -608,8 +556,8 @@ static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
iod->npages = 0;
range->cattr = cpu_to_le32(0);
- range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift);
- range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector));
+ range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
+ range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
memset(cmnd, 0, sizeof(*cmnd));
cmnd->dsm.opcode = nvme_cmd_dsm;
@@ -674,7 +622,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
}
result = -ENOMEM;
- iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
+ iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
if (!iod)
goto nomem;
iod->private = bio;
@@ -723,7 +671,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
GFP_ATOMIC);
- cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector));
+ cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
@@ -775,7 +723,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
return 0;
- writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
nvmeq->cq_head = head;
nvmeq->cq_phase = phase;
@@ -886,12 +834,34 @@ int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
return cmdinfo.status;
}
+static int nvme_submit_async_cmd(struct nvme_queue *nvmeq,
+ struct nvme_command *cmd,
+ struct async_cmd_info *cmdinfo, unsigned timeout)
+{
+ int cmdid;
+
+ cmdid = alloc_cmdid_killable(nvmeq, cmdinfo, async_completion, timeout);
+ if (cmdid < 0)
+ return cmdid;
+ cmdinfo->status = -EINTR;
+ cmd->common.command_id = cmdid;
+ nvme_submit_cmd(nvmeq, cmd);
+ return 0;
+}
+
int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
u32 *result)
{
return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT);
}
+static int nvme_submit_admin_cmd_async(struct nvme_dev *dev,
+ struct nvme_command *cmd, struct async_cmd_info *cmdinfo)
+{
+ return nvme_submit_async_cmd(dev->queues[0], cmd, cmdinfo,
+ ADMIN_TIMEOUT);
+}
+
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
int status;
@@ -1002,6 +972,56 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
}
/**
+ * nvme_abort_cmd - Attempt aborting a command
+ * @cmdid: Command id of a timed out IO
+ * @nvmeq: The queue with timed out IO
+ *
+ * Schedule controller reset if the command was already aborted once before and
+ * still hasn't been returned to the driver, or if this is the admin queue.
+ */
+static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq)
+{
+ int a_cmdid;
+ struct nvme_command cmd;
+ struct nvme_dev *dev = nvmeq->dev;
+ struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
+
+ if (!nvmeq->qid || info[cmdid].aborted) {
+ if (work_busy(&dev->reset_work))
+ return;
+ list_del_init(&dev->node);
+ dev_warn(&dev->pci_dev->dev,
+ "I/O %d QID %d timeout, reset controller\n", cmdid,
+ nvmeq->qid);
+ PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev);
+ queue_work(nvme_workq, &dev->reset_work);
+ return;
+ }
+
+ if (!dev->abort_limit)
+ return;
+
+ a_cmdid = alloc_cmdid(dev->queues[0], CMD_CTX_ABORT, special_completion,
+ ADMIN_TIMEOUT);
+ if (a_cmdid < 0)
+ return;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abort.opcode = nvme_admin_abort_cmd;
+ cmd.abort.cid = cmdid;
+ cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
+ cmd.abort.command_id = a_cmdid;
+
+ --dev->abort_limit;
+ info[cmdid].aborted = 1;
+ info[cmdid].timeout = jiffies + ADMIN_TIMEOUT;
+
+ dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", cmdid,
+ nvmeq->qid);
+ nvme_submit_cmd(dev->queues[0], &cmd);
+}
+
+/**
* nvme_cancel_ios - Cancel outstanding I/Os
* @queue: The queue to cancel I/Os on
* @timeout: True to only cancel I/Os which have timed out
@@ -1024,7 +1044,12 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
continue;
if (info[cmdid].ctx == CMD_CTX_CANCELLED)
continue;
- dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
+ if (timeout && nvmeq->dev->initialized) {
+ nvme_abort_cmd(cmdid, nvmeq);
+ continue;
+ }
+ dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid,
+ nvmeq->qid);
ctx = cancel_cmdid(nvmeq, cmdid, &fn);
fn(nvmeq->dev, ctx, &cqe);
}
@@ -1046,26 +1071,31 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
kfree(nvmeq);
}
-static void nvme_free_queues(struct nvme_dev *dev)
+static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
int i;
- for (i = dev->queue_count - 1; i >= 0; i--) {
+ for (i = dev->queue_count - 1; i >= lowest; i--) {
nvme_free_queue(dev->queues[i]);
dev->queue_count--;
dev->queues[i] = NULL;
}
}
-static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+/**
+ * nvme_suspend_queue - put queue into suspended state
+ * @nvmeq: queue to suspend
+ *
+ * Returns 1 if already suspended, 0 otherwise.
+ */
+static int nvme_suspend_queue(struct nvme_queue *nvmeq)
{
- struct nvme_queue *nvmeq = dev->queues[qid];
- int vector = dev->entry[nvmeq->cq_vector].vector;
+ int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
spin_lock_irq(&nvmeq->q_lock);
if (nvmeq->q_suspended) {
spin_unlock_irq(&nvmeq->q_lock);
- return;
+ return 1;
}
nvmeq->q_suspended = 1;
spin_unlock_irq(&nvmeq->q_lock);
@@ -1073,18 +1103,35 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
irq_set_affinity_hint(vector, NULL);
free_irq(vector, nvmeq);
- /* Don't tell the adapter to delete the admin queue */
- if (qid) {
- adapter_delete_sq(dev, qid);
- adapter_delete_cq(dev, qid);
- }
+ return 0;
+}
+static void nvme_clear_queue(struct nvme_queue *nvmeq)
+{
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, false);
spin_unlock_irq(&nvmeq->q_lock);
}
+static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+{
+ struct nvme_queue *nvmeq = dev->queues[qid];
+
+ if (!nvmeq)
+ return;
+ if (nvme_suspend_queue(nvmeq))
+ return;
+
+ /* Don't tell the adapter to delete the admin queue.
+ * Don't tell a removed adapter to delete IO queues. */
+ if (qid && readl(&dev->bar->csts) != -1) {
+ adapter_delete_sq(dev, qid);
+ adapter_delete_cq(dev, qid);
+ }
+ nvme_clear_queue(nvmeq);
+}
+
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth, int vector)
{
@@ -1107,15 +1154,18 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_dmadev = dmadev;
nvmeq->dev = dev;
+ snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
+ dev->instance, qid);
spin_lock_init(&nvmeq->q_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
init_waitqueue_head(&nvmeq->sq_full);
init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
bio_list_init(&nvmeq->sq_cong);
- nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
+ nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
nvmeq->q_depth = depth;
nvmeq->cq_vector = vector;
+ nvmeq->qid = qid;
nvmeq->q_suspended = 1;
dev->queue_count++;
@@ -1134,11 +1184,10 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
{
if (use_threaded_interrupts)
return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector,
- nvme_irq_check, nvme_irq,
- IRQF_DISABLED | IRQF_SHARED,
+ nvme_irq_check, nvme_irq, IRQF_SHARED,
name, nvmeq);
return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq,
- IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
+ IRQF_SHARED, name, nvmeq);
}
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
@@ -1149,7 +1198,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
nvmeq->sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
- nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
+ nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
memset(nvmeq->cmdid_data, 0, extra);
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
nvme_cancel_ios(nvmeq, false);
@@ -1169,13 +1218,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
if (result < 0)
goto release_cq;
- result = queue_request_irq(dev, nvmeq, "nvme");
+ result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
if (result < 0)
goto release_sq;
- spin_lock(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->q_lock);
nvme_init_queue(nvmeq, qid);
- spin_unlock(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->q_lock);
return result;
@@ -1287,13 +1336,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
if (result)
return result;
- result = queue_request_irq(dev, nvmeq, "nvme admin");
+ result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
if (result)
return result;
- spin_lock(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->q_lock);
nvme_init_queue(nvmeq, 0);
- spin_unlock(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->q_lock);
return result;
}
@@ -1569,10 +1618,47 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
}
}
+#ifdef CONFIG_COMPAT
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+ switch (cmd) {
+ case SG_IO:
+ return nvme_sg_io32(ns, arg);
+ }
+ return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl NULL
+#endif
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+ struct nvme_dev *dev = ns->dev;
+
+ kref_get(&dev->kref);
+ return 0;
+}
+
+static void nvme_free_dev(struct kref *kref);
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+ struct nvme_ns *ns = disk->private_data;
+ struct nvme_dev *dev = ns->dev;
+
+ kref_put(&dev->kref, nvme_free_dev);
+}
+
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
+ .open = nvme_open,
+ .release = nvme_release,
};
static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
@@ -1596,13 +1682,25 @@ static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
static int nvme_kthread(void *data)
{
- struct nvme_dev *dev;
+ struct nvme_dev *dev, *next;
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
spin_lock(&dev_list_lock);
- list_for_each_entry(dev, &dev_list, node) {
+ list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
+ if (readl(&dev->bar->csts) & NVME_CSTS_CFS &&
+ dev->initialized) {
+ if (work_busy(&dev->reset_work))
+ continue;
+ list_del_init(&dev->node);
+ dev_warn(&dev->pci_dev->dev,
+ "Failed status, reset controller\n");
+ PREPARE_WORK(&dev->reset_work,
+ nvme_reset_failed_dev);
+ queue_work(nvme_workq, &dev->reset_work);
+ continue;
+ }
for (i = 0; i < dev->queue_count; i++) {
struct nvme_queue *nvmeq = dev->queues[i];
if (!nvmeq)
@@ -1623,33 +1721,6 @@ static int nvme_kthread(void *data)
return 0;
}
-static DEFINE_IDA(nvme_index_ida);
-
-static int nvme_get_ns_idx(void)
-{
- int index, error;
-
- do {
- if (!ida_pre_get(&nvme_index_ida, GFP_KERNEL))
- return -1;
-
- spin_lock(&dev_list_lock);
- error = ida_get_new(&nvme_index_ida, &index);
- spin_unlock(&dev_list_lock);
- } while (error == -EAGAIN);
-
- if (error)
- index = -1;
- return index;
-}
-
-static void nvme_put_ns_idx(int index)
-{
- spin_lock(&dev_list_lock);
- ida_remove(&nvme_index_ida, index);
- spin_unlock(&dev_list_lock);
-}
-
static void nvme_config_discard(struct nvme_ns *ns)
{
u32 logical_block_size = queue_logical_block_size(ns->queue);
@@ -1683,7 +1754,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
ns->dev = dev;
ns->queue->queuedata = ns;
- disk = alloc_disk(NVME_MINORS);
+ disk = alloc_disk(0);
if (!disk)
goto out_free_queue;
ns->ns_id = nsid;
@@ -1696,12 +1767,12 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
disk->major = nvme_major;
- disk->minors = NVME_MINORS;
- disk->first_minor = NVME_MINORS * nvme_get_ns_idx();
+ disk->first_minor = 0;
disk->fops = &nvme_fops;
disk->private_data = ns;
disk->queue = ns->queue;
disk->driverfs_dev = &dev->pci_dev->dev;
+ disk->flags = GENHD_FL_EXT_DEVT;
sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -1717,15 +1788,6 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
return NULL;
}
-static void nvme_ns_free(struct nvme_ns *ns)
-{
- int index = ns->disk->first_minor / NVME_MINORS;
- put_disk(ns->disk);
- nvme_put_ns_idx(index);
- blk_cleanup_queue(ns->queue);
- kfree(ns);
-}
-
static int set_queue_count(struct nvme_dev *dev, int count)
{
int status;
@@ -1741,11 +1803,12 @@ static int set_queue_count(struct nvme_dev *dev, int count)
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
- return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
+ return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
+ struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = dev->pci_dev;
int result, cpu, i, vecs, nr_io_queues, size, q_depth;
@@ -1772,7 +1835,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
}
/* Deregister the admin queue's interrupt */
- free_irq(dev->entry[0].vector, dev->queues[0]);
+ free_irq(dev->entry[0].vector, adminq);
vecs = nr_io_queues;
for (i = 0; i < vecs; i++)
@@ -1810,9 +1873,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
nr_io_queues = vecs;
- result = queue_request_irq(dev, dev->queues[0], "nvme admin");
+ result = queue_request_irq(dev, adminq, adminq->irqname);
if (result) {
- dev->queues[0]->q_suspended = 1;
+ adminq->q_suspended = 1;
goto free_queues;
}
@@ -1821,9 +1884,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
- spin_lock(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->q_lock);
nvme_cancel_ios(nvmeq, false);
- spin_unlock(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->q_lock);
nvme_free_queue(nvmeq);
dev->queue_count--;
@@ -1864,7 +1927,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return 0;
free_queues:
- nvme_free_queues(dev);
+ nvme_free_queues(dev, 1);
return result;
}
@@ -1876,6 +1939,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
+ struct pci_dev *pdev = dev->pci_dev;
int res;
unsigned nn, i;
struct nvme_ns *ns;
@@ -1885,8 +1949,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
dma_addr_t dma_addr;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
- mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
- GFP_KERNEL);
+ mem = dma_alloc_coherent(&pdev->dev, 8192, &dma_addr, GFP_KERNEL);
if (!mem)
return -ENOMEM;
@@ -1899,13 +1962,14 @@ static int nvme_dev_add(struct nvme_dev *dev)
ctrl = mem;
nn = le32_to_cpup(&ctrl->nn);
dev->oncs = le16_to_cpup(&ctrl->oncs);
+ dev->abort_limit = ctrl->acl + 1;
memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
if (ctrl->mdts)
dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
- if ((dev->pci_dev->vendor == PCI_VENDOR_ID_INTEL) &&
- (dev->pci_dev->device == 0x0953) && ctrl->vs[3])
+ if ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
+ (pdev->device == 0x0953) && ctrl->vs[3])
dev->stripe_size = 1 << (ctrl->vs[3] + shift);
id_ns = mem;
@@ -1953,16 +2017,21 @@ static int nvme_dev_map(struct nvme_dev *dev)
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
goto disable;
- pci_set_drvdata(pdev, dev);
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
if (!dev->bar)
goto disable;
-
- dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
+ if (readl(&dev->bar->csts) == -1) {
+ result = -ENODEV;
+ goto unmap;
+ }
+ dev->db_stride = 1 << NVME_CAP_STRIDE(readq(&dev->bar->cap));
dev->dbs = ((void __iomem *)dev->bar) + 4096;
return 0;
+ unmap:
+ iounmap(dev->bar);
+ dev->bar = NULL;
disable:
pci_release_regions(pdev);
disable_pci:
@@ -1980,37 +2049,183 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
if (dev->bar) {
iounmap(dev->bar);
dev->bar = NULL;
+ pci_release_regions(dev->pci_dev);
}
- pci_release_regions(dev->pci_dev);
if (pci_is_enabled(dev->pci_dev))
pci_disable_device(dev->pci_dev);
}
+struct nvme_delq_ctx {
+ struct task_struct *waiter;
+ struct kthread_worker *worker;
+ atomic_t refcount;
+};
+
+static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
+{
+ dq->waiter = current;
+ mb();
+
+ for (;;) {
+ set_current_state(TASK_KILLABLE);
+ if (!atomic_read(&dq->refcount))
+ break;
+ if (!schedule_timeout(ADMIN_TIMEOUT) ||
+ fatal_signal_pending(current)) {
+ set_current_state(TASK_RUNNING);
+
+ nvme_disable_ctrl(dev, readq(&dev->bar->cap));
+ nvme_disable_queue(dev, 0);
+
+ send_sig(SIGKILL, dq->worker->task, 1);
+ flush_kthread_worker(dq->worker);
+ return;
+ }
+ }
+ set_current_state(TASK_RUNNING);
+}
+
+static void nvme_put_dq(struct nvme_delq_ctx *dq)
+{
+ atomic_dec(&dq->refcount);
+ if (dq->waiter)
+ wake_up_process(dq->waiter);
+}
+
+static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
+{
+ atomic_inc(&dq->refcount);
+ return dq;
+}
+
+static void nvme_del_queue_end(struct nvme_queue *nvmeq)
+{
+ struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
+
+ nvme_clear_queue(nvmeq);
+ nvme_put_dq(dq);
+}
+
+static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
+ kthread_work_func_t fn)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.delete_queue.opcode = opcode;
+ c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
+
+ init_kthread_work(&nvmeq->cmdinfo.work, fn);
+ return nvme_submit_admin_cmd_async(nvmeq->dev, &c, &nvmeq->cmdinfo);
+}
+
+static void nvme_del_cq_work_handler(struct kthread_work *work)
+{
+ struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
+ cmdinfo.work);
+ nvme_del_queue_end(nvmeq);
+}
+
+static int nvme_delete_cq(struct nvme_queue *nvmeq)
+{
+ return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
+ nvme_del_cq_work_handler);
+}
+
+static void nvme_del_sq_work_handler(struct kthread_work *work)
+{
+ struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
+ cmdinfo.work);
+ int status = nvmeq->cmdinfo.status;
+
+ if (!status)
+ status = nvme_delete_cq(nvmeq);
+ if (status)
+ nvme_del_queue_end(nvmeq);
+}
+
+static int nvme_delete_sq(struct nvme_queue *nvmeq)
+{
+ return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
+ nvme_del_sq_work_handler);
+}
+
+static void nvme_del_queue_start(struct kthread_work *work)
+{
+ struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
+ cmdinfo.work);
+ allow_signal(SIGKILL);
+ if (nvme_delete_sq(nvmeq))
+ nvme_del_queue_end(nvmeq);
+}
+
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+ int i;
+ DEFINE_KTHREAD_WORKER_ONSTACK(worker);
+ struct nvme_delq_ctx dq;
+ struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
+ &worker, "nvme%d", dev->instance);
+
+ if (IS_ERR(kworker_task)) {
+ dev_err(&dev->pci_dev->dev,
+ "Failed to create queue del task\n");
+ for (i = dev->queue_count - 1; i > 0; i--)
+ nvme_disable_queue(dev, i);
+ return;
+ }
+
+ dq.waiter = NULL;
+ atomic_set(&dq.refcount, 0);
+ dq.worker = &worker;
+ for (i = dev->queue_count - 1; i > 0; i--) {
+ struct nvme_queue *nvmeq = dev->queues[i];
+
+ if (nvme_suspend_queue(nvmeq))
+ continue;
+ nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
+ nvmeq->cmdinfo.worker = dq.worker;
+ init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
+ queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
+ }
+ nvme_wait_dq(&dq, dev);
+ kthread_stop(kworker_task);
+}
+
static void nvme_dev_shutdown(struct nvme_dev *dev)
{
int i;
- for (i = dev->queue_count - 1; i >= 0; i--)
- nvme_disable_queue(dev, i);
+ dev->initialized = 0;
spin_lock(&dev_list_lock);
list_del_init(&dev->node);
spin_unlock(&dev_list_lock);
- if (dev->bar)
+ if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
+ for (i = dev->queue_count - 1; i >= 0; i--) {
+ struct nvme_queue *nvmeq = dev->queues[i];
+ nvme_suspend_queue(nvmeq);
+ nvme_clear_queue(nvmeq);
+ }
+ } else {
+ nvme_disable_io_queues(dev);
nvme_shutdown_ctrl(dev);
+ nvme_disable_queue(dev, 0);
+ }
nvme_dev_unmap(dev);
}
static void nvme_dev_remove(struct nvme_dev *dev)
{
- struct nvme_ns *ns, *next;
+ struct nvme_ns *ns;
- list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- list_del(&ns->list);
- del_gendisk(ns->disk);
- nvme_ns_free(ns);
+ list_for_each_entry(ns, &dev->namespaces, list) {
+ if (ns->disk->flags & GENHD_FL_UP)
+ del_gendisk(ns->disk);
+ if (!blk_queue_dying(ns->queue))
+ blk_cleanup_queue(ns->queue);
}
}
@@ -2067,14 +2282,22 @@ static void nvme_release_instance(struct nvme_dev *dev)
spin_unlock(&dev_list_lock);
}
+static void nvme_free_namespaces(struct nvme_dev *dev)
+{
+ struct nvme_ns *ns, *next;
+
+ list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+ list_del(&ns->list);
+ put_disk(ns->disk);
+ kfree(ns);
+ }
+}
+
static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
- nvme_dev_remove(dev);
- nvme_dev_shutdown(dev);
- nvme_free_queues(dev);
- nvme_release_instance(dev);
- nvme_release_prp_pools(dev);
+
+ nvme_free_namespaces(dev);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
@@ -2138,6 +2361,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
return result;
disable:
+ nvme_disable_queue(dev, 0);
spin_lock(&dev_list_lock);
list_del_init(&dev->node);
spin_unlock(&dev_list_lock);
@@ -2146,6 +2370,71 @@ static int nvme_dev_start(struct nvme_dev *dev)
return result;
}
+static int nvme_remove_dead_ctrl(void *arg)
+{
+ struct nvme_dev *dev = (struct nvme_dev *)arg;
+ struct pci_dev *pdev = dev->pci_dev;
+
+ if (pci_get_drvdata(pdev))
+ pci_stop_and_remove_bus_device(pdev);
+ kref_put(&dev->kref, nvme_free_dev);
+ return 0;
+}
+
+static void nvme_remove_disks(struct work_struct *ws)
+{
+ int i;
+ struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
+
+ nvme_dev_remove(dev);
+ spin_lock(&dev_list_lock);
+ for (i = dev->queue_count - 1; i > 0; i--) {
+ BUG_ON(!dev->queues[i] || !dev->queues[i]->q_suspended);
+ nvme_free_queue(dev->queues[i]);
+ dev->queue_count--;
+ dev->queues[i] = NULL;
+ }
+ spin_unlock(&dev_list_lock);
+}
+
+static int nvme_dev_resume(struct nvme_dev *dev)
+{
+ int ret;
+
+ ret = nvme_dev_start(dev);
+ if (ret && ret != -EBUSY)
+ return ret;
+ if (ret == -EBUSY) {
+ spin_lock(&dev_list_lock);
+ PREPARE_WORK(&dev->reset_work, nvme_remove_disks);
+ queue_work(nvme_workq, &dev->reset_work);
+ spin_unlock(&dev_list_lock);
+ }
+ dev->initialized = 1;
+ return 0;
+}
+
+static void nvme_dev_reset(struct nvme_dev *dev)
+{
+ nvme_dev_shutdown(dev);
+ if (nvme_dev_resume(dev)) {
+ dev_err(&dev->pci_dev->dev, "Device failed to resume\n");
+ kref_get(&dev->kref);
+ if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
+ dev->instance))) {
+ dev_err(&dev->pci_dev->dev,
+ "Failed to start controller remove task\n");
+ kref_put(&dev->kref, nvme_free_dev);
+ }
+ }
+}
+
+static void nvme_reset_failed_dev(struct work_struct *ws)
+{
+ struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
+ nvme_dev_reset(dev);
+}
+
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int result = -ENOMEM;
@@ -2164,8 +2453,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto free;
INIT_LIST_HEAD(&dev->namespaces);
+ INIT_WORK(&dev->reset_work, nvme_reset_failed_dev);
dev->pci_dev = pdev;
-
+ pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev);
if (result)
goto free;
@@ -2181,6 +2471,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto release_pools;
}
+ kref_init(&dev->kref);
result = nvme_dev_add(dev);
if (result)
goto shutdown;
@@ -2195,15 +2486,16 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto remove;
- kref_init(&dev->kref);
+ dev->initialized = 1;
return 0;
remove:
nvme_dev_remove(dev);
+ nvme_free_namespaces(dev);
shutdown:
nvme_dev_shutdown(dev);
release_pools:
- nvme_free_queues(dev);
+ nvme_free_queues(dev, 0);
nvme_release_prp_pools(dev);
release:
nvme_release_instance(dev);
@@ -2214,10 +2506,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return result;
}
+static void nvme_shutdown(struct pci_dev *pdev)
+{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+ nvme_dev_shutdown(dev);
+}
+
static void nvme_remove(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ spin_lock(&dev_list_lock);
+ list_del_init(&dev->node);
+ spin_unlock(&dev_list_lock);
+
+ pci_set_drvdata(pdev, NULL);
+ flush_work(&dev->reset_work);
misc_deregister(&dev->miscdev);
+ nvme_dev_remove(dev);
+ nvme_dev_shutdown(dev);
+ nvme_free_queues(dev, 0);
+ nvme_release_instance(dev);
+ nvme_release_prp_pools(dev);
kref_put(&dev->kref, nvme_free_dev);
}
@@ -2241,13 +2551,12 @@ static int nvme_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- int ret;
- ret = nvme_dev_start(ndev);
- /* XXX: should remove gendisks if resume fails */
- if (ret)
- nvme_free_queues(ndev);
- return ret;
+ if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
+ PREPARE_WORK(&ndev->reset_work, nvme_reset_failed_dev);
+ queue_work(nvme_workq, &ndev->reset_work);
+ }
+ return 0;
}
static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
@@ -2274,6 +2583,7 @@ static struct pci_driver nvme_driver = {
.id_table = nvme_id_table,
.probe = nvme_probe,
.remove = nvme_remove,
+ .shutdown = nvme_shutdown,
.driver = {
.pm = &nvme_dev_pm_ops,
},
@@ -2288,9 +2598,14 @@ static int __init nvme_init(void)
if (IS_ERR(nvme_thread))
return PTR_ERR(nvme_thread);
+ result = -ENOMEM;
+ nvme_workq = create_singlethread_workqueue("nvme");
+ if (!nvme_workq)
+ goto kill_kthread;
+
result = register_blkdev(nvme_major, "nvme");
if (result < 0)
- goto kill_kthread;
+ goto kill_workq;
else if (result > 0)
nvme_major = result;
@@ -2301,6 +2616,8 @@ static int __init nvme_init(void)
unregister_blkdev:
unregister_blkdev(nvme_major, "nvme");
+ kill_workq:
+ destroy_workqueue(nvme_workq);
kill_kthread:
kthread_stop(nvme_thread);
return result;
@@ -2310,6 +2627,7 @@ static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
unregister_blkdev(nvme_major, "nvme");
+ destroy_workqueue(nvme_workq);
kthread_stop(nvme_thread);
}
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 4a4ff4eb8e2..4a0ceb64e26 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -25,6 +25,7 @@
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
+#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -3038,6 +3039,152 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
return retcode;
}
+#ifdef CONFIG_COMPAT
+typedef struct sg_io_hdr32 {
+ compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
+ compat_int_t dxfer_direction; /* [i] data transfer direction */
+ unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */
+ unsigned char mx_sb_len; /* [i] max length to write to sbp */
+ unsigned short iovec_count; /* [i] 0 implies no scatter gather */
+ compat_uint_t dxfer_len; /* [i] byte count of data transfer */
+ compat_uint_t dxferp; /* [i], [*io] points to data transfer memory
+ or scatter gather list */
+ compat_uptr_t cmdp; /* [i], [*i] points to command to perform */
+ compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */
+ compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */
+ compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */
+ compat_int_t pack_id; /* [i->o] unused internally (normally) */
+ compat_uptr_t usr_ptr; /* [i->o] unused internally */
+ unsigned char status; /* [o] scsi status */
+ unsigned char masked_status; /* [o] shifted, masked scsi status */
+ unsigned char msg_status; /* [o] messaging level data (optional) */
+ unsigned char sb_len_wr; /* [o] byte count actually written to sbp */
+ unsigned short host_status; /* [o] errors from host adapter */
+ unsigned short driver_status; /* [o] errors from software driver */
+ compat_int_t resid; /* [o] dxfer_len - actual_transferred */
+ compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */
+ compat_uint_t info; /* [o] auxiliary information */
+} sg_io_hdr32_t; /* 64 bytes long (on sparc32) */
+
+typedef struct sg_iovec32 {
+ compat_uint_t iov_base;
+ compat_uint_t iov_len;
+} sg_iovec32_t;
+
+static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iovec_count)
+{
+ sg_iovec_t __user *iov = (sg_iovec_t __user *) (sgio + 1);
+ sg_iovec32_t __user *iov32 = dxferp;
+ int i;
+
+ for (i = 0; i < iovec_count; i++) {
+ u32 base, len;
+
+ if (get_user(base, &iov32[i].iov_base) ||
+ get_user(len, &iov32[i].iov_len) ||
+ put_user(compat_ptr(base), &iov[i].iov_base) ||
+ put_user(len, &iov[i].iov_len))
+ return -EFAULT;
+ }
+
+ if (put_user(iov, &sgio->dxferp))
+ return -EFAULT;
+ return 0;
+}
+
+int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg)
+{
+ sg_io_hdr32_t __user *sgio32 = (sg_io_hdr32_t __user *)arg;
+ sg_io_hdr_t __user *sgio;
+ u16 iovec_count;
+ u32 data;
+ void __user *dxferp;
+ int err;
+ int interface_id;
+
+ if (get_user(interface_id, &sgio32->interface_id))
+ return -EFAULT;
+ if (interface_id != 'S')
+ return -EINVAL;
+
+ if (get_user(iovec_count, &sgio32->iovec_count))
+ return -EFAULT;
+
+ {
+ void __user *top = compat_alloc_user_space(0);
+ void __user *new = compat_alloc_user_space(sizeof(sg_io_hdr_t) +
+ (iovec_count * sizeof(sg_iovec_t)));
+ if (new > top)
+ return -EINVAL;
+
+ sgio = new;
+ }
+
+ /* Ok, now construct. */
+ if (copy_in_user(&sgio->interface_id, &sgio32->interface_id,
+ (2 * sizeof(int)) +
+ (2 * sizeof(unsigned char)) +
+ (1 * sizeof(unsigned short)) +
+ (1 * sizeof(unsigned int))))
+ return -EFAULT;
+
+ if (get_user(data, &sgio32->dxferp))
+ return -EFAULT;
+ dxferp = compat_ptr(data);
+ if (iovec_count) {
+ if (sg_build_iovec(sgio, dxferp, iovec_count))
+ return -EFAULT;
+ } else {
+ if (put_user(dxferp, &sgio->dxferp))
+ return -EFAULT;
+ }
+
+ {
+ unsigned char __user *cmdp;
+ unsigned char __user *sbp;
+
+ if (get_user(data, &sgio32->cmdp))
+ return -EFAULT;
+ cmdp = compat_ptr(data);
+
+ if (get_user(data, &sgio32->sbp))
+ return -EFAULT;
+ sbp = compat_ptr(data);
+
+ if (put_user(cmdp, &sgio->cmdp) ||
+ put_user(sbp, &sgio->sbp))
+ return -EFAULT;
+ }
+
+ if (copy_in_user(&sgio->timeout, &sgio32->timeout,
+ 3 * sizeof(int)))
+ return -EFAULT;
+
+ if (get_user(data, &sgio32->usr_ptr))
+ return -EFAULT;
+ if (put_user(compat_ptr(data), &sgio->usr_ptr))
+ return -EFAULT;
+
+ err = nvme_sg_io(ns, sgio);
+ if (err >= 0) {
+ void __user *datap;
+
+ if (copy_in_user(&sgio32->pack_id, &sgio->pack_id,
+ sizeof(int)) ||
+ get_user(datap, &sgio->usr_ptr) ||
+ put_user((u32)(unsigned long)datap,
+ &sgio32->usr_ptr) ||
+ copy_in_user(&sgio32->status, &sgio->status,
+ (4 * sizeof(unsigned char)) +
+ (2 * sizeof(unsigned short)) +
+ (3 * sizeof(int))))
+ err = -EFAULT;
+ }
+
+ return err;
+}
+#endif
+
int nvme_sg_get_version_num(int __user *ip)
{
return put_user(sg_version_num, ip);
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 4a27b1de5fc..2ce3dfd7e6b 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -581,7 +581,7 @@ static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count,
if (hdr.magic != PG_MAGIC)
return -EINVAL;
- if (hdr.dlen > PG_MAX_DATA)
+ if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
return -EINVAL;
if ((count - hs) > PG_MAX_DATA)
return -EINVAL;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ff8668c5efb..a2af73db187 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -651,7 +651,7 @@ static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s
for (;;) {
tmp = rb_entry(n, struct pkt_rb_node, rb_node);
- if (s <= tmp->bio->bi_sector)
+ if (s <= tmp->bio->bi_iter.bi_sector)
next = n->rb_left;
else
next = n->rb_right;
@@ -660,12 +660,12 @@ static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s
n = next;
}
- if (s > tmp->bio->bi_sector) {
+ if (s > tmp->bio->bi_iter.bi_sector) {
tmp = pkt_rbtree_next(tmp);
if (!tmp)
return NULL;
}
- BUG_ON(s > tmp->bio->bi_sector);
+ BUG_ON(s > tmp->bio->bi_iter.bi_sector);
return tmp;
}
@@ -676,13 +676,13 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
{
struct rb_node **p = &pd->bio_queue.rb_node;
struct rb_node *parent = NULL;
- sector_t s = node->bio->bi_sector;
+ sector_t s = node->bio->bi_iter.bi_sector;
struct pkt_rb_node *tmp;
while (*p) {
parent = *p;
tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
- if (s < tmp->bio->bi_sector)
+ if (s < tmp->bio->bi_iter.bi_sector)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
@@ -706,7 +706,9 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
WRITE : READ, __GFP_WAIT);
if (cgc->buflen) {
- if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
+ ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+ __GFP_WAIT);
+ if (ret)
goto out;
}
@@ -857,7 +859,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
spin_lock(&pd->iosched.lock);
bio = bio_list_peek(&pd->iosched.write_queue);
spin_unlock(&pd->iosched.lock);
- if (bio && (bio->bi_sector == pd->iosched.last_write))
+ if (bio && (bio->bi_iter.bi_sector ==
+ pd->iosched.last_write))
need_write_seek = 0;
if (need_write_seek && reads_queued) {
if (atomic_read(&pd->cdrw.pending_bios) > 0) {
@@ -888,7 +891,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
continue;
if (bio_data_dir(bio) == READ)
- pd->iosched.successive_reads += bio->bi_size >> 10;
+ pd->iosched.successive_reads +=
+ bio->bi_iter.bi_size >> 10;
else {
pd->iosched.successive_reads = 0;
pd->iosched.last_write = bio_end_sector(bio);
@@ -978,7 +982,7 @@ static void pkt_end_io_read(struct bio *bio, int err)
pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_sector, err);
+ (unsigned long long)bio->bi_iter.bi_sector, err);
if (err)
atomic_inc(&pkt->io_errors);
@@ -1026,8 +1030,9 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
memset(written, 0, sizeof(written));
spin_lock(&pkt->lock);
bio_list_for_each(bio, &pkt->orig_bios) {
- int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
- int num_frames = bio->bi_size / CD_FRAMESIZE;
+ int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
+ (CD_FRAMESIZE >> 9);
+ int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
BUG_ON(first_frame < 0);
BUG_ON(first_frame + num_frames > pkt->frames);
@@ -1053,7 +1058,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
bio = pkt->r_bios[f];
bio_reset(bio);
- bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+ bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio->bi_bdev = pd->bdev;
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1150,8 +1155,8 @@ static int pkt_start_recovery(struct packet_data *pkt)
bio_reset(pkt->bio);
pkt->bio->bi_bdev = pd->bdev;
pkt->bio->bi_rw = REQ_WRITE;
- pkt->bio->bi_sector = new_sector;
- pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
+ pkt->bio->bi_iter.bi_sector = new_sector;
+ pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
pkt->bio->bi_vcnt = pkt->frames;
pkt->bio->bi_end_io = pkt_end_io_packet_write;
@@ -1213,7 +1218,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
node = first_node;
while (node) {
bio = node->bio;
- zone = get_zone(bio->bi_sector, pd);
+ zone = get_zone(bio->bi_iter.bi_sector, pd);
list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
if (p->sector == zone) {
bio = NULL;
@@ -1252,14 +1257,14 @@ try_next_bio:
pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
bio = node->bio;
- pkt_dbg(2, pd, "found zone=%llx\n",
- (unsigned long long)get_zone(bio->bi_sector, pd));
- if (get_zone(bio->bi_sector, pd) != zone)
+ pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
+ get_zone(bio->bi_iter.bi_sector, pd));
+ if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
break;
pkt_rbtree_erase(pd, node);
spin_lock(&pkt->lock);
bio_list_add(&pkt->orig_bios, bio);
- pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+ pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
spin_unlock(&pkt->lock);
}
/* check write congestion marks, and if bio_queue_size is
@@ -1293,7 +1298,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
bio_reset(pkt->w_bio);
- pkt->w_bio->bi_sector = pkt->sector;
+ pkt->w_bio->bi_iter.bi_sector = pkt->sector;
pkt->w_bio->bi_bdev = pd->bdev;
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -2335,75 +2340,29 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
pkt_bio_finished(pd);
}
-static void pkt_make_request(struct request_queue *q, struct bio *bio)
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct pktcdvd_device *pd;
- char b[BDEVNAME_SIZE];
+ struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+ struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+
+ psd->pd = pd;
+ psd->bio = bio;
+ cloned_bio->bi_bdev = pd->bdev;
+ cloned_bio->bi_private = psd;
+ cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+ pd->stats.secs_r += bio_sectors(bio);
+ pkt_queue_bio(pd, cloned_bio);
+}
+
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+{
+ struct pktcdvd_device *pd = q->queuedata;
sector_t zone;
struct packet_data *pkt;
int was_empty, blocked_bio;
struct pkt_rb_node *node;
- pd = q->queuedata;
- if (!pd) {
- pr_err("%s incorrect request queue\n",
- bdevname(bio->bi_bdev, b));
- goto end_io;
- }
-
- /*
- * Clone READ bios so we can have our own bi_end_io callback.
- */
- if (bio_data_dir(bio) == READ) {
- struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
- struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
-
- psd->pd = pd;
- psd->bio = bio;
- cloned_bio->bi_bdev = pd->bdev;
- cloned_bio->bi_private = psd;
- cloned_bio->bi_end_io = pkt_end_io_read_cloned;
- pd->stats.secs_r += bio_sectors(bio);
- pkt_queue_bio(pd, cloned_bio);
- return;
- }
-
- if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
- pkt_notice(pd, "WRITE for ro device (%llu)\n",
- (unsigned long long)bio->bi_sector);
- goto end_io;
- }
-
- if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
- pkt_err(pd, "wrong bio size\n");
- goto end_io;
- }
-
- blk_queue_bounce(q, &bio);
-
- zone = get_zone(bio->bi_sector, pd);
- pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
- (unsigned long long)bio->bi_sector,
- (unsigned long long)bio_end_sector(bio));
-
- /* Check if we have to split the bio */
- {
- struct bio_pair *bp;
- sector_t last_zone;
- int first_sectors;
-
- last_zone = get_zone(bio_end_sector(bio) - 1, pd);
- if (last_zone != zone) {
- BUG_ON(last_zone != zone + pd->settings.size);
- first_sectors = last_zone - bio->bi_sector;
- bp = bio_split(bio, first_sectors);
- BUG_ON(!bp);
- pkt_make_request(q, &bp->bio1);
- pkt_make_request(q, &bp->bio2);
- bio_pair_release(bp);
- return;
- }
- }
+ zone = get_zone(bio->bi_iter.bi_sector, pd);
/*
* If we find a matching packet in state WAITING or READ_WAIT, we can
@@ -2417,7 +2376,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
if ((pkt->state == PACKET_WAITING_STATE) ||
(pkt->state == PACKET_READ_WAIT_STATE)) {
bio_list_add(&pkt->orig_bios, bio);
- pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+ pkt->write_size +=
+ bio->bi_iter.bi_size / CD_FRAMESIZE;
if ((pkt->write_size >= pkt->frames) &&
(pkt->state == PACKET_WAITING_STATE)) {
atomic_inc(&pkt->run_sm);
@@ -2476,6 +2436,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
*/
wake_up(&pd->wqueue);
}
+}
+
+static void pkt_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct pktcdvd_device *pd;
+ char b[BDEVNAME_SIZE];
+ struct bio *split;
+
+ pd = q->queuedata;
+ if (!pd) {
+ pr_err("%s incorrect request queue\n",
+ bdevname(bio->bi_bdev, b));
+ goto end_io;
+ }
+
+ pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+ (unsigned long long)bio->bi_iter.bi_sector,
+ (unsigned long long)bio_end_sector(bio));
+
+ /*
+ * Clone READ bios so we can have our own bi_end_io callback.
+ */
+ if (bio_data_dir(bio) == READ) {
+ pkt_make_request_read(pd, bio);
+ return;
+ }
+
+ if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+ pkt_notice(pd, "WRITE for ro device (%llu)\n",
+ (unsigned long long)bio->bi_iter.bi_sector);
+ goto end_io;
+ }
+
+ if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+ pkt_err(pd, "wrong bio size\n");
+ goto end_io;
+ }
+
+ blk_queue_bounce(q, &bio);
+
+ do {
+ sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+ sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+ if (last_zone != zone) {
+ BUG_ON(last_zone != zone + pd->settings.size);
+
+ split = bio_split(bio, last_zone -
+ bio->bi_iter.bi_sector,
+ GFP_NOIO, fs_bio_set);
+ bio_chain(split, bio);
+ } else {
+ split = bio;
+ }
+
+ pkt_make_request_write(q, split);
+ } while (split != bio);
+
return;
end_io:
bio_io_error(bio);
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d754a88d758..c120d70d3fb 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -94,26 +94,25 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
{
unsigned int offset = 0;
struct req_iterator iter;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
unsigned int i = 0;
size_t size;
void *buf;
rq_for_each_segment(bvec, req, iter) {
unsigned long flags;
- dev_dbg(&dev->sbd.core,
- "%s:%u: bio %u: %u segs %u sectors from %lu\n",
- __func__, __LINE__, i, bio_segments(iter.bio),
- bio_sectors(iter.bio), iter.bio->bi_sector);
+ dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %lu\n",
+ __func__, __LINE__, i, bio_sectors(iter.bio),
+ iter.bio->bi_iter.bi_sector);
- size = bvec->bv_len;
- buf = bvec_kmap_irq(bvec, &flags);
+ size = bvec.bv_len;
+ buf = bvec_kmap_irq(&bvec, &flags);
if (gather)
memcpy(dev->bounce_buf+offset, buf, size);
else
memcpy(buf, dev->bounce_buf+offset, size);
offset += size;
- flush_kernel_dcache_page(bvec->bv_page);
+ flush_kernel_dcache_page(bvec.bv_page);
bvec_kunmap_irq(buf, &flags);
i++;
}
@@ -130,7 +129,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
#ifdef DEBUG
unsigned int n = 0;
- struct bio_vec *bv;
+ struct bio_vec bv;
struct req_iterator iter;
rq_for_each_segment(bv, req, iter)
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 06a2e53e5f3..ef45cfb98fd 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -553,16 +553,16 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
int write = bio_data_dir(bio) == WRITE;
const char *op = write ? "write" : "read";
- loff_t offset = bio->bi_sector << 9;
+ loff_t offset = bio->bi_iter.bi_sector << 9;
int error = 0;
- struct bio_vec *bvec;
- unsigned int i;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
struct bio *next;
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment(bvec, bio, iter) {
/* PS3 is ppc64, so we don't handle highmem */
- char *ptr = page_address(bvec->bv_page) + bvec->bv_offset;
- size_t len = bvec->bv_len, retlen;
+ char *ptr = page_address(bvec.bv_page) + bvec.bv_offset;
+ size_t len = bvec.bv_len, retlen;
dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
len, offset);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index cb1db2979d3..b365e0dfccb 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include "rbd_types.h"
@@ -89,9 +90,9 @@ static int atomic_dec_return_safe(atomic_t *v)
}
#define RBD_DRV_NAME "rbd"
-#define RBD_DRV_NAME_LONG "rbd (rados block device)"
-#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
+#define RBD_MINORS_PER_MAJOR 256
+#define RBD_SINGLE_MAJOR_PART_SHIFT 4
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
#define RBD_MAX_SNAP_NAME_LEN \
@@ -323,6 +324,7 @@ struct rbd_device {
int dev_id; /* blkdev unique id */
int major; /* blkdev assigned major */
+ int minor;
struct gendisk *disk; /* blkdev's gendisk and rq */
u32 image_format; /* Either 1 or 2 */
@@ -386,6 +388,17 @@ static struct kmem_cache *rbd_img_request_cache;
static struct kmem_cache *rbd_obj_request_cache;
static struct kmem_cache *rbd_segment_name_cache;
+static int rbd_major;
+static DEFINE_IDA(rbd_dev_id_ida);
+
+/*
+ * Default to false for now, as single-major requires >= 0.75 version of
+ * userspace rbd utility.
+ */
+static bool single_major = false;
+module_param(single_major, bool, S_IRUGO);
+MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: false)");
+
static int rbd_img_request_submit(struct rbd_img_request *img_request);
static void rbd_dev_device_release(struct device *dev);
@@ -394,18 +407,52 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
size_t count);
+static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
+ size_t count);
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
static void rbd_spec_put(struct rbd_spec *spec);
+static int rbd_dev_id_to_minor(int dev_id)
+{
+ return dev_id << RBD_SINGLE_MAJOR_PART_SHIFT;
+}
+
+static int minor_to_rbd_dev_id(int minor)
+{
+ return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
+}
+
static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
static struct attribute *rbd_bus_attrs[] = {
&bus_attr_add.attr,
&bus_attr_remove.attr,
+ &bus_attr_add_single_major.attr,
+ &bus_attr_remove_single_major.attr,
NULL,
};
-ATTRIBUTE_GROUPS(rbd_bus);
+
+static umode_t rbd_bus_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ if (!single_major &&
+ (attr == &bus_attr_add_single_major.attr ||
+ attr == &bus_attr_remove_single_major.attr))
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group rbd_bus_group = {
+ .attrs = rbd_bus_attrs,
+ .is_visible = rbd_bus_is_visible,
+};
+__ATTRIBUTE_GROUPS(rbd_bus);
static struct bus_type rbd_bus_type = {
.name = "rbd",
@@ -1041,9 +1088,9 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
name_format = "%s.%012llx";
if (rbd_dev->image_format == 2)
name_format = "%s.%016llx";
- ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, name_format,
+ ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format,
rbd_dev->header.object_prefix, segment);
- if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) {
+ if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) {
pr_err("error formatting segment name for #%llu (%d)\n",
segment, ret);
kfree(name);
@@ -1109,23 +1156,23 @@ static void bio_chain_put(struct bio *chain)
*/
static void zero_bio_chain(struct bio *chain, int start_ofs)
{
- struct bio_vec *bv;
+ struct bio_vec bv;
+ struct bvec_iter iter;
unsigned long flags;
void *buf;
- int i;
int pos = 0;
while (chain) {
- bio_for_each_segment(bv, chain, i) {
- if (pos + bv->bv_len > start_ofs) {
+ bio_for_each_segment(bv, chain, iter) {
+ if (pos + bv.bv_len > start_ofs) {
int remainder = max(start_ofs - pos, 0);
- buf = bvec_kmap_irq(bv, &flags);
+ buf = bvec_kmap_irq(&bv, &flags);
memset(buf + remainder, 0,
- bv->bv_len - remainder);
- flush_dcache_page(bv->bv_page);
+ bv.bv_len - remainder);
+ flush_dcache_page(bv.bv_page);
bvec_kunmap_irq(buf, &flags);
}
- pos += bv->bv_len;
+ pos += bv.bv_len;
}
chain = chain->bi_next;
@@ -1173,74 +1220,14 @@ static struct bio *bio_clone_range(struct bio *bio_src,
unsigned int len,
gfp_t gfpmask)
{
- struct bio_vec *bv;
- unsigned int resid;
- unsigned short idx;
- unsigned int voff;
- unsigned short end_idx;
- unsigned short vcnt;
struct bio *bio;
- /* Handle the easy case for the caller */
-
- if (!offset && len == bio_src->bi_size)
- return bio_clone(bio_src, gfpmask);
-
- if (WARN_ON_ONCE(!len))
- return NULL;
- if (WARN_ON_ONCE(len > bio_src->bi_size))
- return NULL;
- if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
- return NULL;
-
- /* Find first affected segment... */
-
- resid = offset;
- bio_for_each_segment(bv, bio_src, idx) {
- if (resid < bv->bv_len)
- break;
- resid -= bv->bv_len;
- }
- voff = resid;
-
- /* ...and the last affected segment */
-
- resid += len;
- __bio_for_each_segment(bv, bio_src, end_idx, idx) {
- if (resid <= bv->bv_len)
- break;
- resid -= bv->bv_len;
- }
- vcnt = end_idx - idx + 1;
-
- /* Build the clone */
-
- bio = bio_alloc(gfpmask, (unsigned int) vcnt);
+ bio = bio_clone(bio_src, gfpmask);
if (!bio)
return NULL; /* ENOMEM */
- bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT);
- bio->bi_rw = bio_src->bi_rw;
- bio->bi_flags |= 1 << BIO_CLONED;
-
- /*
- * Copy over our part of the bio_vec, then update the first
- * and last (or only) entries.
- */
- memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
- vcnt * sizeof (struct bio_vec));
- bio->bi_io_vec[0].bv_offset += voff;
- if (vcnt > 1) {
- bio->bi_io_vec[0].bv_len -= voff;
- bio->bi_io_vec[vcnt - 1].bv_len = resid;
- } else {
- bio->bi_io_vec[0].bv_len = len;
- }
-
- bio->bi_vcnt = vcnt;
- bio->bi_size = len;
- bio->bi_idx = 0;
+ bio_advance(bio, offset);
+ bio->bi_iter.bi_size = len;
return bio;
}
@@ -1271,7 +1258,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
/* Build up a chain of clone bios up to the limit */
- if (!bi || off >= bi->bi_size || !len)
+ if (!bi || off >= bi->bi_iter.bi_size || !len)
return NULL; /* Nothing to clone */
end = &chain;
@@ -1283,7 +1270,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
rbd_warn(NULL, "bio_chain exhausted with %u left", len);
goto out_err; /* EINVAL; ran out of bio's */
}
- bi_size = min_t(unsigned int, bi->bi_size - off, len);
+ bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
bio = bio_clone_range(bi, off, bi_size, gfpmask);
if (!bio)
goto out_err; /* ENOMEM */
@@ -1292,7 +1279,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
end = &bio->bi_next;
off += bi_size;
- if (off == bi->bi_size) {
+ if (off == bi->bi_iter.bi_size) {
bi = bi->bi_next;
off = 0;
}
@@ -1761,11 +1748,8 @@ static struct ceph_osd_request *rbd_osd_req_create(
osd_req->r_callback = rbd_osd_req_callback;
osd_req->r_priv = obj_request;
- osd_req->r_oid_len = strlen(obj_request->object_name);
- rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
- memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
-
- osd_req->r_file_layout = rbd_dev->layout; /* struct */
+ osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
+ ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
return osd_req;
}
@@ -1802,11 +1786,8 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
osd_req->r_callback = rbd_osd_req_callback;
osd_req->r_priv = obj_request;
- osd_req->r_oid_len = strlen(obj_request->object_name);
- rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
- memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
-
- osd_req->r_file_layout = rbd_dev->layout; /* struct */
+ osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
+ ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
return osd_req;
}
@@ -2186,7 +2167,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
if (type == OBJ_REQUEST_BIO) {
bio_list = data_desc;
- rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT);
+ rbd_assert(img_offset ==
+ bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
} else {
rbd_assert(type == OBJ_REQUEST_PAGES);
pages = data_desc;
@@ -2866,7 +2848,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
* Request sync osd watch/unwatch. The value of "start" determines
* whether a watch request is being initiated or torn down.
*/
-static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
+static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct rbd_obj_request *obj_request;
@@ -2941,6 +2923,22 @@ out_cancel:
return ret;
}
+/*
+ * Initiate the synchronous osd watch request ("start" == true) and
+ * hand back the helper's result unchanged.
+ */
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
+{
+	int ret = __rbd_dev_header_watch_sync(rbd_dev, true);
+
+	return ret;
+}
+
+/*
+ * Tear down the watch request ("start" == false).  A failure is not
+ * propagated to the caller; it is only reported with a warning.
+ */
+static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+{
+	int ret;
+
+	ret = __rbd_dev_header_watch_sync(rbd_dev, false);
+	if (ret) {
+		/* No trailing "\n": rbd_warn() terminates the line itself. */
+		rbd_warn(rbd_dev, "unable to tear down watch request: %d",
+			 ret);
+	}
+}
+
/*
* Synchronous osd object method call. Returns the number of bytes
* returned in the outbound buffer, or a negative error code.
@@ -3388,14 +3386,18 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
u64 segment_size;
/* create gendisk info */
- disk = alloc_disk(RBD_MINORS_PER_MAJOR);
+ disk = alloc_disk(single_major ?
+ (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
+ RBD_MINORS_PER_MAJOR);
if (!disk)
return -ENOMEM;
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
rbd_dev->dev_id);
disk->major = rbd_dev->major;
- disk->first_minor = 0;
+ disk->first_minor = rbd_dev->minor;
+ if (single_major)
+ disk->flags |= GENHD_FL_EXT_DEVT;
disk->fops = &rbd_bd_ops;
disk->private_data = rbd_dev;
@@ -3467,7 +3469,14 @@ static ssize_t rbd_major_show(struct device *dev,
return sprintf(buf, "%d\n", rbd_dev->major);
return sprintf(buf, "(none)\n");
+}
+
+/* Show handler for the "minor" device attribute: prints rbd_dev->minor. */
+static ssize_t rbd_minor_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", dev_to_rbd_dev(dev)->minor);
}
static ssize_t rbd_client_id_show(struct device *dev,
@@ -3589,6 +3598,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
@@ -3602,6 +3612,7 @@ static struct attribute *rbd_attrs[] = {
&dev_attr_size.attr,
&dev_attr_features.attr,
&dev_attr_major.attr,
+ &dev_attr_minor.attr,
&dev_attr_client_id.attr,
&dev_attr_pool.attr,
&dev_attr_pool_id.attr,
@@ -4372,21 +4383,29 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
device_unregister(&rbd_dev->dev);
}
-static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);
-
/*
* Get a unique rbd identifier for the given new rbd_dev, and add
- * the rbd_dev to the global list. The minimum rbd id is 1.
+ * the rbd_dev to the global list.
*/
-static void rbd_dev_id_get(struct rbd_device *rbd_dev)
+static int rbd_dev_id_get(struct rbd_device *rbd_dev)
{
- rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max);
+ int new_dev_id;
+
+ new_dev_id = ida_simple_get(&rbd_dev_id_ida,
+ 0, minor_to_rbd_dev_id(1 << MINORBITS),
+ GFP_KERNEL);
+ if (new_dev_id < 0)
+ return new_dev_id;
+
+ rbd_dev->dev_id = new_dev_id;
spin_lock(&rbd_dev_list_lock);
list_add_tail(&rbd_dev->node, &rbd_dev_list);
spin_unlock(&rbd_dev_list_lock);
- dout("rbd_dev %p given dev id %llu\n", rbd_dev,
- (unsigned long long) rbd_dev->dev_id);
+
+ dout("rbd_dev %p given dev id %d\n", rbd_dev, rbd_dev->dev_id);
+
+ return 0;
}
/*
@@ -4395,49 +4414,13 @@ static void rbd_dev_id_get(struct rbd_device *rbd_dev)
*/
static void rbd_dev_id_put(struct rbd_device *rbd_dev)
{
- struct list_head *tmp;
- int rbd_id = rbd_dev->dev_id;
- int max_id;
-
- rbd_assert(rbd_id > 0);
-
- dout("rbd_dev %p released dev id %llu\n", rbd_dev,
- (unsigned long long) rbd_dev->dev_id);
spin_lock(&rbd_dev_list_lock);
list_del_init(&rbd_dev->node);
-
- /*
- * If the id being "put" is not the current maximum, there
- * is nothing special we need to do.
- */
- if (rbd_id != atomic64_read(&rbd_dev_id_max)) {
- spin_unlock(&rbd_dev_list_lock);
- return;
- }
-
- /*
- * We need to update the current maximum id. Search the
- * list to find out what it is. We're more likely to find
- * the maximum at the end, so search the list backward.
- */
- max_id = 0;
- list_for_each_prev(tmp, &rbd_dev_list) {
- struct rbd_device *rbd_dev;
-
- rbd_dev = list_entry(tmp, struct rbd_device, node);
- if (rbd_dev->dev_id > max_id)
- max_id = rbd_dev->dev_id;
- }
spin_unlock(&rbd_dev_list_lock);
- /*
- * The max id could have been updated by rbd_dev_id_get(), in
- * which case it now accurately reflects the new maximum.
- * Be careful not to overwrite the maximum value in that
- * case.
- */
- atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id);
- dout(" max dev id has been reset\n");
+ ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+
+ dout("rbd_dev %p released dev id %d\n", rbd_dev, rbd_dev->dev_id);
}
/*
@@ -4860,20 +4843,29 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
{
int ret;
- /* generate unique id: find highest unique id, add one */
- rbd_dev_id_get(rbd_dev);
+ /* Get an id and fill in device name. */
+
+ ret = rbd_dev_id_get(rbd_dev);
+ if (ret)
+ return ret;
- /* Fill in the device name, now that we have its id. */
BUILD_BUG_ON(DEV_NAME_LEN
< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
- /* Get our block major device number. */
+ /* Record our major and minor device numbers. */
- ret = register_blkdev(0, rbd_dev->name);
- if (ret < 0)
- goto err_out_id;
- rbd_dev->major = ret;
+ if (!single_major) {
+ ret = register_blkdev(0, rbd_dev->name);
+ if (ret < 0)
+ goto err_out_id;
+
+ rbd_dev->major = ret;
+ rbd_dev->minor = 0;
+ } else {
+ rbd_dev->major = rbd_major;
+ rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
+ }
/* Set up the blkdev mapping. */
@@ -4905,7 +4897,8 @@ err_out_mapping:
err_out_disk:
rbd_free_disk(rbd_dev);
err_out_blkdev:
- unregister_blkdev(rbd_dev->major, rbd_dev->name);
+ if (!single_major)
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
err_out_id:
rbd_dev_id_put(rbd_dev);
rbd_dev_mapping_clear(rbd_dev);
@@ -4961,7 +4954,6 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
{
int ret;
- int tmp;
/*
* Get the id from the image id object. Unless there's an
@@ -4980,7 +4972,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
goto err_out_format;
if (mapping) {
- ret = rbd_dev_header_watch_sync(rbd_dev, true);
+ ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret)
goto out_header_name;
}
@@ -5007,12 +4999,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (mapping) {
- tmp = rbd_dev_header_watch_sync(rbd_dev, false);
- if (tmp)
- rbd_warn(rbd_dev, "unable to tear down "
- "watch request (%d)\n", tmp);
- }
+ if (mapping)
+ rbd_dev_header_unwatch_sync(rbd_dev);
out_header_name:
kfree(rbd_dev->header_name);
rbd_dev->header_name = NULL;
@@ -5026,9 +5014,9 @@ err_out_format:
return ret;
}
-static ssize_t rbd_add(struct bus_type *bus,
- const char *buf,
- size_t count)
+static ssize_t do_rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
{
struct rbd_device *rbd_dev = NULL;
struct ceph_options *ceph_opts = NULL;
@@ -5090,6 +5078,12 @@ static ssize_t rbd_add(struct bus_type *bus,
rc = rbd_dev_device_setup(rbd_dev);
if (rc) {
+ /*
+ * rbd_dev_header_unwatch_sync() can't be moved into
+ * rbd_dev_image_release() without refactoring, see
+ * commit 1f3ef78861ac.
+ */
+ rbd_dev_header_unwatch_sync(rbd_dev);
rbd_dev_image_release(rbd_dev);
goto err_out_module;
}
@@ -5110,6 +5104,23 @@ err_out_module:
return (ssize_t)rc;
}
+/*
+ * Store handler for the bus "add" attribute.  With single_major set the
+ * write is rejected with -EINVAL; otherwise it is passed to do_rbd_add().
+ */
+static ssize_t rbd_add(struct bus_type *bus,
+		       const char *buf,
+		       size_t count)
+{
+	return single_major ? -EINVAL : do_rbd_add(bus, buf, count);
+}
+
+/* Store handler for "add_single_major": forwards straight to do_rbd_add(). */
+static ssize_t rbd_add_single_major(struct bus_type *bus,
+				    const char *buf,
+				    size_t count)
+{
+	ssize_t ret = do_rbd_add(bus, buf, count);
+
+	return ret;
+}
+
static void rbd_dev_device_release(struct device *dev)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
@@ -5117,8 +5128,8 @@ static void rbd_dev_device_release(struct device *dev)
rbd_free_disk(rbd_dev);
clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
rbd_dev_mapping_clear(rbd_dev);
- unregister_blkdev(rbd_dev->major, rbd_dev->name);
- rbd_dev->major = 0;
+ if (!single_major)
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
rbd_dev_id_put(rbd_dev);
rbd_dev_mapping_clear(rbd_dev);
}
@@ -5149,9 +5160,9 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
}
}
-static ssize_t rbd_remove(struct bus_type *bus,
- const char *buf,
- size_t count)
+static ssize_t do_rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
{
struct rbd_device *rbd_dev = NULL;
struct list_head *tmp;
@@ -5191,16 +5202,14 @@ static ssize_t rbd_remove(struct bus_type *bus,
if (ret < 0 || already)
return ret;
- ret = rbd_dev_header_watch_sync(rbd_dev, false);
- if (ret)
- rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
-
+ rbd_dev_header_unwatch_sync(rbd_dev);
/*
* flush remaining watch callbacks - these must be complete
* before the osd_client is shutdown
*/
dout("%s: flushing notifies", __func__);
ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
+
/*
* Don't free anything from rbd_dev->disk until after all
* notifies are completely processed. Otherwise
@@ -5214,6 +5223,23 @@ static ssize_t rbd_remove(struct bus_type *bus,
return count;
}
+/*
+ * Store handler for the bus "remove" attribute.  With single_major set
+ * the write is rejected with -EINVAL; otherwise it is passed to
+ * do_rbd_remove().
+ */
+static ssize_t rbd_remove(struct bus_type *bus,
+			  const char *buf,
+			  size_t count)
+{
+	return single_major ? -EINVAL : do_rbd_remove(bus, buf, count);
+}
+
+/* Store handler for "remove_single_major": forwards to do_rbd_remove(). */
+static ssize_t rbd_remove_single_major(struct bus_type *bus,
+				       const char *buf,
+				       size_t count)
+{
+	ssize_t ret = do_rbd_remove(bus, buf, count);
+
+	return ret;
+}
+
/*
* create control files in sysfs
* /sys/bus/rbd/...
@@ -5259,7 +5285,7 @@ static int rbd_slab_init(void)
rbd_assert(!rbd_segment_name_cache);
rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
- MAX_OBJ_NAME_SIZE + 1, 1, 0, NULL);
+ CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL);
if (rbd_segment_name_cache)
return 0;
out_err:
@@ -5295,24 +5321,45 @@ static int __init rbd_init(void)
if (!libceph_compatible(NULL)) {
rbd_warn(NULL, "libceph incompatibility (quitting)");
-
return -EINVAL;
}
+
rc = rbd_slab_init();
if (rc)
return rc;
+
+ if (single_major) {
+ rbd_major = register_blkdev(0, RBD_DRV_NAME);
+ if (rbd_major < 0) {
+ rc = rbd_major;
+ goto err_out_slab;
+ }
+ }
+
rc = rbd_sysfs_init();
if (rc)
- rbd_slab_exit();
+ goto err_out_blkdev;
+
+ if (single_major)
+ pr_info("loaded (major %d)\n", rbd_major);
else
- pr_info("loaded " RBD_DRV_NAME_LONG "\n");
+ pr_info("loaded\n");
+
+ return 0;
+err_out_blkdev:
+ if (single_major)
+ unregister_blkdev(rbd_major, RBD_DRV_NAME);
+err_out_slab:
+ rbd_slab_exit();
return rc;
}
static void __exit rbd_exit(void)
{
rbd_sysfs_cleanup();
+ if (single_major)
+ unregister_blkdev(rbd_major, RBD_DRV_NAME);
rbd_slab_exit();
}
@@ -5322,9 +5369,8 @@ module_exit(rbd_exit);
MODULE_AUTHOR("Alex Elder <elder@inktank.com>");
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
-MODULE_DESCRIPTION("rados block device");
-
/* following authorship retained from original osdblk.c */
MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
+MODULE_DESCRIPTION("RADOS Block Device (RBD) driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 2284f5d3a54..2839d37e5af 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -174,7 +174,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
if (!card)
goto req_err;
- if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+ if (bio_end_sector(bio) > get_capacity(card->gendisk))
goto req_err;
if (unlikely(card->halt)) {
@@ -187,7 +187,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
goto req_err;
}
- if (bio->bi_size == 0) {
+ if (bio->bi_iter.bi_size == 0) {
dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
goto req_err;
}
@@ -208,7 +208,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
bio_data_dir(bio) ? 'W' : 'R', bio_meta,
- (u64)bio->bi_sector << 9, bio->bi_size);
+ (u64)bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
bio_dma_done_cb, bio_meta);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index fc88ba3e1bd..cf8cd293abb 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -684,7 +684,8 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
void *cb_data)
{
struct list_head dma_list[RSXX_MAX_TARGETS];
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
unsigned long long addr8;
unsigned int laddr;
unsigned int bv_len;
@@ -696,7 +697,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
int st;
int i;
- addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */
+ addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
atomic_set(n_dmas, 0);
for (i = 0; i < card->n_targets; i++) {
@@ -705,7 +706,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
}
if (bio->bi_rw & REQ_DISCARD) {
- bv_len = bio->bi_size;
+ bv_len = bio->bi_iter.bi_size;
while (bv_len > 0) {
tgt = rsxx_get_dma_tgt(card, addr8);
@@ -722,9 +723,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
bv_len -= RSXX_HW_BLK_SIZE;
}
} else {
- bio_for_each_segment(bvec, bio, i) {
- bv_len = bvec->bv_len;
- bv_off = bvec->bv_offset;
+ bio_for_each_segment(bvec, bio, iter) {
+ bv_len = bvec.bv_len;
+ bv_off = bvec.bv_offset;
while (bv_len > 0) {
tgt = rsxx_get_dma_tgt(card, addr8);
@@ -736,7 +737,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
st = rsxx_queue_dma(card, &dma_list[tgt],
bio_data_dir(bio),
dma_off, dma_len,
- laddr, bvec->bv_page,
+ laddr, bvec.bv_page,
bv_off, cb, cb_data);
if (st)
goto bvec_err;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 3fb6ab4c8b4..d5e2d12b9d9 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1744,20 +1744,6 @@ static void carm_remove_one (struct pci_dev *pdev)
kfree(host);
pci_release_regions(pdev);
pci_disable_device(pdev);
- pci_set_drvdata(pdev, NULL);
}
-static int __init carm_init(void)
-{
- return pci_register_driver(&carm_driver);
-}
-
-static void __exit carm_exit(void)
-{
- pci_unregister_driver(&carm_driver);
-}
-
-module_init(carm_init);
-module_exit(carm_exit);
-
-
+module_pci_driver(carm_driver);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index ad70868f8a9..4cf81b5bf0f 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -108,8 +108,7 @@ struct cardinfo {
* have been written
*/
struct bio *bio, *currentbio, **biotail;
- int current_idx;
- sector_t current_sector;
+ struct bvec_iter current_iter;
struct request_queue *queue;
@@ -118,7 +117,7 @@ struct cardinfo {
struct mm_dma_desc *desc;
int cnt, headcnt;
struct bio *bio, **biotail;
- int idx;
+ struct bvec_iter iter;
} mm_pages[2];
#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
@@ -344,16 +343,13 @@ static int add_bio(struct cardinfo *card)
dma_addr_t dma_handle;
int offset;
struct bio *bio;
- struct bio_vec *vec;
- int idx;
+ struct bio_vec vec;
int rw;
- int len;
bio = card->currentbio;
if (!bio && card->bio) {
card->currentbio = card->bio;
- card->current_idx = card->bio->bi_idx;
- card->current_sector = card->bio->bi_sector;
+ card->current_iter = card->bio->bi_iter;
card->bio = card->bio->bi_next;
if (card->bio == NULL)
card->biotail = &card->bio;
@@ -362,18 +358,17 @@ static int add_bio(struct cardinfo *card)
}
if (!bio)
return 0;
- idx = card->current_idx;
rw = bio_rw(bio);
if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
return 0;
- vec = bio_iovec_idx(bio, idx);
- len = vec->bv_len;
+ vec = bio_iter_iovec(bio, card->current_iter);
+
dma_handle = pci_map_page(card->dev,
- vec->bv_page,
- vec->bv_offset,
- len,
+ vec.bv_page,
+ vec.bv_offset,
+ vec.bv_len,
(rw == READ) ?
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
@@ -381,7 +376,7 @@ static int add_bio(struct cardinfo *card)
desc = &p->desc[p->cnt];
p->cnt++;
if (p->bio == NULL)
- p->idx = idx;
+ p->iter = card->current_iter;
if ((p->biotail) != &bio->bi_next) {
*(p->biotail) = bio;
p->biotail = &(bio->bi_next);
@@ -391,8 +386,8 @@ static int add_bio(struct cardinfo *card)
desc->data_dma_handle = dma_handle;
desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
- desc->local_addr = cpu_to_le64(card->current_sector << 9);
- desc->transfer_size = cpu_to_le32(len);
+ desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9);
+ desc->transfer_size = cpu_to_le32(vec.bv_len);
offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
desc->zero1 = desc->zero2 = 0;
@@ -407,10 +402,9 @@ static int add_bio(struct cardinfo *card)
desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
desc->sem_control_bits = desc->control_bits;
- card->current_sector += (len >> 9);
- idx++;
- card->current_idx = idx;
- if (idx >= bio->bi_vcnt)
+
+ bio_advance_iter(bio, &card->current_iter, vec.bv_len);
+ if (!card->current_iter.bi_size)
card->currentbio = NULL;
return 1;
@@ -439,23 +433,25 @@ static void process_page(unsigned long data)
struct mm_dma_desc *desc = &page->desc[page->headcnt];
int control = le32_to_cpu(desc->sem_control_bits);
int last = 0;
- int idx;
+ struct bio_vec vec;
if (!(control & DMASCR_DMA_COMPLETE)) {
control = dma_status;
last = 1;
}
+
page->headcnt++;
- idx = page->idx;
- page->idx++;
- if (page->idx >= bio->bi_vcnt) {
+ vec = bio_iter_iovec(bio, page->iter);
+ bio_advance_iter(bio, &page->iter, vec.bv_len);
+
+ if (!page->iter.bi_size) {
page->bio = bio->bi_next;
if (page->bio)
- page->idx = page->bio->bi_idx;
+ page->iter = page->bio->bi_iter;
}
pci_unmap_page(card->dev, desc->data_dma_handle,
- bio_iovec_idx(bio, idx)->bv_len,
+ vec.bv_len,
(control & DMASCR_TRANSFER_READ) ?
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
if (control & DMASCR_HARD_ERROR) {
@@ -532,7 +528,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
{
struct cardinfo *card = q->queuedata;
pr_debug("mm_make_request %llu %u\n",
- (unsigned long long)bio->bi_sector, bio->bi_size);
+ (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size);
spin_lock_irq(&card->lock);
*card->biotail = bio;
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6620b73d049..4b97b86da92 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1257,7 +1257,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
- bio->bi_sector = preq.sector_number;
+ bio->bi_iter.bi_sector = preq.sector_number;
}
preq.sector_number += seg[i].nsec;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f9c43f91f03..8dcfb54f160 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1547,7 +1547,7 @@ static int blkif_recover(struct blkfront_info *info)
for (i = 0; i < pending; i++) {
offset = (i * segs * PAGE_SIZE) >> 9;
size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
- (unsigned int)(bio->bi_size >> 9) - offset);
+ (unsigned int)bio_sectors(bio) - offset);
cloned_bio = bio_clone(bio, GFP_NOIO);
BUG_ON(cloned_bio == NULL);
bio_trim(cloned_bio, offset, size);
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
new file mode 100644
index 00000000000..3450be85039
--- /dev/null
+++ b/drivers/block/zram/Kconfig
@@ -0,0 +1,24 @@
+config ZRAM
+ tristate "Compressed RAM block device support"
+ depends on BLOCK && SYSFS && ZSMALLOC
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ default n
+ help
+ Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
+ Pages written to these disks are compressed and stored in memory
+ itself. These disks allow very fast I/O and compression provides
+ good amounts of memory savings.
+
+ Typical use cases include /tmp storage and swap disks, among
+ others.
+
+ See zram.txt for more information.
+
+config ZRAM_DEBUG
+ bool "Compressed RAM block device debug support"
+ depends on ZRAM
+ default n
+ help
+ This option adds additional debugging code to the compressed
+ RAM block device driver.
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
new file mode 100644
index 00000000000..cb0f9ced6a9
--- /dev/null
+++ b/drivers/block/zram/Makefile
@@ -0,0 +1,3 @@
+zram-y := zram_drv.o
+
+obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
new file mode 100644
index 00000000000..011e55d820b
--- /dev/null
+++ b/drivers/block/zram/zram_drv.c
@@ -0,0 +1,958 @@
+/*
+ * Compressed RAM block device
+ *
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ * 2012, 2013 Minchan Kim
+ *
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ *
+ */
+
+#define KMSG_COMPONENT "zram"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#ifdef CONFIG_ZRAM_DEBUG
+#define DEBUG
+#endif
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/bitops.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/lzo.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include "zram_drv.h"
+
+/* Globals */
+static int zram_major;	/* block major obtained from register_blkdev() in zram_init() */
+static struct zram *zram_devices;	/* array of num_devices entries, allocated in zram_init() */
+
+/* Module params (documentation at end) */
+static unsigned int num_devices = 1;	/* rejected by zram_init() if above max_num_devices */
+
+/* Map a sysfs device to the zram instance kept in its gendisk private data. */
+static inline struct zram *dev_to_zram(struct device *dev)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return (struct zram *)disk->private_data;
+}
+
+/* sysfs read handler for "disksize": prints zram->disksize (bytes). */
+static ssize_t disksize_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u64 bytes = dev_to_zram(dev)->disksize;
+
+	return sprintf(buf, "%llu\n", bytes);
+}
+
+/* sysfs read handler for "initstate": prints zram->init_done. */
+static ssize_t initstate_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int done = dev_to_zram(dev)->init_done;
+
+	return sprintf(buf, "%u\n", done);
+}
+
+/* sysfs read handler for "num_reads": prints the stats.num_reads counter. */
+static ssize_t num_reads_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 reads = atomic64_read(&zram->stats.num_reads);
+
+	return sprintf(buf, "%llu\n", reads);
+}
+
+/* sysfs read handler for "num_writes": prints the stats.num_writes counter. */
+static ssize_t num_writes_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 writes = atomic64_read(&zram->stats.num_writes);
+
+	return sprintf(buf, "%llu\n", writes);
+}
+
+/* sysfs read handler for "invalid_io": prints the stats.invalid_io counter. */
+static ssize_t invalid_io_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 rejected = atomic64_read(&zram->stats.invalid_io);
+
+	return sprintf(buf, "%llu\n", rejected);
+}
+
+/* sysfs read handler for "notify_free": prints the stats.notify_free counter. */
+static ssize_t notify_free_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 notified = atomic64_read(&zram->stats.notify_free);
+
+	return sprintf(buf, "%llu\n", notified);
+}
+
+/* sysfs read handler for "zero_pages": prints the stats.pages_zero counter. */
+static ssize_t zero_pages_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	int zero_pages = atomic_read(&zram->stats.pages_zero);
+
+	return sprintf(buf, "%u\n", zero_pages);
+}
+
+/*
+ * sysfs read handler for "orig_data_size": prints the number of stored
+ * pages converted to bytes (stats.pages_stored << PAGE_SHIFT).
+ */
+static ssize_t orig_data_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 pages = atomic_read(&zram->stats.pages_stored);
+
+	return sprintf(buf, "%llu\n", pages << PAGE_SHIFT);
+}
+
+/* sysfs read handler for "compr_data_size": prints stats.compr_size. */
+static ssize_t compr_data_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u64 compressed = atomic64_read(&zram->stats.compr_size);
+
+	return sprintf(buf, "%llu\n", compressed);
+}
+
+/*
+ * sysfs read handler for "mem_used_total": prints the total size reported
+ * by the zsmalloc pool, or 0 while the device is not initialized.
+ *
+ * zram->meta is installed by zram_init_device() and freed and cleared by
+ * zram_reset_device(), both with init_lock held for writing.  It must
+ * therefore only be read while init_lock is held; sampling it before
+ * taking the lock could leave us with a pointer that a concurrent reset
+ * has already freed.
+ */
+static ssize_t mem_used_total_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u64 val = 0;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	if (zram->init_done)
+		val = zs_get_total_size_bytes(zram->meta->mem_pool);
+	up_read(&zram->init_lock);
+
+	return sprintf(buf, "%llu\n", val);
+}
+
+/*
+ * Flag operations need meta->tb_lock.
+ * Returns non-zero when @flag is set in the table entry at @index.
+ */
+static int zram_test_flag(struct zram_meta *meta, u32 index,
+			enum zram_pageflags flag)
+{
+	struct table *entry = &meta->table[index];
+
+	return entry->flags & BIT(flag);
+}
+
+/* Set @flag in the table entry at @index. */
+static void zram_set_flag(struct zram_meta *meta, u32 index,
+			enum zram_pageflags flag)
+{
+	struct table *entry = &meta->table[index];
+
+	entry->flags |= BIT(flag);
+}
+
+/* Clear @flag in the table entry at @index. */
+static void zram_clear_flag(struct zram_meta *meta, u32 index,
+			enum zram_pageflags flag)
+{
+	struct table *entry = &meta->table[index];
+
+	entry->flags &= ~BIT(flag);
+}
+
+/* Returns 1 when @bvec covers anything other than exactly PAGE_SIZE bytes. */
+static inline int is_partial_io(struct bio_vec *bvec)
+{
+	if (bvec->bv_len == PAGE_SIZE)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Check that a request is aligned on zram logical blocks and lies entirely
+ * inside the disk.  Returns 1 for a valid request, 0 otherwise.
+ */
+static inline int valid_io_request(struct zram *zram, struct bio *bio)
+{
+	u64 first, past_last, limit;
+
+	/* start sector must be a multiple of the logical block size */
+	if (unlikely(bio->bi_iter.bi_sector &
+		     (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+		return 0;
+	/* ... and so must the byte count */
+	if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+		return 0;
+
+	first = bio->bi_iter.bi_sector;
+	past_last = first + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
+	limit = zram->disksize >> SECTOR_SHIFT;
+
+	/* in range, and the end did not wrap around below the start */
+	return first < limit && past_last <= limit && first <= past_last;
+}
+
+/*
+ * Release everything zram_meta_alloc() set up, then @meta itself.
+ * @meta must not be NULL: it is dereferenced unconditionally.
+ */
+static void zram_meta_free(struct zram_meta *meta)
+{
+	zs_destroy_pool(meta->mem_pool);
+	vfree(meta->table);
+	free_pages((unsigned long)meta->compress_buffer, 1);
+	kfree(meta->compress_workmem);
+	kfree(meta);
+}
+
+/*
+ * Allocate the per-device metadata for a disk of @disksize bytes: the LZO
+ * work memory, the compression buffer, one table entry per page of disk
+ * and the zsmalloc pool.
+ *
+ * Returns the new zram_meta, or NULL if any allocation failed; in that
+ * case everything allocated so far has been released again.
+ */
+static struct zram_meta *zram_meta_alloc(u64 disksize)
+{
+	struct zram_meta *meta;
+	size_t nr_entries;
+
+	meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+	if (!meta)
+		return NULL;
+
+	meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+	if (!meta->compress_workmem)
+		goto err_meta;
+
+	/* order-1 page allocation, zero filled */
+	meta->compress_buffer =
+		(void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+	if (!meta->compress_buffer) {
+		pr_err("Error allocating compressor buffer space\n");
+		goto err_workmem;
+	}
+
+	/* one table entry per page of disk */
+	nr_entries = disksize >> PAGE_SHIFT;
+	meta->table = vzalloc(nr_entries * sizeof(*meta->table));
+	if (!meta->table) {
+		pr_err("Error allocating zram address table\n");
+		goto err_buffer;
+	}
+
+	meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
+	if (!meta->mem_pool) {
+		pr_err("Error creating memory pool\n");
+		goto err_table;
+	}
+
+	rwlock_init(&meta->tb_lock);
+	mutex_init(&meta->buffer_lock);
+	return meta;
+
+err_table:
+	vfree(meta->table);
+err_buffer:
+	free_pages((unsigned long)meta->compress_buffer, 1);
+err_workmem:
+	kfree(meta->compress_workmem);
+err_meta:
+	kfree(meta);
+	return NULL;
+}
+
+/*
+ * Advance the (*index, *offset) cursor by the length of @bvec: move to the
+ * next page index once the end of the current page is reached or passed,
+ * and wrap the offset modulo PAGE_SIZE.
+ */
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+	unsigned int end = *offset + bvec->bv_len;
+
+	if (end >= PAGE_SIZE)
+		++*index;
+	*offset = end % PAGE_SIZE;
+}
+
+/*
+ * Returns 1 if the PAGE_SIZE bytes at @ptr are all zero, 0 otherwise.
+ * The page is scanned one unsigned long at a time.
+ */
+static int page_zero_filled(void *ptr)
+{
+	unsigned long *word = (unsigned long *)ptr;
+	unsigned long *end = word + PAGE_SIZE / sizeof(*word);
+
+	while (word != end) {
+		if (*word++)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Satisfy a read with zeroes: clear the whole bio page for a full-page
+ * request, or only the bv_len bytes at bv_offset for a partial one.
+ */
+static void handle_zero_page(struct bio_vec *bvec)
+{
+	struct page *page = bvec->bv_page;
+	void *kaddr = kmap_atomic(page);
+
+	if (!is_partial_io(bvec))
+		clear_page(kaddr);
+	else
+		memset(kaddr + bvec->bv_offset, 0, bvec->bv_len);
+	kunmap_atomic(kaddr);
+
+	flush_dcache_page(page);
+}
+
+/*
+ * Drop whatever is stored for page @index and adjust the statistics.
+ * NOTE: caller should hold meta->tb_lock with write-side.
+ */
+static void zram_free_page(struct zram *zram, size_t index)
+{
+	struct zram_meta *meta = zram->meta;
+	struct table *entry = &meta->table[index];
+	unsigned long handle = entry->handle;
+	u16 size = entry->size;
+
+	if (unlikely(!handle)) {
+		/*
+		 * Zero filled pages own no allocation, so there is only
+		 * the ZRAM_ZERO flag to clear, if it is set at all.
+		 */
+		if (!zram_test_flag(meta, index, ZRAM_ZERO))
+			return;
+
+		zram_clear_flag(meta, index, ZRAM_ZERO);
+		atomic_dec(&zram->stats.pages_zero);
+		return;
+	}
+
+	if (unlikely(size > max_zpage_size))
+		atomic_dec(&zram->stats.bad_compress);
+
+	zs_free(meta->mem_pool, handle);
+
+	if (size <= PAGE_SIZE / 2)
+		atomic_dec(&zram->stats.good_compress);
+
+	atomic64_sub(size, &zram->stats.compr_size);
+	atomic_dec(&zram->stats.pages_stored);
+
+	entry->handle = 0;
+	entry->size = 0;
+}
+
+/*
+ * Fill the page-sized buffer @mem with the contents of zram page @index.
+ *
+ * A slot with no allocation, or one flagged ZRAM_ZERO, yields a cleared
+ * page.  An object whose stored size is PAGE_SIZE was kept uncompressed
+ * and is copied verbatim; anything else goes through the LZO decompressor.
+ * The table entry and the object are accessed under the read side of
+ * meta->tb_lock.
+ *
+ * Returns 0 on success or the LZO error code on decompression failure.
+ */
+static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
+{
+	struct zram_meta *meta = zram->meta;
+	unsigned char *cmem;
+	unsigned long handle;
+	size_t out_len = PAGE_SIZE;
+	int err = LZO_E_OK;
+	u16 size;
+
+	read_lock(&meta->tb_lock);
+	handle = meta->table[index].handle;
+	size = meta->table[index].size;
+
+	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+		read_unlock(&meta->tb_lock);
+		clear_page(mem);
+		return 0;
+	}
+
+	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
+	if (size != PAGE_SIZE)
+		err = lzo1x_decompress_safe(cmem, size, mem, &out_len);
+	else
+		copy_page(mem, cmem);
+	zs_unmap_object(meta->mem_pool, handle);
+	read_unlock(&meta->tb_lock);
+
+	if (likely(err == LZO_E_OK))
+		return 0;
+
+	/* Should NEVER happen. Return bio error if it does. */
+	pr_err("Decompression failed! err=%d, page=%u\n", err, index);
+	atomic64_inc(&zram->stats.failed_reads);
+	return err;
+}
+
+/*
+ * Read the part of zram page @index described by @bvec and @offset into
+ * the bio page.
+ *
+ * Slots with no allocation or with ZRAM_ZERO set are served as zeroes.
+ * A full-page read decompresses directly into the mapped bio page; a
+ * partial read decompresses into a temporary page first and then copies
+ * bv_len bytes starting at @offset.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary page cannot be allocated,
+ * or the error from zram_decompress_page().
+ */
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+			  u32 index, int offset, struct bio *bio)
+{
+	struct zram_meta *meta = zram->meta;
+	struct page *page = bvec->bv_page;
+	unsigned char *kaddr, *dst = NULL;
+	const int partial = is_partial_io(bvec);
+	int reads_as_zero;
+	int ret;
+
+	read_lock(&meta->tb_lock);
+	reads_as_zero = !meta->table[index].handle ||
+			zram_test_flag(meta, index, ZRAM_ZERO);
+	read_unlock(&meta->tb_lock);
+
+	if (reads_as_zero) {
+		handle_zero_page(bvec);
+		return 0;
+	}
+
+	/* Use a temporary buffer to decompress the page */
+	if (partial)
+		dst = kmalloc(PAGE_SIZE, GFP_NOIO);
+
+	kaddr = kmap_atomic(page);
+	if (!partial)
+		dst = kaddr;
+
+	if (!dst) {
+		pr_info("Unable to allocate temp memory\n");
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = zram_decompress_page(zram, dst, index);
+	/* Should NEVER happen. Return bio error if it does. */
+	if (unlikely(ret != LZO_E_OK))
+		goto out_cleanup;
+
+	if (partial)
+		memcpy(kaddr + bvec->bv_offset, dst + offset, bvec->bv_len);
+
+	flush_dcache_page(page);
+	ret = 0;
+out_cleanup:
+	kunmap_atomic(kaddr);
+	if (partial)
+		kfree(dst);
+	return ret;
+}
+
+/*
+ * Store the data described by @bvec into zram page @index.
+ *
+ * A partial write first decompresses the existing page into a temporary
+ * buffer and merges the new bytes in at @offset.  A page that ends up all
+ * zero is recorded with the ZRAM_ZERO flag and owns no allocation.
+ * Otherwise the page is LZO-compressed into meta->compress_buffer (guarded
+ * by meta->buffer_lock); results larger than max_zpage_size are stored
+ * uncompressed with size PAGE_SIZE.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * the (de)compressor.  Every failure bumps stats.failed_writes.
+ */
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
+			   int offset)
+{
+	int ret = 0;
+	size_t clen;
+	unsigned long handle;
+	struct page *page;
+	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
+	struct zram_meta *meta = zram->meta;
+	bool locked = false;
+
+	page = bvec->bv_page;
+	src = meta->compress_buffer;
+
+	if (is_partial_io(bvec)) {
+		/*
+		 * This is a partial IO. We need to read the full page
+		 * before writing the changes.
+		 */
+		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
+		if (!uncmem) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ret = zram_decompress_page(zram, uncmem, index);
+		if (ret)
+			goto out;
+	}
+
+	mutex_lock(&meta->buffer_lock);
+	locked = true;
+	user_mem = kmap_atomic(page);
+
+	if (is_partial_io(bvec)) {
+		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
+		       bvec->bv_len);
+		kunmap_atomic(user_mem);
+		user_mem = NULL;
+	} else {
+		uncmem = user_mem;
+	}
+
+	if (page_zero_filled(uncmem)) {
+		/*
+		 * For partial I/O the bio page was already unmapped above
+		 * and user_mem is NULL; unmapping again would unbalance
+		 * the atomic kmap section.
+		 */
+		if (user_mem)
+			kunmap_atomic(user_mem);
+		/* Free memory associated with this sector now. */
+		write_lock(&zram->meta->tb_lock);
+		zram_free_page(zram, index);
+		zram_set_flag(meta, index, ZRAM_ZERO);
+		write_unlock(&zram->meta->tb_lock);
+
+		atomic_inc(&zram->stats.pages_zero);
+		ret = 0;
+		goto out;
+	}
+
+	ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
+			       meta->compress_workmem);
+	if (!is_partial_io(bvec)) {
+		kunmap_atomic(user_mem);
+		user_mem = NULL;
+		uncmem = NULL;
+	}
+
+	if (unlikely(ret != LZO_E_OK)) {
+		pr_err("Compression failed! err=%d\n", ret);
+		goto out;
+	}
+
+	if (unlikely(clen > max_zpage_size)) {
+		/* poorly compressible: store the page uncompressed */
+		atomic_inc(&zram->stats.bad_compress);
+		clen = PAGE_SIZE;
+		src = NULL;
+		if (is_partial_io(bvec))
+			src = uncmem;
+	}
+
+	handle = zs_malloc(meta->mem_pool, clen);
+	if (!handle) {
+		pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
+			index, clen);
+		ret = -ENOMEM;
+		goto out;
+	}
+	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
+
+	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
+		/* full-page uncompressed store: copy straight from the bio page */
+		src = kmap_atomic(page);
+		copy_page(cmem, src);
+		kunmap_atomic(src);
+	} else {
+		memcpy(cmem, src, clen);
+	}
+
+	zs_unmap_object(meta->mem_pool, handle);
+
+	/*
+	 * Free memory associated with this sector
+	 * before overwriting unused sectors.
+	 */
+	write_lock(&zram->meta->tb_lock);
+	zram_free_page(zram, index);
+
+	meta->table[index].handle = handle;
+	meta->table[index].size = clen;
+	write_unlock(&zram->meta->tb_lock);
+
+	/* Update stats */
+	atomic64_add(clen, &zram->stats.compr_size);
+	atomic_inc(&zram->stats.pages_stored);
+	if (clen <= PAGE_SIZE / 2)
+		atomic_inc(&zram->stats.good_compress);
+
+out:
+	if (locked)
+		mutex_unlock(&meta->buffer_lock);
+	if (is_partial_io(bvec))
+		kfree(uncmem);
+
+	if (ret)
+		atomic64_inc(&zram->stats.failed_writes);
+	return ret;
+}
+
+/*
+ * Dispatch one bio_vec to the read path when @rw is READ and to the write
+ * path otherwise; returns whatever the chosen helper returns.
+ */
+static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
+			int offset, struct bio *bio, int rw)
+{
+	if (rw != READ)
+		return zram_bvec_write(zram, bvec, index, offset);
+
+	return zram_bvec_read(zram, bvec, index, offset, bio);
+}
+
+/*
+ * Return an initialized device to its pristine state: free every stored
+ * object, release the metadata, zero the statistics and the disk size.
+ * The gendisk capacity is only reset when @reset_capacity is true.
+ * Does nothing if the device is not initialized.  Runs with init_lock
+ * held for writing.
+ */
+static void zram_reset_device(struct zram *zram, bool reset_capacity)
+{
+	struct zram_meta *meta;
+	u64 nr_pages;
+	size_t index;
+
+	down_write(&zram->init_lock);
+	if (!zram->init_done)
+		goto unlock;
+
+	meta = zram->meta;
+	zram->init_done = 0;
+
+	/* Free all pages that are still in this zram device */
+	nr_pages = zram->disksize >> PAGE_SHIFT;
+	for (index = 0; index < nr_pages; index++) {
+		unsigned long handle = meta->table[index].handle;
+
+		if (handle)
+			zs_free(meta->mem_pool, handle);
+	}
+
+	zram_meta_free(zram->meta);
+	zram->meta = NULL;
+	/* Reset stats */
+	memset(&zram->stats, 0, sizeof(zram->stats));
+
+	zram->disksize = 0;
+	if (reset_capacity)
+		set_capacity(zram->disk, 0);
+unlock:
+	up_write(&zram->init_lock);
+}
+
+/*
+ * Attach @meta to @zram and mark the device initialized.  Logs an
+ * informational message when the chosen disksize is more than twice the
+ * size of RAM.  The only caller in this file, disksize_store(), holds
+ * init_lock for writing.
+ */
+static void zram_init_device(struct zram *zram, struct zram_meta *meta)
+{
+	unsigned long ram_bytes = totalram_pages << PAGE_SHIFT;
+
+	if (zram->disksize > 2 * ram_bytes) {
+		pr_info(
+		"There is little point creating a zram of greater than "
+		"twice the size of memory since we expect a 2:1 compression "
+		"ratio. Note that zram uses about 0.1%% of the size of "
+		"the disk when not in use so a huge zram is "
+		"wasteful.\n"
+		"\tMemory Size: %lu kB\n"
+		"\tSize you selected: %llu kB\n"
+		"Continuing anyway ...\n",
+		ram_bytes >> 10, zram->disksize >> 10
+		);
+	}
+
+	/* zram devices sort of resembles non-rotational disks */
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
+
+	zram->meta = meta;
+	zram->init_done = 1;
+
+	pr_debug("Initialization done!\n");
+}
+
+/*
+ * sysfs write handler for "disksize": parse the requested size, round it
+ * up to a page multiple, allocate the metadata and initialize the device.
+ *
+ * Returns @len on success, -EINVAL for a zero size, -ENOMEM when the
+ * metadata cannot be allocated and -EBUSY when the device is already
+ * initialized.
+ */
+static ssize_t disksize_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	u64 disksize;
+	struct zram_meta *meta;
+	struct zram *zram = dev_to_zram(dev);
+
+	disksize = memparse(buf, NULL);
+	if (!disksize)
+		return -EINVAL;
+
+	disksize = PAGE_ALIGN(disksize);
+	/* allocate before taking init_lock */
+	meta = zram_meta_alloc(disksize);
+	/*
+	 * zram_meta_alloc() returns NULL on failure.  Without this check a
+	 * NULL meta would either be handed to zram_meta_free(), which
+	 * dereferences it, or be installed with init_done set.
+	 */
+	if (!meta)
+		return -ENOMEM;
+
+	down_write(&zram->init_lock);
+	if (zram->init_done) {
+		up_write(&zram->init_lock);
+		zram_meta_free(meta);
+		pr_info("Cannot change disksize for initialized device\n");
+		return -EBUSY;
+	}
+
+	zram->disksize = disksize;
+	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+	zram_init_device(zram, meta);
+	up_write(&zram->init_lock);
+
+	return len;
+}
+
+/*
+ * sysfs write handler for "reset": a non-zero decimal value resets the
+ * device, provided nobody holds it open.
+ *
+ * Returns @len on success, -ENOMEM if the block device cannot be looked
+ * up, -EBUSY if the device has holders, the kstrtou16() error for
+ * unparsable input and -EINVAL when the value is 0.
+ */
+static ssize_t reset_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	struct block_device *bdev = bdget_disk(zram->disk, 0);
+	unsigned short do_reset;
+	int ret;
+
+	if (!bdev)
+		return -ENOMEM;
+
+	if (bdev->bd_holders) {
+		/* Do not reset an active device! */
+		ret = -EBUSY;
+	} else {
+		ret = kstrtou16(buf, 10, &do_reset);
+		if (!ret && !do_reset)
+			ret = -EINVAL;
+	}
+
+	if (ret) {
+		bdput(bdev);
+		return ret;
+	}
+
+	/* Make sure all pending I/O is finished */
+	fsync_bdev(bdev);
+	bdput(bdev);
+
+	zram_reset_device(zram, true);
+	return len;
+}
+
+/*
+ * Process every segment of @bio against the zram page(s) it covers.
+ * A segment that straddles a zram page boundary is split in two, since
+ * zram_bvec_rw() works on a single zram page at a time.  The bio is
+ * completed successfully once all segments are done, or failed on the
+ * first segment error.
+ */
+static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
+{
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	u32 index;
+	int offset;
+
+	if (rw == READ)
+		atomic64_inc(&zram->stats.num_reads);
+	else if (rw == WRITE)
+		atomic64_inc(&zram->stats.num_writes);
+
+	/* starting zram page and byte offset within it */
+	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+	offset = (bio->bi_iter.bi_sector &
+		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+
+	bio_for_each_segment(bvec, bio, iter) {
+		int room = PAGE_SIZE - offset;
+
+		if (bvec.bv_len <= room) {
+			if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
+			    < 0)
+				goto fail;
+		} else {
+			/*
+			 * zram_bvec_rw() can only make operation on a single
+			 * zram page. Split the bio vector.
+			 */
+			struct bio_vec part;
+
+			/* first piece: up to the end of the current page */
+			part.bv_page = bvec.bv_page;
+			part.bv_len = room;
+			part.bv_offset = bvec.bv_offset;
+			if (zram_bvec_rw(zram, &part, index, offset, bio, rw) < 0)
+				goto fail;
+
+			/* remainder: start of the following page */
+			part.bv_len = bvec.bv_len - room;
+			part.bv_offset += room;
+			if (zram_bvec_rw(zram, &part, index + 1, 0, bio, rw) < 0)
+				goto fail;
+		}
+
+		update_position(&index, &offset, &bvec);
+	}
+
+	set_bit(BIO_UPTODATE, &bio->bi_flags);
+	bio_endio(bio, 0);
+	return;
+
+fail:
+	bio_io_error(bio);
+}
+
+/*
+ * Handler function for all zram I/O requests.
+ *
+ * The request is serviced under the read side of init_lock.  It is failed
+ * with an I/O error when the device is not initialized or when
+ * valid_io_request() rejects it (the latter is counted in
+ * stats.invalid_io).
+ */
+static void zram_make_request(struct request_queue *queue, struct bio *bio)
+{
+	struct zram *zram = queue->queuedata;
+	bool handled = false;
+
+	down_read(&zram->init_lock);
+	if (likely(zram->init_done)) {
+		if (valid_io_request(zram, bio)) {
+			__zram_make_request(zram, bio, bio_data_dir(bio));
+			handled = true;
+		} else {
+			atomic64_inc(&zram->stats.invalid_io);
+		}
+	}
+	up_read(&zram->init_lock);
+
+	if (!handled)
+		bio_io_error(bio);
+}
+
+/*
+ * swap_slot_free_notify callback: free the zram page at @index under the
+ * write side of tb_lock and count the notification.
+ */
+static void zram_slot_free_notify(struct block_device *bdev,
+				unsigned long index)
+{
+	struct zram *zram = bdev->bd_disk->private_data;
+	struct zram_meta *meta = zram->meta;
+
+	write_lock(&meta->tb_lock);
+	zram_free_page(zram, index);
+	write_unlock(&meta->tb_lock);
+
+	atomic64_inc(&zram->stats.notify_free);
+}
+
+/* Block device operations; only the swap slot free hook is provided. */
+static const struct block_device_operations zram_devops = {
+	.owner			= THIS_MODULE,
+	.swap_slot_free_notify	= zram_slot_free_notify,
+};
+
+static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,	/* the only attribute with both a show and a store handler */
+ disksize_show, disksize_store);
+static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
+static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);	/* write-only: no show handler */
+static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);	/* the remaining attributes are read-only */
+static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
+static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
+static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
+static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
+static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
+static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
+static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
+
+static struct attribute *zram_disk_attrs[] = {	/* all per-device sysfs attributes of this driver */
+ &dev_attr_disksize.attr,
+ &dev_attr_initstate.attr,
+ &dev_attr_reset.attr,
+ &dev_attr_num_reads.attr,
+ &dev_attr_num_writes.attr,
+ &dev_attr_invalid_io.attr,
+ &dev_attr_notify_free.attr,
+ &dev_attr_zero_pages.attr,
+ &dev_attr_orig_data_size.attr,
+ &dev_attr_compr_data_size.attr,
+ &dev_attr_mem_used_total.attr,
+ NULL,	/* list terminator */
+};
+
+static struct attribute_group zram_disk_attr_group = {	/* created in create_device(), removed in destroy_device() */
+ .attrs = zram_disk_attrs,
+};
+
+/*
+ * Set up one zram device: allocate its request queue and gendisk, publish
+ * the disk with zero capacity and create the sysfs attribute group.  The
+ * real capacity is chosen later through the disksize attribute.
+ *
+ * Returns 0 on success, -ENOMEM if the queue or disk cannot be allocated,
+ * or the error from sysfs_create_group().  On failure everything set up
+ * so far is torn down again.
+ */
+static int create_device(struct zram *zram, int device_id)
+{
+	int ret = -ENOMEM;
+
+	init_rwsem(&zram->init_lock);
+
+	zram->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!zram->queue) {
+		pr_err("Error allocating disk queue for device %d\n",
+		       device_id);
+		goto out;
+	}
+
+	blk_queue_make_request(zram->queue, zram_make_request);
+	zram->queue->queuedata = zram;
+
+	/* gendisk structure */
+	zram->disk = alloc_disk(1);
+	if (!zram->disk) {
+		pr_warn("Error allocating disk structure for device %d\n",
+			device_id);
+		goto out_free_queue;
+	}
+
+	zram->disk->major = zram_major;
+	zram->disk->first_minor = device_id;
+	zram->disk->fops = &zram_devops;
+	zram->disk->queue = zram->queue;
+	zram->disk->private_data = zram;
+	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
+
+	/* Actual capacity is set using sysfs (/sys/block/zram<id>/disksize) */
+	set_capacity(zram->disk, 0);
+
+	/*
+	 * To ensure that we always get PAGE_SIZE aligned
+	 * and n*PAGE_SIZED sized I/O requests.
+	 */
+	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
+	blk_queue_logical_block_size(zram->disk->queue,
+				     ZRAM_LOGICAL_BLOCK_SIZE);
+	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
+	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
+
+	add_disk(zram->disk);
+
+	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
+				 &zram_disk_attr_group);
+	if (ret < 0) {
+		/* terminate the message so it is not merged with the next one */
+		pr_warn("Error creating sysfs group\n");
+		goto out_free_disk;
+	}
+
+	zram->init_done = 0;
+	return 0;
+
+out_free_disk:
+	del_gendisk(zram->disk);
+	put_disk(zram->disk);
+out_free_queue:
+	blk_cleanup_queue(zram->queue);
+out:
+	return ret;
+}
+
+/*
+ * Undo create_device(): remove the sysfs group, unregister and release
+ * the gendisk, then clean up the request queue.
+ */
+static void destroy_device(struct zram *zram)
+{
+	struct gendisk *disk = zram->disk;
+
+	sysfs_remove_group(&disk_to_dev(disk)->kobj, &zram_disk_attr_group);
+
+	del_gendisk(disk);
+	put_disk(disk);
+
+	blk_cleanup_queue(zram->queue);
+}
+
+/*
+ * Module entry point: validate num_devices, register the block major,
+ * allocate the device array and create each device.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range num_devices, -EBUSY
+ * if no major number is available, -ENOMEM if the device array cannot be
+ * allocated, or the error from create_device().
+ */
+static int __init zram_init(void)
+{
+	int ret, dev_id;
+
+	if (num_devices > max_num_devices) {
+		pr_warn("Invalid value for num_devices: %u\n",
+			num_devices);
+		return -EINVAL;
+	}
+
+	zram_major = register_blkdev(0, "zram");
+	if (zram_major <= 0) {
+		pr_warn("Unable to get major number\n");
+		return -EBUSY;
+	}
+
+	/* Allocate the device array and initialize each one */
+	zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
+	if (!zram_devices) {
+		ret = -ENOMEM;
+		goto err_unregister;
+	}
+
+	for (dev_id = 0; dev_id < num_devices; dev_id++) {
+		ret = create_device(&zram_devices[dev_id], dev_id);
+		if (ret)
+			goto err_destroy;
+	}
+
+	pr_info("Created %u device(s) ...\n", num_devices);
+
+	return 0;
+
+err_destroy:
+	/* tear down the devices created before the failing one */
+	for (dev_id--; dev_id >= 0; dev_id--)
+		destroy_device(&zram_devices[dev_id]);
+	kfree(zram_devices);
+err_unregister:
+	unregister_blkdev(zram_major, "zram");
+	return ret;
+}
+
+/*
+ * Module exit: destroy and reset every device, then give back the block
+ * major and the device array.
+ */
+static void __exit zram_exit(void)
+{
+	int i;
+
+	for (i = 0; i < num_devices; i++) {
+		struct zram *zram = zram_devices + i;
+
+		destroy_device(zram);
+		/*
+		 * Shouldn't access zram->disk after destroy_device
+		 * because destroy_device already released zram->disk.
+		 * Hence reset_capacity is false here.
+		 */
+		zram_reset_device(zram, false);
+	}
+
+	unregister_blkdev(zram_major, "zram");
+
+	kfree(zram_devices);
+	pr_debug("Cleanup done!\n");
+}
+
+module_init(zram_init);
+module_exit(zram_exit);
+
+module_param(num_devices, uint, 0);	/* validated against max_num_devices in zram_init() */
+MODULE_PARM_DESC(num_devices, "Number of zram devices");
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
+MODULE_DESCRIPTION("Compressed RAM Block Device");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
new file mode 100644
index 00000000000..ad8aa35bae0
--- /dev/null
+++ b/drivers/block/zram/zram_drv.h
@@ -0,0 +1,109 @@
+/*
+ * Compressed RAM block device
+ *
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ * 2012, 2013 Minchan Kim
+ *
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ *
+ */
+
+#ifndef _ZRAM_DRV_H_
+#define _ZRAM_DRV_H_
+
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/zsmalloc.h>
+
+/*
+ * Some arbitrary value. This is just to catch
+ * an invalid value for the num_devices module parameter.
+ */
+static const unsigned max_num_devices = 32;
+
+/*-- Configurable parameters */
+
+/*
+ * Pages that compress to a size greater than this are stored
+ * uncompressed in memory.
+ */
+static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;	/* three quarters of a page */
+
+/*
+ * NOTE: max_zpage_size must be less than or equal to
+ * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
+ * always return failure.
+ */
+
+/*-- End of configurable params */
+
+#define SECTOR_SHIFT 9	/* log2 of the sector size in bytes */
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)	/* 512 bytes */
+#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)	/* log2 of sectors per page */
+#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
+#define ZRAM_LOGICAL_BLOCK_SHIFT 12	/* log2 of the logical block size */
+#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT)	/* 4096 bytes */
+#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
+ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))	/* 8 sectors */
+
+/* Flags for zram pages (table[page_no].flags); each value is a bit number */
+enum zram_pageflags {
+ /* Page consists entirely of zeros */
+ ZRAM_ZERO,
+
+ __NR_ZRAM_PAGEFLAGS,	/* count of the flags defined above */
+};
+
+/*-- Data structures */
+
+/* Allocated for each disk page */
+struct table {
+ unsigned long handle;	/* zsmalloc handle; 0 when nothing is allocated for the page */
+ u16 size; /* object size (excluding header) */
+ u8 count; /* object ref count (not yet used) */
+ u8 flags;	/* bit mask indexed by enum zram_pageflags */
+} __aligned(4);
+
+struct zram_stats {
+ atomic64_t compr_size; /* compressed size of pages stored */
+ atomic64_t num_reads; /* failed + successful */
+ atomic64_t num_writes; /* --do-- */
+ atomic64_t failed_reads; /* should NEVER! happen */
+ atomic64_t failed_writes; /* can happen when memory is too low */
+ atomic64_t invalid_io; /* requests that are unaligned or out of range */
+ atomic64_t notify_free; /* no. of swap slot free notifications */
+ atomic_t pages_zero; /* no. of zero filled pages */
+ atomic_t pages_stored; /* no. of pages currently stored */
+ atomic_t good_compress; /* no. of pages with compression ratio<=50% */
+ atomic_t bad_compress; /* no. of pages with compression ratio>75% */
+};
+
+struct zram_meta {
+ rwlock_t tb_lock; /* protect table */
+ void *compress_workmem;	/* LZO1X_MEM_COMPRESS bytes of compressor work memory */
+ void *compress_buffer;	/* output buffer for lzo1x_1_compress() */
+ struct table *table;	/* one entry per page of disk */
+ struct zs_pool *mem_pool;	/* zsmalloc pool holding the stored objects */
+ struct mutex buffer_lock; /* protect compress buffers */
+};
+
+struct zram {
+ struct zram_meta *meta;	/* set by zram_init_device(), freed and cleared by zram_reset_device() */
+ struct request_queue *queue;
+ struct gendisk *disk;
+ int init_done;	/* 1 while the device is initialized (meta attached), else 0 */
+ /* Prevent concurrent execution of device init, reset and R/W request */
+ struct rw_semaphore init_lock;
+ /*
+ * This is the limit on the amount of *uncompressed* data
+ * we can store in a disk.
+ */
+ u64 disksize; /* bytes */
+
+ struct zram_stats stats;
+};
+#endif