summary refs log tree commit diff stats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/aoe/aoecmd.c 1
-rw-r--r-- drivers/block/cciss_scsi.c 12
-rw-r--r-- drivers/block/drbd/drbd_actlog.c 8
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c 19
-rw-r--r-- drivers/block/drbd/drbd_int.h 45
-rw-r--r-- drivers/block/drbd/drbd_main.c 97
-rw-r--r-- drivers/block/drbd/drbd_nl.c 40
-rw-r--r-- drivers/block/drbd/drbd_proc.c 3
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 38
-rw-r--r-- drivers/block/drbd/drbd_req.c 45
-rw-r--r-- drivers/block/drbd/drbd_worker.c 12
-rw-r--r-- drivers/block/floppy.c 24
-rw-r--r-- drivers/block/mg_disk.c 13
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c 38
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.h 10
-rw-r--r-- drivers/block/nbd.c 17
-rw-r--r-- drivers/block/nvme.c 153
-rw-r--r-- drivers/block/rbd.c 827
-rw-r--r-- drivers/block/rbd_types.h 1
-rw-r--r-- drivers/block/umem.c 37
-rw-r--r-- drivers/block/virtio_blk.c 115
-rw-r--r-- drivers/block/xen-blkback/blkback.c 2
-rw-r--r-- drivers/block/xen-blkfront.c 5
23 files changed, 967 insertions, 595 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index de0435e63b0..887f68f6d79 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -35,6 +35,7 @@ new_skb(ulong len)
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb->protocol = __constant_htons(ETH_P_AOE);
+ skb_checksum_none_assert(skb);
}
return skb;
}
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index acda773b372..da3311129a0 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
{
case CMD_TARGET_STATUS:
/* Pass it up to the upper layers... */
- if( ei->ScsiStatus)
- {
-#if 0
- printk(KERN_WARNING "cciss: cmd %p "
- "has SCSI Status = %x\n",
- c, ei->ScsiStatus);
-#endif
- cmd->result |= (ei->ScsiStatus << 1);
- }
- else { /* scsi status is zero??? How??? */
+ if (!ei->ScsiStatus) {
/* Ordinarily, this case should never happen, but there is a bug
in some released firmware revisions that allows it to happen
@@ -804,6 +795,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
}
break;
case CMD_PROTOCOL_ERR:
+ cmd->result = DID_ERROR << 16;
dev_warn(&h->pdev->dev,
"%p has protocol error\n", c);
break;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e54e31b02b8..3fbef018ce5 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -411,7 +411,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
if (++mdev->al_tr_pos >
div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
@@ -876,7 +876,11 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
unsigned int enr, count = 0;
struct lc_element *e;
- if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ /* this should be an empty REQ_FLUSH */
+ if (size == 0)
+ return 0;
+
+ if (size < 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return 0;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index fcb956bb4b4..d8456649674 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -889,6 +889,7 @@ struct bm_aio_ctx {
unsigned int done;
unsigned flags;
#define BM_AIO_COPY_PAGES 1
+#define BM_WRITE_ALL_PAGES 2
int error;
struct kref kref;
};
@@ -1059,7 +1060,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
if (rw & WRITE) {
- if (bm_test_page_unchanged(b->bm_pages[i])) {
+ if (!(flags & BM_WRITE_ALL_PAGES) &&
+ bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
continue;
}
@@ -1096,7 +1098,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
err = -EIO; /* ctx->error ? */
}
@@ -1141,6 +1143,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
}
/**
+ * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
+ *
+ * Will write all pages.
+ */
+int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
+}
+
+/**
* drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
* @mdev: DRBD device.
* @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
@@ -1212,7 +1225,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
if (ctx->error)
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
/* that should force detach, so the in memory bitmap will be
* gone in a moment as well. */
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 02f013a073a..b953cc7c9c0 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -813,7 +813,6 @@ enum {
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
SEND_PING, /* whether asender should send a ping asap */
- UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
MD_DIRTY, /* current uuids and flags not yet on disk */
DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */
@@ -824,7 +823,6 @@ enum {
CRASHED_PRIMARY, /* This node was a crashed primary.
* Gets cleared when the state.conn
* goes into C_CONNECTED state. */
- NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */
CONSIDER_RESYNC,
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
@@ -834,6 +832,7 @@ enum {
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed returned IO error */
+ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@@ -851,6 +850,13 @@ enum {
AL_SUSPENDED, /* Activity logging is currently suspended. */
AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */
STATE_SENT, /* Do not change state/UUIDs while this is set */
+
+ CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
+ * pending, from drbd worker context.
+ * If set, bdi_write_congested() returns true,
+ * so shrink_page_list() would not recurse into,
+ * and potentially deadlock on, this drbd worker.
+ */
};
struct drbd_bitmap; /* opaque for drbd_conf */
@@ -1130,8 +1136,8 @@ struct drbd_conf {
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
int rs_planed; /* resync sectors already planned */
atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
- int peer_max_bio_size;
- int local_max_bio_size;
+ unsigned int peer_max_bio_size;
+ unsigned int local_max_bio_size;
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1435,9 +1441,9 @@ struct bm_extent {
* hash table. */
#define HT_SHIFT 8
#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
-#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
+#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */
-#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
+#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* The old header only allows packets up to 32KiB data */
/* Number of elements in the app_reads_hash */
#define APP_R_HSIZE 15
@@ -1463,6 +1469,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
unsigned long al_enr);
@@ -1840,12 +1847,20 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
}
+enum drbd_force_detach_flags {
+ DRBD_IO_ERROR,
+ DRBD_META_IO_ERROR,
+ DRBD_FORCE_DETACH,
+};
+
#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
-static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
+ enum drbd_force_detach_flags forcedetach,
+ const char *where)
{
switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
- if (!forcedetach) {
+ if (forcedetach == DRBD_IO_ERROR) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s.\n", where);
if (mdev->state.disk > D_INCONSISTENT)
@@ -1856,6 +1871,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
case EP_DETACH:
case EP_CALL_HELPER:
set_bit(WAS_IO_ERROR, &mdev->flags);
+ if (forcedetach == DRBD_FORCE_DETACH)
+ set_bit(FORCE_DETACH, &mdev->flags);
if (mdev->state.disk > D_FAILED) {
_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
dev_err(DEV,
@@ -1875,7 +1892,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
*/
#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
- int error, int forcedetach, const char *where)
+ int error, enum drbd_force_detach_flags forcedetach, const char *where)
{
if (error) {
unsigned long flags;
@@ -2405,15 +2422,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
D_ASSERT(ap_bio >= 0);
+
+ if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
+ if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+ drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+ }
+
/* this currently does wake_up for every dec_ap_bio!
* maybe rather introduce some type of hysteresis?
* e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
if (ap_bio < mxb)
wake_up(&mdev->misc_wait);
- if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
- if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
- drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
- }
}
static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 920ede2829d..f93a0320e95 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void _tl_clear(struct drbd_conf *mdev);
MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
"Lars Ellenberg <lars@linbit.com>");
@@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
/* Actions operating on the disk state, also want to work on
requests that got barrier acked. */
- switch (what) {
- case fail_frozen_disk_io:
- case restart_frozen_disk_io:
- list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
- req = list_entry(le, struct drbd_request, tl_requests);
- _req_mod(req, what);
- }
- case connection_lost_while_pending:
- case resend:
- break;
- default:
- dev_err(DEV, "what = %d in _tl_restart()\n", what);
+ list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
+ req = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(req, what);
}
}
@@ -459,11 +451,16 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
*/
void tl_clear(struct drbd_conf *mdev)
{
+ spin_lock_irq(&mdev->req_lock);
+ _tl_clear(mdev);
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+static void _tl_clear(struct drbd_conf *mdev)
+{
struct list_head *le, *tle;
struct drbd_request *r;
- spin_lock_irq(&mdev->req_lock);
-
_tl_restart(mdev, connection_lost_while_pending);
/* we expect this list to be empty. */
@@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev)
memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
- spin_unlock_irq(&mdev->req_lock);
}
void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
@@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (ns.susp_fen) {
/* case1: The outdate peer handler is successful: */
if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
- tl_clear(mdev);
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
drbd_uuid_new_current(mdev);
clear_bit(NEW_CUR_UUID, &mdev->flags);
}
spin_lock_irq(&mdev->req_lock);
+ _tl_clear(mdev);
_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
spin_unlock_irq(&mdev->req_lock);
}
@@ -1514,6 +1510,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
+ /* we probably will start a resync soon.
+ * make sure those things are properly reset. */
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ drbd_rs_cancel_all(mdev);
+
drbd_send_uuids(mdev);
drbd_send_state(mdev, ns);
}
@@ -1630,9 +1633,24 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
eh = mdev->ldev->dc.on_io_error;
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
- /* Immediately allow completion of all application IO, that waits
- for completion from the local disk. */
- tl_abort_disk_io(mdev);
+ if (was_io_error && eh == EP_CALL_HELPER)
+ drbd_khelper(mdev, "local-io-error");
+
+ /* Immediately allow completion of all application IO,
+ * that waits for completion from the local disk,
+ * if this was a force-detach due to disk_timeout
+ * or administrator request (drbdsetup detach --force).
+ * Do NOT abort otherwise.
+ * Aborting local requests may cause serious problems,
+ * if requests are completed to upper layers already,
+ * and then later the already submitted local bio completes.
+ * This can cause DMA into former bio pages that meanwhile
+ * have been re-used for other things.
+ * So aborting local requests may cause crashes,
+ * or even worse, silent data corruption.
+ */
+ if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
+ tl_abort_disk_io(mdev);
/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
@@ -1653,9 +1671,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
drbd_md_sync(mdev);
}
put_ldev(mdev);
-
- if (was_io_error && eh == EP_CALL_HELPER)
- drbd_khelper(mdev, "local-io-error");
}
/* second half of local IO error, failure to attach,
@@ -1669,10 +1684,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
"ASSERT FAILED: disk is %s while going diskless\n",
drbd_disk_str(mdev->state.disk));
- mdev->rs_total = 0;
- mdev->rs_failed = 0;
- atomic_set(&mdev->rs_pending_cnt, 0);
-
if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);
@@ -2194,7 +2205,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
{
struct p_sizes p;
sector_t d_size, u_size;
- int q_order_type, max_bio_size;
+ int q_order_type;
+ unsigned int max_bio_size;
int ok;
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -2203,7 +2215,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
u_size = mdev->ldev->dc.disk_size;
q_order_type = drbd_queue_order_type(mdev);
max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
- max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
+ max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
put_ldev(mdev);
} else {
d_size = 0;
@@ -2214,7 +2226,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
/* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
if (mdev->agreed_pro_version <= 94)
- max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+ max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
p.d_size = cpu_to_be64(d_size);
p.u_size = cpu_to_be64(u_size);
@@ -3521,9 +3533,9 @@ static void drbd_cleanup(void)
}
/**
- * drbd_congested() - Callback for pdflush
+ * drbd_congested() - Callback for the flusher thread
* @congested_data: User data
- * @bdi_bits: Bits pdflush is currently interested in
+ * @bdi_bits: Bits the BDI flusher thread is currently interested in
*
* Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
*/
@@ -3541,6 +3553,22 @@ static int drbd_congested(void *congested_data, int bdi_bits)
goto out;
}
+ if (test_bit(CALLBACK_PENDING, &mdev->flags)) {
+ r |= (1 << BDI_async_congested);
+ /* Without good local data, we would need to read from remote,
+ * and that would need the worker thread as well, which is
+ * currently blocked waiting for that usermode helper to
+ * finish.
+ */
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
+ r |= (1 << BDI_sync_congested);
+ else
+ put_ldev(mdev);
+ r &= bdi_bits;
+ reason = 'c';
+ goto out;
+ }
+
if (get_ldev(mdev)) {
q = bdev_get_queue(mdev->ldev->backing_bdev);
r = bdi_congested(&q->backing_dev_info, bdi_bits);
@@ -3604,6 +3632,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
q->backing_dev_info.congested_data = mdev;
blk_queue_make_request(q, drbd_make_request);
+ blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
@@ -3870,7 +3899,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
/* this was a try anyways ... */
dev_err(DEV, "meta data update failed!\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
}
/* Update mdev->ldev->md.la_size_sect,
@@ -3950,9 +3979,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
spin_lock_irq(&mdev->req_lock);
if (mdev->state.conn < C_CONNECTED) {
- int peer;
+ unsigned int peer;
peer = be32_to_cpu(buffer->la_peer_max_bio_size);
- peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+ peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE);
mdev->peer_max_bio_size = peer;
}
spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 6d4de6a72e8..edb490aad8b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -147,6 +147,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
char *argv[] = {usermode_helper, cmd, mb, NULL };
int ret;
+ if (current == mdev->worker.task)
+ set_bit(CALLBACK_PENDING, &mdev->flags);
+
snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
if (get_net_conf(mdev)) {
@@ -189,6 +192,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
+ if (current == mdev->worker.task)
+ clear_bit(CALLBACK_PENDING, &mdev->flags);
+
if (ret < 0) /* Ignore any ERRNOs we got. */
ret = 0;
@@ -668,8 +674,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
- err = drbd_bitmap_io(mdev, &drbd_bm_write,
- "size changed", BM_LOCKED_MASK);
+ err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+ "size changed", BM_LOCKED_MASK);
if (err) {
rv = dev_size_error;
goto out;
@@ -795,8 +801,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
{
struct request_queue * const q = mdev->rq_queue;
- int max_hw_sectors = max_bio_size >> 9;
- int max_segments = 0;
+ unsigned int max_hw_sectors = max_bio_size >> 9;
+ unsigned int max_segments = 0;
if (get_ldev_if_state(mdev, D_ATTACHING)) {
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
@@ -829,7 +835,7 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_
void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
{
- int now, new, local, peer;
+ unsigned int now, new, local, peer;
now = queue_max_hw_sectors(mdev->rq_queue) << 9;
local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
@@ -840,13 +846,14 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
mdev->local_max_bio_size = local;
put_ldev(mdev);
}
+ local = min(local, DRBD_MAX_BIO_SIZE);
/* We may ignore peer limits if the peer is modern enough.
Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */
if (mdev->state.conn >= C_CONNECTED) {
if (mdev->agreed_pro_version < 94) {
- peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+ peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
} else if (mdev->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
@@ -854,10 +861,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
peer = DRBD_MAX_BIO_SIZE;
}
- new = min_t(int, local, peer);
+ new = min(local, peer);
if (mdev->state.role == R_PRIMARY && new < now)
- dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+ dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
if (new != now)
dev_info(DEV, "max BIO size = %u\n", new);
@@ -950,6 +957,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
* to realize a "hot spare" feature (not that I'd recommend that) */
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+ /* make sure there is no leftover from previous force-detach attempts */
+ clear_bit(FORCE_DETACH, &mdev->flags);
+
+ /* and no leftover from previously aborted resync or verify, either */
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+
/* allocation not in the IO path, cqueue thread context */
nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
if (!nbc) {
@@ -1345,6 +1360,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
if (dt.detach_force) {
+ set_bit(FORCE_DETACH, &mdev->flags);
drbd_force_state(mdev, NS(disk, D_FAILED));
reply->ret_code = SS_SUCCESS;
goto out;
@@ -1962,9 +1978,11 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
int retcode;
/* If there is still bitmap IO pending, probably because of a previous
- * resync just being finished, wait for it before requesting a new resync. */
+ * resync just being finished, wait for it before requesting a new resync.
+ * Also wait for its after_state_ch(). */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+ drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
@@ -2003,9 +2021,11 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
int retcode;
/* If there is still bitmap IO pending, probably because of a previous
- * resync just being finished, wait for it before requesting a new resync. */
+ * resync just being finished, wait for it before requesting a new resync.
+ * Also wait for its after_state_ch(). */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+ drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 869bada2ed0..5496104f90b 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -245,6 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
mdev->state.role == R_SECONDARY) {
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
+ /* reset mdev->congestion_reason */
+ bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
+
seq_printf(seq,
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ea4836e0ae9..c74ca2df743 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -277,6 +277,9 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
int i;
+ if (page == NULL)
+ return;
+
if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
i = page_chain_free(page);
else {
@@ -316,7 +319,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
gfp_t gfp_mask) __must_hold(local)
{
struct drbd_epoch_entry *e;
- struct page *page;
+ struct page *page = NULL;
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
@@ -329,9 +332,11 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
return NULL;
}
- page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
- if (!page)
- goto fail;
+ if (data_size) {
+ page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
+ if (!page)
+ goto fail;
+ }
INIT_HLIST_NODE(&e->collision);
e->epoch = NULL;
@@ -1270,7 +1275,6 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
data_size -= dgs;
- ERR_IF(data_size == 0) return NULL;
ERR_IF(data_size & 0x1ff) return NULL;
ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL;
@@ -1291,6 +1295,9 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
if (!e)
return NULL;
+ if (!data_size)
+ return e;
+
ds = data_size;
page = e->pages;
page_chain_for_each(page) {
@@ -1715,6 +1722,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
dp_flags = be32_to_cpu(p->dp_flags);
rw |= wire_flags_to_bio(mdev, dp_flags);
+ if (e->pages == NULL) {
+ D_ASSERT(e->size == 0);
+ D_ASSERT(dp_flags & DP_FLUSH);
+ }
if (dp_flags & DP_MAY_SET_IN_SYNC)
e->flags |= EE_MAY_SET_IN_SYNC;
@@ -3801,11 +3812,18 @@ void drbd_free_tl_hash(struct drbd_conf *mdev)
mdev->ee_hash = NULL;
mdev->ee_hash_s = 0;
- /* paranoia code */
- for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
- if (h->first)
- dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
- (int)(h - mdev->tl_hash), h->first);
+ /* We may not have had the chance to wait for all locally pending
+ * application requests. The hlist_add_fake() prevents access after
+ * free on master bio completion. */
+ for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) {
+ struct drbd_request *req;
+ struct hlist_node *pos, *n;
+ hlist_for_each_entry_safe(req, pos, n, h, collision) {
+ hlist_del_init(&req->collision);
+ hlist_add_fake(&req->collision);
+ }
+ }
+
kfree(mdev->tl_hash);
mdev->tl_hash = NULL;
mdev->tl_hash_s = 0;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 8e93a6ac9bb..01b2ac641c7 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
_req_may_be_done_not_susp(req, m);
break;
@@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
}
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
goto_queue_for_net_read:
@@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case resend:
+ /* Simply complete (local only) READs. */
+ if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
+ _req_may_be_done(req, m);
+ break;
+ }
+
/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
before the connection loss (B&C only); only P_BARRIER_ACK was missing.
Trowing them out of the TL here by pretending we got a BARRIER_ACK
@@ -834,7 +840,15 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
req->private_bio = NULL;
}
if (rw == WRITE) {
- remote = 1;
+ /* Need to replicate writes. Unless it is an empty flush,
+ * which is better mapped to a DRBD P_BARRIER packet,
+ * also for drbd wire protocol compatibility reasons. */
+ if (unlikely(size == 0)) {
+ /* The only size==0 bios we expect are empty flushes. */
+ D_ASSERT(bio->bi_rw & REQ_FLUSH);
+ remote = 0;
+ } else
+ remote = 1;
} else {
/* READ || READA */
if (local) {
@@ -870,8 +884,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
* extent. This waits for any resync activity in the corresponding
* resync extent to finish, and, if necessary, pulls in the target
* extent into the activity log, which involves further disk io because
- * of transactional on-disk meta data updates. */
- if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+ * of transactional on-disk meta data updates.
+ * Empty flushes don't need to go into the activity log, they can only
+ * flush data for pending writes which are already in there. */
+ if (rw == WRITE && local && size
+ && !test_bit(AL_SUSPENDED, &mdev->flags)) {
req->rq_state |= RQ_IN_ACT_LOG;
drbd_al_begin_io(mdev, sector);
}
@@ -994,7 +1011,10 @@ allocate_barrier:
if (rw == WRITE && _req_conflicts(req))
goto fail_conflicting;
- list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
+ /* no point in adding empty flushes to the transfer log,
+ * they are mapped to drbd barriers already. */
+ if (likely(size!=0))
+ list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
/* NOTE remote first: to get the concurrent write detection right,
* we must register the request before start of local IO. */
@@ -1014,6 +1034,14 @@ allocate_barrier:
mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
maybe_pull_ahead(mdev);
+ /* If this was a flush, queue a drbd barrier/start a new epoch.
+ * Unless the current epoch was empty anyways, or we are not currently
+ * replicating, in which case there is no point. */
+ if (unlikely(bio->bi_rw & REQ_FLUSH)
+ && mdev->newest_tle->n_writes
+ && drbd_should_do_remote(mdev->state))
+ queue_barrier(mdev);
+
spin_unlock_irq(&mdev->req_lock);
kfree(b); /* if someone else has beaten us to it... */
@@ -1111,13 +1139,12 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
/*
* what we "blindly" assume:
*/
- D_ASSERT(bio->bi_size > 0);
D_ASSERT((bio->bi_size & 0x1ff) == 0);
/* to make some things easier, force alignment of requests within the
* granularity of our hash tables */
s_enr = bio->bi_sector >> HT_SHIFT;
- e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
+ e_enr = bio->bi_size ? (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT : s_enr;
if (likely(s_enr == e_enr)) {
do {
@@ -1275,7 +1302,7 @@ void request_timer_fn(unsigned long data)
time_after(now, req->start_time + dt) &&
!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
- __drbd_chk_io_error(mdev, 1);
+ __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH);
}
nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 620c70ff223..6bce2cc179d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
if (list_empty(&mdev->read_ee))
wake_up(&mdev->ee_wait);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
: list_empty(&mdev->active_ee);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (is_syncer_req)
@@ -1501,14 +1501,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
return;
}
- if (mdev->state.conn < C_AHEAD) {
- /* In case a previous resync run was aborted by an IO error/detach on the peer. */
- drbd_rs_cancel_all(mdev);
- /* This should be done when we abort the resync. We definitely do not
- want to have this for connections going back and forth between
- Ahead/Behind and SyncSource/SyncTarget */
- }
-
if (side == C_SYNC_TARGET) {
/* Since application IO was locked out during C_WF_BITMAP_T and
C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 553f43a9095..a7d6347aaa7 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -191,6 +191,7 @@ static int print_unex = 1;
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/uaccess.h>
+#include <linux/async.h>
/*
* PS/2 floppies have much slower step rates than regular floppies.
@@ -2516,8 +2517,7 @@ static int make_raw_rw_request(void)
set_fdc((long)current_req->rq_disk->private_data);
raw_cmd = &default_raw_cmd;
- raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_DISK |
- FD_RAW_NEED_SEEK;
+ raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK;
raw_cmd->cmd_count = NR_RW;
if (rq_data_dir(current_req) == READ) {
raw_cmd->flags |= FD_RAW_READ;
@@ -4123,7 +4123,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
return get_disk(disks[drive]);
}
-static int __init floppy_init(void)
+static int __init do_floppy_init(void)
{
int i, unit, drive;
int err, dr;
@@ -4338,6 +4338,24 @@ out_put_disk:
return err;
}
+#ifndef MODULE
+static __init void floppy_async_init(void *data, async_cookie_t cookie)
+{
+ do_floppy_init();
+}
+#endif
+
+static int __init floppy_init(void)
+{
+#ifdef MODULE
+ return do_floppy_init();
+#else
+ /* Don't hold up the bootup by the floppy initialization */
+ async_schedule(floppy_async_init, NULL);
+ return 0;
+#endif
+}
+
static const struct io_region {
int offset;
int size;
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 76fa3deaee8..1788f491e0f 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -780,9 +780,9 @@ static const struct block_device_operations mg_disk_ops = {
.getgeo = mg_getgeo
};
-static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
+static int mg_suspend(struct device *dev)
{
- struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_drv_data *prv_data = dev->platform_data;
struct mg_host *host = prv_data->host;
if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
@@ -804,9 +804,9 @@ static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
return 0;
}
-static int mg_resume(struct platform_device *plat_dev)
+static int mg_resume(struct device *dev)
{
- struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_drv_data *prv_data = dev->platform_data;
struct mg_host *host = prv_data->host;
if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
@@ -825,6 +825,8 @@ static int mg_resume(struct platform_device *plat_dev)
return 0;
}
+static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume);
+
static int mg_probe(struct platform_device *plat_dev)
{
struct mg_host *host;
@@ -1074,11 +1076,10 @@ static int mg_remove(struct platform_device *plat_dev)
static struct platform_driver mg_disk_driver = {
.probe = mg_probe,
.remove = mg_remove,
- .suspend = mg_suspend,
- .resume = mg_resume,
.driver = {
.name = MG_DEV_NAME,
.owner = THIS_MODULE,
+ .pm = &mg_pm,
}
};
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a8fddeb3d63..f946d31d691 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1148,11 +1148,15 @@ static bool mtip_pause_ncq(struct mtip_port *port,
reply = port->rxfis + RX_FIS_D2H_REG;
task_file_data = readl(port->mmio+PORT_TFDATA);
- if ((task_file_data & 1) || (fis->command == ATA_CMD_SEC_ERASE_UNIT))
+ if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
+ clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
+
+ if ((task_file_data & 1))
return false;
if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
port->ic_pause_timer = jiffies;
return true;
} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
@@ -1900,7 +1904,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
int rv = 0, xfer_sz = command[3];
if (xfer_sz) {
- if (user_buffer)
+ if (!user_buffer)
return -EFAULT;
buf = dmam_alloc_coherent(&port->dd->pdev->dev,
@@ -2043,7 +2047,7 @@ static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout)
*timeout = 240000; /* 4 minutes */
break;
case ATA_CMD_STANDBYNOW1:
- *timeout = 10000; /* 10 seconds */
+ *timeout = 120000; /* 2 minutes */
break;
case 0xF7:
case 0xFA:
@@ -2588,9 +2592,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
if (!len || size)
return 0;
- if (size < 0)
- return -EINVAL;
-
size += sprintf(&buf[size], "H/ S ACTive : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
@@ -2660,9 +2661,6 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
if (!len || size)
return 0;
- if (size < 0)
- return -EINVAL;
-
size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
dd->port->flags);
size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n",
@@ -3214,8 +3212,8 @@ static int mtip_hw_init(struct driver_data *dd)
"Unable to check write protect progress\n");
else
dev_info(&dd->pdev->dev,
- "Write protect progress: %d%% (%d blocks)\n",
- attr242.cur, attr242.data);
+ "Write protect progress: %u%% (%u blocks)\n",
+ attr242.cur, le32_to_cpu(attr242.data));
return rv;
out3:
@@ -3619,6 +3617,10 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
bio_endio(bio, -ENODATA);
return;
}
+ if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) {
+ bio_endio(bio, -ENODATA);
+ return;
+ }
}
if (unlikely(!bio_has_data(bio))) {
@@ -4168,7 +4170,13 @@ static void mtip_pci_shutdown(struct pci_dev *pdev)
/* Table of device ids supported by this driver. */
static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
- { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) },
{ 0 }
};
@@ -4199,12 +4207,12 @@ static int __init mtip_init(void)
{
int error;
- printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
+ pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
/* Allocate a major block device number to use with this driver. */
error = register_blkdev(0, MTIP_DRV_NAME);
if (error <= 0) {
- printk(KERN_ERR "Unable to register block device (%d)\n",
+ pr_err("Unable to register block device (%d)\n",
error);
return -EBUSY;
}
@@ -4213,7 +4221,7 @@ static int __init mtip_init(void)
if (!dfs_parent) {
dfs_parent = debugfs_create_dir("rssd", NULL);
if (IS_ERR_OR_NULL(dfs_parent)) {
- printk(KERN_WARNING "Error creating debugfs parent\n");
+ pr_warn("Error creating debugfs parent\n");
dfs_parent = NULL;
}
}
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index f51fc23d17b..18627a1d04c 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -76,7 +76,13 @@
/* Micron Vendor ID & P320x SSD Device ID */
#define PCI_VENDOR_ID_MICRON 0x1344
-#define P320_DEVICE_ID 0x5150
+#define P320H_DEVICE_ID 0x5150
+#define P320M_DEVICE_ID 0x5151
+#define P320S_DEVICE_ID 0x5152
+#define P325M_DEVICE_ID 0x5153
+#define P420H_DEVICE_ID 0x5160
+#define P420M_DEVICE_ID 0x5161
+#define P425M_DEVICE_ID 0x5163
/* Driver name and version strings */
#define MTIP_DRV_NAME "mtip32xx"
@@ -131,10 +137,12 @@ enum {
MTIP_PF_SVC_THD_STOP_BIT = 8,
/* below are bit numbers in 'dd_flag' defined in driver_data */
+ MTIP_DDF_SEC_LOCK_BIT = 0,
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \
+ (1 << MTIP_DDF_SEC_LOCK_BIT) | \
(1 << MTIP_DDF_OVER_TEMP_BIT) | \
(1 << MTIP_DDF_WRITE_PROTECT_BIT)),
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 061427a75d3..0c03411c59e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -154,6 +154,7 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
struct msghdr msg;
struct kvec iov;
sigset_t blocked, oldset;
+ unsigned long pflags = current->flags;
if (unlikely(!sock)) {
dev_err(disk_to_dev(nbd->disk),
@@ -167,8 +168,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
siginitsetinv(&blocked, sigmask(SIGKILL));
sigprocmask(SIG_SETMASK, &blocked, &oldset);
+ current->flags |= PF_MEMALLOC;
do {
- sock->sk->sk_allocation = GFP_NOIO;
+ sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
iov.iov_base = buf;
iov.iov_len = size;
msg.msg_name = NULL;
@@ -214,6 +216,7 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
} while (size > 0);
sigprocmask(SIG_SETMASK, &oldset, NULL);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
return result;
}
@@ -405,6 +408,7 @@ static int nbd_do_it(struct nbd_device *nbd)
BUG_ON(nbd->magic != NBD_MAGIC);
+ sk_set_memalloc(nbd->sock->sk);
nbd->pid = task_pid_nr(current);
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
@@ -445,6 +449,14 @@ static void nbd_clear_que(struct nbd_device *nbd)
req->errors++;
nbd_end_request(req);
}
+
+ while (!list_empty(&nbd->waiting_queue)) {
+ req = list_entry(nbd->waiting_queue.next, struct request,
+ queuelist);
+ list_del_init(&req->queuelist);
+ req->errors++;
+ nbd_end_request(req);
+ }
}
@@ -481,7 +493,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
nbd_end_request(req);
} else {
spin_lock(&nbd->queue_lock);
- list_add(&req->queuelist, &nbd->queue_head);
+ list_add_tail(&req->queuelist, &nbd->queue_head);
spin_unlock(&nbd->queue_lock);
}
@@ -594,6 +606,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd->file = NULL;
nbd_clear_que(nbd);
BUG_ON(!list_empty(&nbd->queue_head));
+ BUG_ON(!list_empty(&nbd->waiting_queue));
if (file)
fput(file);
return 0;
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 38a2d063188..ad16c68c864 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -79,6 +79,7 @@ struct nvme_dev {
char serial[20];
char model[40];
char firmware_rev[8];
+ u32 max_hw_sectors;
};
/*
@@ -835,15 +836,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
}
static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
- unsigned dword11, dma_addr_t dma_addr)
+ unsigned nsid, dma_addr_t dma_addr)
{
struct nvme_command c;
memset(&c, 0, sizeof(c));
c.features.opcode = nvme_admin_get_features;
+ c.features.nsid = cpu_to_le32(nsid);
c.features.prp1 = cpu_to_le64(dma_addr);
c.features.fid = cpu_to_le32(fid);
- c.features.dword11 = cpu_to_le32(dword11);
return nvme_submit_admin_cmd(dev, &c, NULL);
}
@@ -862,11 +863,51 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
return nvme_submit_admin_cmd(dev, &c, result);
}
+/**
+ * nvme_cancel_ios - Cancel outstanding I/Os
+ * @nvmeq: The queue to cancel I/Os on
+ * @timeout: True to only cancel I/Os which have timed out
+ */
+static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
+{
+ int depth = nvmeq->q_depth - 1;
+ struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
+ unsigned long now = jiffies;
+ int cmdid;
+
+ for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
+ void *ctx;
+ nvme_completion_fn fn;
+ static struct nvme_completion cqe = {
+ .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1,
+ };
+
+ if (timeout && !time_after(now, info[cmdid].timeout))
+ continue;
+ dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
+ ctx = cancel_cmdid(nvmeq, cmdid, &fn);
+ fn(nvmeq->dev, ctx, &cqe);
+ }
+}
+
+static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+{
+ dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+ (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+ dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+ nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+ kfree(nvmeq);
+}
+
static void nvme_free_queue(struct nvme_dev *dev, int qid)
{
struct nvme_queue *nvmeq = dev->queues[qid];
int vector = dev->entry[nvmeq->cq_vector].vector;
+ spin_lock_irq(&nvmeq->q_lock);
+ nvme_cancel_ios(nvmeq, false);
+ spin_unlock_irq(&nvmeq->q_lock);
+
irq_set_affinity_hint(vector, NULL);
free_irq(vector, nvmeq);
@@ -876,18 +917,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
adapter_delete_cq(dev, qid);
}
- dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
- (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds, nvmeq->sq_dma_addr);
- kfree(nvmeq);
+ nvme_free_queue_mem(nvmeq);
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth, int vector)
{
struct device *dmadev = &dev->pci_dev->dev;
- unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info));
+ unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
+ sizeof(struct nvme_cmd_info));
struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
if (!nvmeq)
return NULL;
@@ -975,7 +1013,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
{
- int result;
+ int result = 0;
u32 aqa;
u64 cap;
unsigned long timeout;
@@ -1005,17 +1043,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
dev->db_stride = NVME_CAP_STRIDE(cap);
- while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+ while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
msleep(100);
if (fatal_signal_pending(current))
- return -EINTR;
+ result = -EINTR;
if (time_after(jiffies, timeout)) {
dev_err(&dev->pci_dev->dev,
"Device not ready; aborting initialisation\n");
- return -ENODEV;
+ result = -ENODEV;
}
}
+ if (result) {
+ nvme_free_queue_mem(nvmeq);
+ return result;
+ }
+
result = queue_request_irq(dev, nvmeq, "nvme admin");
dev->queues[0] = nvmeq;
return result;
@@ -1037,6 +1080,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
offset = offset_in_page(addr);
count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
err = get_user_pages_fast(addr, count, 1, pages);
if (err < count) {
@@ -1146,14 +1191,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
return status;
}
-static int nvme_user_admin_cmd(struct nvme_ns *ns,
+static int nvme_user_admin_cmd(struct nvme_dev *dev,
struct nvme_admin_cmd __user *ucmd)
{
- struct nvme_dev *dev = ns->dev;
struct nvme_admin_cmd cmd;
struct nvme_command c;
int status, length;
- struct nvme_iod *iod;
+ struct nvme_iod *uninitialized_var(iod);
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -1204,7 +1248,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
case NVME_IOCTL_ID:
return ns->ns_id;
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_admin_cmd(ns, (void __user *)arg);
+ return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, (void __user *)arg);
default:
@@ -1218,26 +1262,6 @@ static const struct block_device_operations nvme_fops = {
.compat_ioctl = nvme_ioctl,
};
-static void nvme_timeout_ios(struct nvme_queue *nvmeq)
-{
- int depth = nvmeq->q_depth - 1;
- struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
- unsigned long now = jiffies;
- int cmdid;
-
- for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
- void *ctx;
- nvme_completion_fn fn;
- static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, };
-
- if (!time_after(now, info[cmdid].timeout))
- continue;
- dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid);
- ctx = cancel_cmdid(nvmeq, cmdid, &fn);
- fn(nvmeq->dev, ctx, &cqe);
- }
-}
-
static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
{
while (bio_list_peek(&nvmeq->sq_cong)) {
@@ -1269,7 +1293,7 @@ static int nvme_kthread(void *data)
spin_lock_irq(&nvmeq->q_lock);
if (nvme_process_cq(nvmeq))
printk("process_cq did something\n");
- nvme_timeout_ios(nvmeq);
+ nvme_cancel_ios(nvmeq, true);
nvme_resubmit_bios(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
}
@@ -1339,6 +1363,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
ns->disk = disk;
lbaf = id->flbas & 0xf;
ns->lba_shift = id->lbaf[lbaf].ds;
+ blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+ if (dev->max_hw_sectors)
+ blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
disk->major = nvme_major;
disk->minors = NVME_MINORS;
@@ -1383,7 +1410,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
{
- int result, cpu, i, nr_io_queues, db_bar_size;
+ int result, cpu, i, nr_io_queues, db_bar_size, q_depth;
nr_io_queues = num_online_cpus();
result = set_queue_count(dev, nr_io_queues);
@@ -1429,9 +1456,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
cpu = cpumask_next(cpu, cpu_online_mask);
}
+ q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
+ NVME_Q_DEPTH);
for (i = 0; i < nr_io_queues; i++) {
- dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
- NVME_Q_DEPTH, i);
+ dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
if (IS_ERR(dev->queues[i + 1]))
return PTR_ERR(dev->queues[i + 1]);
dev->queue_count++;
@@ -1480,6 +1508,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+ if (ctrl->mdts) {
+ int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
+ dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
+ }
id_ns = mem;
for (i = 1; i <= nn; i++) {
@@ -1523,8 +1555,6 @@ static int nvme_dev_remove(struct nvme_dev *dev)
list_del(&dev->node);
spin_unlock(&dev_list_lock);
- /* TODO: wait all I/O finished or cancel them */
-
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
list_del(&ns->list);
del_gendisk(ns->disk);
@@ -1560,15 +1590,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool);
}
-/* XXX: Use an ida or something to let remove / add work correctly */
-static void nvme_set_instance(struct nvme_dev *dev)
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_dev *dev)
{
- static int instance;
- dev->instance = instance++;
+ int instance, error;
+
+ do {
+ if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+ return -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ error = ida_get_new(&nvme_instance_ida, &instance);
+ spin_unlock(&dev_list_lock);
+ } while (error == -EAGAIN);
+
+ if (error)
+ return -ENODEV;
+
+ dev->instance = instance;
+ return 0;
}
static void nvme_release_instance(struct nvme_dev *dev)
{
+ spin_lock(&dev_list_lock);
+ ida_remove(&nvme_instance_ida, dev->instance);
+ spin_unlock(&dev_list_lock);
}
static int __devinit nvme_probe(struct pci_dev *pdev,
@@ -1601,7 +1649,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
- nvme_set_instance(dev);
+ result = nvme_set_instance(dev);
+ if (result)
+ goto disable;
+
dev->entry[0].vector = pdev->irq;
result = nvme_setup_prp_pools(dev);
@@ -1704,15 +1755,17 @@ static struct pci_driver nvme_driver = {
static int __init nvme_init(void)
{
- int result = -EBUSY;
+ int result;
nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
if (IS_ERR(nvme_thread))
return PTR_ERR(nvme_thread);
- nvme_major = register_blkdev(nvme_major, "nvme");
- if (nvme_major <= 0)
+ result = register_blkdev(nvme_major, "nvme");
+ if (result < 0)
goto kill_kthread;
+ else if (result > 0)
+ nvme_major = result;
result = pci_register_driver(&nvme_driver);
if (result)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 65665c9c42c..54a55f03115 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -55,8 +55,6 @@
#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
-#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
-#define RBD_MAX_POOL_NAME_LEN 64
#define RBD_MAX_SNAP_NAME_LEN 32
#define RBD_MAX_OPT_LEN 1024
@@ -78,13 +76,12 @@
*/
struct rbd_image_header {
u64 image_size;
- char block_name[32];
+ char *object_prefix;
__u8 obj_order;
__u8 crypt_type;
__u8 comp_type;
struct ceph_snap_context *snapc;
size_t snap_names_len;
- u64 snap_seq;
u32 total_snaps;
char *snap_names;
@@ -150,7 +147,7 @@ struct rbd_snap {
* a single device
*/
struct rbd_device {
- int id; /* blkdev unique id */
+ int dev_id; /* blkdev unique id */
int major; /* blkdev assigned major */
struct gendisk *disk; /* blkdev's gendisk and rq */
@@ -163,20 +160,24 @@ struct rbd_device {
spinlock_t lock; /* queue lock */
struct rbd_image_header header;
- char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
- int obj_len;
- char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
- char pool_name[RBD_MAX_POOL_NAME_LEN];
- int poolid;
+ char *image_name;
+ size_t image_name_len;
+ char *header_name;
+ char *pool_name;
+ int pool_id;
struct ceph_osd_event *watch_event;
struct ceph_osd_request *watch_request;
/* protects updating the header */
struct rw_semaphore header_rwsem;
- char snap_name[RBD_MAX_SNAP_NAME_LEN];
+ /* name of the snapshot this device reads from */
+ char *snap_name;
+ /* id of the snapshot this device reads from */
u64 snap_id; /* current snapshot id */
- int read_only;
+ /* whether the snap_id this device reads from still exists */
+ bool snap_exists;
+ int read_only;
struct list_head node;
@@ -201,8 +202,7 @@ static ssize_t rbd_snap_add(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count);
-static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
- struct rbd_snap *snap);
+static void __rbd_remove_snap_dev(struct rbd_snap *snap);
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
size_t count);
@@ -240,19 +240,18 @@ static void rbd_put_dev(struct rbd_device *rbd_dev)
put_device(&rbd_dev->dev);
}
-static int __rbd_refresh_header(struct rbd_device *rbd_dev);
+static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
- rbd_get_dev(rbd_dev);
-
- set_device_ro(bdev, rbd_dev->read_only);
-
if ((mode & FMODE_WRITE) && rbd_dev->read_only)
return -EROFS;
+ rbd_get_dev(rbd_dev);
+ set_device_ro(bdev, rbd_dev->read_only);
+
return 0;
}
@@ -273,9 +272,9 @@ static const struct block_device_operations rbd_bd_ops = {
/*
* Initialize an rbd client instance.
- * We own *opt.
+ * We own *ceph_opts.
*/
-static struct rbd_client *rbd_client_create(struct ceph_options *opt,
+static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts,
struct rbd_options *rbd_opts)
{
struct rbd_client *rbdc;
@@ -291,10 +290,10 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt,
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
+ rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
if (IS_ERR(rbdc->client))
goto out_mutex;
- opt = NULL; /* Now rbdc->client is responsible for opt */
+ ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
ret = ceph_open_session(rbdc->client);
if (ret < 0)
@@ -317,23 +316,23 @@ out_mutex:
mutex_unlock(&ctl_mutex);
kfree(rbdc);
out_opt:
- if (opt)
- ceph_destroy_options(opt);
+ if (ceph_opts)
+ ceph_destroy_options(ceph_opts);
return ERR_PTR(ret);
}
/*
* Find a ceph client with specific addr and configuration.
*/
-static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
+static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts)
{
struct rbd_client *client_node;
- if (opt->flags & CEPH_OPT_NOSHARE)
+ if (ceph_opts->flags & CEPH_OPT_NOSHARE)
return NULL;
list_for_each_entry(client_node, &rbd_client_list, node)
- if (ceph_compare_options(opt, client_node->client) == 0)
+ if (!ceph_compare_options(ceph_opts, client_node->client))
return client_node;
return NULL;
}
@@ -349,7 +348,7 @@ enum {
/* string args above */
};
-static match_table_t rbdopt_tokens = {
+static match_table_t rbd_opts_tokens = {
{Opt_notify_timeout, "notify_timeout=%d"},
/* int args above */
/* string args above */
@@ -358,11 +357,11 @@ static match_table_t rbdopt_tokens = {
static int parse_rbd_opts_token(char *c, void *private)
{
- struct rbd_options *rbdopt = private;
+ struct rbd_options *rbd_opts = private;
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
- token = match_token(c, rbdopt_tokens, argstr);
+ token = match_token(c, rbd_opts_tokens, argstr);
if (token < 0)
return -EINVAL;
@@ -383,7 +382,7 @@ static int parse_rbd_opts_token(char *c, void *private)
switch (token) {
case Opt_notify_timeout:
- rbdopt->notify_timeout = intval;
+ rbd_opts->notify_timeout = intval;
break;
default:
BUG_ON(token);
@@ -400,7 +399,7 @@ static struct rbd_client *rbd_get_client(const char *mon_addr,
char *options)
{
struct rbd_client *rbdc;
- struct ceph_options *opt;
+ struct ceph_options *ceph_opts;
struct rbd_options *rbd_opts;
rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
@@ -409,29 +408,29 @@ static struct rbd_client *rbd_get_client(const char *mon_addr,
rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
- opt = ceph_parse_options(options, mon_addr,
- mon_addr + mon_addr_len,
- parse_rbd_opts_token, rbd_opts);
- if (IS_ERR(opt)) {
+ ceph_opts = ceph_parse_options(options, mon_addr,
+ mon_addr + mon_addr_len,
+ parse_rbd_opts_token, rbd_opts);
+ if (IS_ERR(ceph_opts)) {
kfree(rbd_opts);
- return ERR_CAST(opt);
+ return ERR_CAST(ceph_opts);
}
spin_lock(&rbd_client_list_lock);
- rbdc = __rbd_client_find(opt);
+ rbdc = __rbd_client_find(ceph_opts);
if (rbdc) {
/* using an existing client */
kref_get(&rbdc->kref);
spin_unlock(&rbd_client_list_lock);
- ceph_destroy_options(opt);
+ ceph_destroy_options(ceph_opts);
kfree(rbd_opts);
return rbdc;
}
spin_unlock(&rbd_client_list_lock);
- rbdc = rbd_client_create(opt, rbd_opts);
+ rbdc = rbd_client_create(ceph_opts, rbd_opts);
if (IS_ERR(rbdc))
kfree(rbd_opts);
@@ -480,46 +479,60 @@ static void rbd_coll_release(struct kref *kref)
kfree(coll);
}
+static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
+{
+ return !memcmp(&ondisk->text,
+ RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT));
+}
+
/*
* Create a new header structure, translate header format from the on-disk
* header.
*/
static int rbd_header_from_disk(struct rbd_image_header *header,
struct rbd_image_header_ondisk *ondisk,
- u32 allocated_snaps,
- gfp_t gfp_flags)
+ u32 allocated_snaps)
{
- u32 i, snap_count;
+ u32 snap_count;
- if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
+ if (!rbd_dev_ondisk_valid(ondisk))
return -ENXIO;
snap_count = le32_to_cpu(ondisk->snap_count);
- if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context))
- / sizeof (*ondisk))
+ if (snap_count > (SIZE_MAX - sizeof(struct ceph_snap_context))
+ / sizeof (u64))
return -EINVAL;
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
- snap_count * sizeof (*ondisk),
- gfp_flags);
+ snap_count * sizeof(u64),
+ GFP_KERNEL);
if (!header->snapc)
return -ENOMEM;
- header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
if (snap_count) {
+ header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
header->snap_names = kmalloc(header->snap_names_len,
- gfp_flags);
+ GFP_KERNEL);
if (!header->snap_names)
goto err_snapc;
header->snap_sizes = kmalloc(snap_count * sizeof(u64),
- gfp_flags);
+ GFP_KERNEL);
if (!header->snap_sizes)
goto err_names;
} else {
+ WARN_ON(ondisk->snap_names_len);
+ header->snap_names_len = 0;
header->snap_names = NULL;
header->snap_sizes = NULL;
}
- memcpy(header->block_name, ondisk->block_name,
+
+ header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1,
+ GFP_KERNEL);
+ if (!header->object_prefix)
+ goto err_sizes;
+
+ memcpy(header->object_prefix, ondisk->block_name,
sizeof(ondisk->block_name));
+ header->object_prefix[sizeof (ondisk->block_name)] = '\0';
header->image_size = le64_to_cpu(ondisk->image_size);
header->obj_order = ondisk->options.order;
@@ -527,11 +540,13 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
header->comp_type = ondisk->options.comp_type;
atomic_set(&header->snapc->nref, 1);
- header->snap_seq = le64_to_cpu(ondisk->snap_seq);
+ header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
header->snapc->num_snaps = snap_count;
header->total_snaps = snap_count;
if (snap_count && allocated_snaps == snap_count) {
+ int i;
+
for (i = 0; i < snap_count; i++) {
header->snapc->snaps[i] =
le64_to_cpu(ondisk->snaps[i].id);
@@ -540,16 +555,22 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
}
/* copy snapshot names */
- memcpy(header->snap_names, &ondisk->snaps[i],
+ memcpy(header->snap_names, &ondisk->snaps[snap_count],
header->snap_names_len);
}
return 0;
+err_sizes:
+ kfree(header->snap_sizes);
+ header->snap_sizes = NULL;
err_names:
kfree(header->snap_names);
+ header->snap_names = NULL;
err_snapc:
kfree(header->snapc);
+ header->snapc = NULL;
+
return -ENOMEM;
}
@@ -575,52 +596,50 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
return -ENOENT;
}
-static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
+static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size)
{
- struct rbd_image_header *header = &dev->header;
- struct ceph_snap_context *snapc = header->snapc;
- int ret = -ENOENT;
-
- BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
+ int ret;
- down_write(&dev->header_rwsem);
+ down_write(&rbd_dev->header_rwsem);
- if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
+ if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
sizeof (RBD_SNAP_HEAD_NAME))) {
- if (header->total_snaps)
- snapc->seq = header->snap_seq;
- else
- snapc->seq = 0;
- dev->snap_id = CEPH_NOSNAP;
- dev->read_only = 0;
+ rbd_dev->snap_id = CEPH_NOSNAP;
+ rbd_dev->snap_exists = false;
+ rbd_dev->read_only = 0;
if (size)
- *size = header->image_size;
+ *size = rbd_dev->header.image_size;
} else {
- ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
+ u64 snap_id = 0;
+
+ ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name,
+ &snap_id, size);
if (ret < 0)
goto done;
- dev->snap_id = snapc->seq;
- dev->read_only = 1;
+ rbd_dev->snap_id = snap_id;
+ rbd_dev->snap_exists = true;
+ rbd_dev->read_only = 1;
}
ret = 0;
done:
- up_write(&dev->header_rwsem);
+ up_write(&rbd_dev->header_rwsem);
return ret;
}
static void rbd_header_free(struct rbd_image_header *header)
{
- kfree(header->snapc);
- kfree(header->snap_names);
+ kfree(header->object_prefix);
kfree(header->snap_sizes);
+ kfree(header->snap_names);
+ ceph_put_snap_context(header->snapc);
}
/*
* get the actual striped segment name, offset and length
*/
static u64 rbd_get_segment(struct rbd_image_header *header,
- const char *block_name,
+ const char *object_prefix,
u64 ofs, u64 len,
char *seg_name, u64 *segofs)
{
@@ -628,7 +647,7 @@ static u64 rbd_get_segment(struct rbd_image_header *header,
if (seg_name)
snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
- "%s.%012llx", block_name, seg);
+ "%s.%012llx", object_prefix, seg);
ofs = ofs & ((1 << header->obj_order) - 1);
len = min_t(u64, len, (1 << header->obj_order) - ofs);
@@ -726,9 +745,8 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
* split_bio will BUG_ON if this is not the case
*/
dout("bio_chain_clone split! total=%d remaining=%d"
- "bi_size=%d\n",
- (int)total, (int)len-total,
- (int)old_chain->bi_size);
+ "bi_size=%u\n",
+ total, len - total, old_chain->bi_size);
/* split the bio. We'll release it either in the next
call, or it will have to be released outside */
@@ -777,22 +795,24 @@ err_out:
/*
* helpers for osd request op vectors.
*/
-static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
- int num_ops,
- int opcode,
- u32 payload_len)
-{
- *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
- GFP_NOIO);
- if (!*ops)
- return -ENOMEM;
- (*ops)[0].op = opcode;
+static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops,
+ int opcode, u32 payload_len)
+{
+ struct ceph_osd_req_op *ops;
+
+ ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO);
+ if (!ops)
+ return NULL;
+
+ ops[0].op = opcode;
+
/*
* op extent offset and length will be set later on
* in calc_raw_layout()
*/
- (*ops)[0].payload_len = payload_len;
- return 0;
+ ops[0].payload_len = payload_len;
+
+ return ops;
}
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
@@ -808,8 +828,8 @@ static void rbd_coll_end_req_index(struct request *rq,
struct request_queue *q;
int min, max, i;
- dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
- coll, index, ret, len);
+ dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n",
+ coll, index, ret, (unsigned long long) len);
if (!rq)
return;
@@ -848,16 +868,15 @@ static void rbd_coll_end_req(struct rbd_request *req,
* Send ceph osd request
*/
static int rbd_do_request(struct request *rq,
- struct rbd_device *dev,
+ struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- const char *obj, u64 ofs, u64 len,
+ const char *object_name, u64 ofs, u64 len,
struct bio *bio,
struct page **pages,
int num_pages,
int flags,
struct ceph_osd_req_op *ops,
- int num_reply,
struct rbd_req_coll *coll,
int coll_index,
void (*rbd_cb)(struct ceph_osd_request *req,
@@ -887,15 +906,13 @@ static int rbd_do_request(struct request *rq,
req_data->coll_index = coll_index;
}
- dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
-
- down_read(&dev->header_rwsem);
+ dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name,
+ (unsigned long long) ofs, (unsigned long long) len);
- osdc = &dev->rbd_client->client->osdc;
+ osdc = &rbd_dev->rbd_client->client->osdc;
req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
false, GFP_NOIO, pages, bio);
if (!req) {
- up_read(&dev->header_rwsem);
ret = -ENOMEM;
goto done_pages;
}
@@ -912,7 +929,7 @@ static int rbd_do_request(struct request *rq,
reqhead = req->r_request->front.iov_base;
reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
- strncpy(req->r_oid, obj, sizeof(req->r_oid));
+ strncpy(req->r_oid, object_name, sizeof(req->r_oid));
req->r_oid_len = strlen(req->r_oid);
layout = &req->r_file_layout;
@@ -920,7 +937,7 @@ static int rbd_do_request(struct request *rq,
layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
layout->fl_stripe_count = cpu_to_le32(1);
layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
- layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+ layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id);
ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
req, ops);
@@ -929,7 +946,6 @@ static int rbd_do_request(struct request *rq,
snapc,
&mtime,
req->r_oid, req->r_oid_len);
- up_read(&dev->header_rwsem);
if (linger_req) {
ceph_osdc_set_request_linger(osdc, req);
@@ -944,8 +960,9 @@ static int rbd_do_request(struct request *rq,
ret = ceph_osdc_wait_request(osdc, req);
if (ver)
*ver = le64_to_cpu(req->r_reassert_version.version);
- dout("reassert_ver=%lld\n",
- le64_to_cpu(req->r_reassert_version.version));
+ dout("reassert_ver=%llu\n",
+ (unsigned long long)
+ le64_to_cpu(req->r_reassert_version.version));
ceph_osdc_put_request(req);
}
return ret;
@@ -977,9 +994,10 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
op = (void *)(replyhead + 1);
rc = le32_to_cpu(replyhead->result);
bytes = le64_to_cpu(op->extent.length);
- read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+ read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
- dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
+ dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n",
+ (unsigned long long) bytes, read_op, (int) rc);
if (rc == -ENOENT && read_op) {
zero_bio_chain(req_data->bio, 0);
@@ -1006,14 +1024,12 @@ static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg
/*
* Do a synchronous ceph osd operation
*/
-static int rbd_req_sync_op(struct rbd_device *dev,
+static int rbd_req_sync_op(struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- int opcode,
int flags,
- struct ceph_osd_req_op *orig_ops,
- int num_reply,
- const char *obj,
+ struct ceph_osd_req_op *ops,
+ const char *object_name,
u64 ofs, u64 len,
char *buf,
struct ceph_osd_request **linger_req,
@@ -1022,45 +1038,28 @@ static int rbd_req_sync_op(struct rbd_device *dev,
int ret;
struct page **pages;
int num_pages;
- struct ceph_osd_req_op *ops = orig_ops;
- u32 payload_len;
+
+ BUG_ON(ops == NULL);
num_pages = calc_pages_for(ofs , len);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages))
return PTR_ERR(pages);
- if (!orig_ops) {
- payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
- ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
- if (ret < 0)
- goto done;
-
- if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
- ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
- if (ret < 0)
- goto done_ops;
- }
- }
-
- ret = rbd_do_request(NULL, dev, snapc, snapid,
- obj, ofs, len, NULL,
+ ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
+ object_name, ofs, len, NULL,
pages, num_pages,
flags,
ops,
- 2,
NULL, 0,
NULL,
linger_req, ver);
if (ret < 0)
- goto done_ops;
+ goto done;
if ((flags & CEPH_OSD_FLAG_READ) && buf)
ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
-done_ops:
- if (!orig_ops)
- rbd_destroy_ops(ops);
done:
ceph_release_page_vector(pages, num_pages);
return ret;
@@ -1070,10 +1069,10 @@ done:
* Do an asynchronous ceph osd operation
*/
static int rbd_do_op(struct request *rq,
- struct rbd_device *rbd_dev ,
+ struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- int opcode, int flags, int num_reply,
+ int opcode, int flags,
u64 ofs, u64 len,
struct bio *bio,
struct rbd_req_coll *coll,
@@ -1091,14 +1090,15 @@ static int rbd_do_op(struct request *rq,
return -ENOMEM;
seg_len = rbd_get_segment(&rbd_dev->header,
- rbd_dev->header.block_name,
+ rbd_dev->header.object_prefix,
ofs, len,
seg_name, &seg_ofs);
payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
- ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
- if (ret < 0)
+ ret = -ENOMEM;
+ ops = rbd_create_rw_ops(1, opcode, payload_len);
+ if (!ops)
goto done;
/* we've taken care of segment sizes earlier when we
@@ -1112,7 +1112,6 @@ static int rbd_do_op(struct request *rq,
NULL, 0,
flags,
ops,
- num_reply,
coll, coll_index,
rbd_req_cb, 0, NULL);
@@ -1136,7 +1135,6 @@ static int rbd_req_write(struct request *rq,
return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
- 2,
ofs, len, bio, coll, coll_index);
}
@@ -1155,55 +1153,58 @@ static int rbd_req_read(struct request *rq,
snapid,
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
- 2,
ofs, len, bio, coll, coll_index);
}
/*
* Request sync osd read
*/
-static int rbd_req_sync_read(struct rbd_device *dev,
- struct ceph_snap_context *snapc,
+static int rbd_req_sync_read(struct rbd_device *rbd_dev,
u64 snapid,
- const char *obj,
+ const char *object_name,
u64 ofs, u64 len,
char *buf,
u64 *ver)
{
- return rbd_req_sync_op(dev, NULL,
+ struct ceph_osd_req_op *ops;
+ int ret;
+
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0);
+ if (!ops)
+ return -ENOMEM;
+
+ ret = rbd_req_sync_op(rbd_dev, NULL,
snapid,
- CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
- NULL,
- 1, obj, ofs, len, buf, NULL, ver);
+ ops, object_name, ofs, len, buf, NULL, ver);
+ rbd_destroy_ops(ops);
+
+ return ret;
}
/*
* Request sync osd watch
*/
-static int rbd_req_sync_notify_ack(struct rbd_device *dev,
+static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev,
u64 ver,
- u64 notify_id,
- const char *obj)
+ u64 notify_id)
{
struct ceph_osd_req_op *ops;
- struct page **pages = NULL;
int ret;
- ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0);
+ if (!ops)
+ return -ENOMEM;
- ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
+ ops[0].watch.ver = cpu_to_le64(ver);
ops[0].watch.cookie = notify_id;
ops[0].watch.flag = 0;
- ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
- obj, 0, 0, NULL,
- pages, 0,
+ ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP,
+ rbd_dev->header_name, 0, 0, NULL,
+ NULL, 0,
CEPH_OSD_FLAG_READ,
ops,
- 1,
NULL, 0,
rbd_simple_req_cb, 0, NULL);
@@ -1213,54 +1214,53 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev,
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
- struct rbd_device *dev = (struct rbd_device *)data;
+ struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ u64 hver;
int rc;
- if (!dev)
+ if (!rbd_dev)
return;
- dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
- notify_id, (int)opcode);
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_refresh_header(dev);
- mutex_unlock(&ctl_mutex);
+ dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n",
+ rbd_dev->header_name, (unsigned long long) notify_id,
+ (unsigned int) opcode);
+ rc = rbd_refresh_header(rbd_dev, &hver);
if (rc)
pr_warning(RBD_DRV_NAME "%d got notification but failed to "
- " update snaps: %d\n", dev->major, rc);
+ " update snaps: %d\n", rbd_dev->major, rc);
- rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
+ rbd_req_sync_notify_ack(rbd_dev, hver, notify_id);
}
/*
* Request sync osd watch
*/
-static int rbd_req_sync_watch(struct rbd_device *dev,
- const char *obj,
- u64 ver)
+static int rbd_req_sync_watch(struct rbd_device *rbd_dev)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int ret;
- int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
+ if (!ops)
+ return -ENOMEM;
ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
- (void *)dev, &dev->watch_event);
+ (void *)rbd_dev, &rbd_dev->watch_event);
if (ret < 0)
goto fail;
- ops[0].watch.ver = cpu_to_le64(ver);
- ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version);
+ ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
ops[0].watch.flag = 1;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL,
- &dev->watch_request, NULL);
+ rbd_dev->header_name,
+ 0, 0, NULL,
+ &rbd_dev->watch_request, NULL);
if (ret < 0)
goto fail_event;
@@ -1269,8 +1269,8 @@ static int rbd_req_sync_watch(struct rbd_device *dev,
return 0;
fail_event:
- ceph_osdc_cancel_event(dev->watch_event);
- dev->watch_event = NULL;
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
fail:
rbd_destroy_ops(ops);
return ret;
@@ -1279,64 +1279,65 @@ fail:
/*
* Request sync osd unwatch
*/
-static int rbd_req_sync_unwatch(struct rbd_device *dev,
- const char *obj)
+static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev)
{
struct ceph_osd_req_op *ops;
+ int ret;
- int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
+ if (!ops)
+ return -ENOMEM;
ops[0].watch.ver = 0;
- ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
ops[0].watch.flag = 0;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, NULL);
+ rbd_dev->header_name,
+ 0, 0, NULL, NULL, NULL);
+
rbd_destroy_ops(ops);
- ceph_osdc_cancel_event(dev->watch_event);
- dev->watch_event = NULL;
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
return ret;
}
struct rbd_notify_info {
- struct rbd_device *dev;
+ struct rbd_device *rbd_dev;
};
static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
- struct rbd_device *dev = (struct rbd_device *)data;
- if (!dev)
+ struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ if (!rbd_dev)
return;
- dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
- notify_id, (int)opcode);
+ dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n",
+ rbd_dev->header_name, (unsigned long long) notify_id,
+ (unsigned int) opcode);
}
/*
* Request sync osd notify
*/
-static int rbd_req_sync_notify(struct rbd_device *dev,
- const char *obj)
+static int rbd_req_sync_notify(struct rbd_device *rbd_dev)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct ceph_osd_event *event;
struct rbd_notify_info info;
int payload_len = sizeof(u32) + sizeof(u32);
int ret;
- ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len);
+ if (!ops)
+ return -ENOMEM;
- info.dev = dev;
+ info.rbd_dev = rbd_dev;
ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
(void *)&info, &event);
@@ -1349,12 +1350,12 @@ static int rbd_req_sync_notify(struct rbd_device *dev,
ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
ops[0].watch.timeout = 12;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, NULL);
+ rbd_dev->header_name,
+ 0, 0, NULL, NULL, NULL);
if (ret < 0)
goto fail_event;
@@ -1373,36 +1374,37 @@ fail:
/*
* Request sync osd read
*/
-static int rbd_req_sync_exec(struct rbd_device *dev,
- const char *obj,
- const char *cls,
- const char *method,
+static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
+ const char *object_name,
+ const char *class_name,
+ const char *method_name,
const char *data,
int len,
u64 *ver)
{
struct ceph_osd_req_op *ops;
- int cls_len = strlen(cls);
- int method_len = strlen(method);
- int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
- cls_len + method_len + len);
- if (ret < 0)
- return ret;
+ int class_name_len = strlen(class_name);
+ int method_name_len = strlen(method_name);
+ int ret;
+
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL,
+ class_name_len + method_name_len + len);
+ if (!ops)
+ return -ENOMEM;
- ops[0].cls.class_name = cls;
- ops[0].cls.class_len = (__u8)cls_len;
- ops[0].cls.method_name = method;
- ops[0].cls.method_len = (__u8)method_len;
+ ops[0].cls.class_name = class_name;
+ ops[0].cls.class_len = (__u8) class_name_len;
+ ops[0].cls.method_name = method_name;
+ ops[0].cls.method_len = (__u8) method_name_len;
ops[0].cls.argc = 0;
ops[0].cls.indata = data;
ops[0].cls.indata_len = len;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, ver);
+ object_name, 0, 0, NULL, NULL, ver);
rbd_destroy_ops(ops);
@@ -1437,10 +1439,12 @@ static void rbd_rq_fn(struct request_queue *q)
struct bio *bio;
struct bio *rq_bio, *next_bio = NULL;
bool do_write;
- int size, op_size = 0;
+ unsigned int size;
+ u64 op_size = 0;
u64 ofs;
int num_segs, cur_seg = 0;
struct rbd_req_coll *coll;
+ struct ceph_snap_context *snapc;
/* peek at request from block layer */
if (!rq)
@@ -1467,23 +1471,38 @@ static void rbd_rq_fn(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
+ down_read(&rbd_dev->header_rwsem);
+
+ if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) {
+ up_read(&rbd_dev->header_rwsem);
+ dout("request for non-existent snapshot");
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENXIO);
+ continue;
+ }
+
+ snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+
+ up_read(&rbd_dev->header_rwsem);
+
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
- size, blk_rq_pos(rq) * SECTOR_SIZE);
+ size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
coll = rbd_alloc_coll(num_segs);
if (!coll) {
spin_lock_irq(q->queue_lock);
__blk_end_request_all(rq, -ENOMEM);
+ ceph_put_snap_context(snapc);
continue;
}
do {
/* a bio clone to be passed down to OSD req */
- dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
+ dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
op_size = rbd_get_segment(&rbd_dev->header,
- rbd_dev->header.block_name,
+ rbd_dev->header.object_prefix,
ofs, size,
NULL, NULL);
kref_get(&coll->kref);
@@ -1499,7 +1518,7 @@ static void rbd_rq_fn(struct request_queue *q)
/* init OSD command: write or read */
if (do_write)
rbd_req_write(rq, rbd_dev,
- rbd_dev->header.snapc,
+ snapc,
ofs,
op_size, bio,
coll, cur_seg);
@@ -1522,6 +1541,8 @@ next_seg:
if (bp)
bio_pair_release(bp);
spin_lock_irq(q->queue_lock);
+
+ ceph_put_snap_context(snapc);
}
}
@@ -1592,18 +1613,19 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
return -ENOMEM;
rc = rbd_req_sync_read(rbd_dev,
- NULL, CEPH_NOSNAP,
- rbd_dev->obj_md_name,
+ CEPH_NOSNAP,
+ rbd_dev->header_name,
0, len,
(char *)dh, &ver);
if (rc < 0)
goto out_dh;
- rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
+ rc = rbd_header_from_disk(header, dh, snap_count);
if (rc < 0) {
if (rc == -ENXIO)
pr_warning("unrecognized header format"
- " for image %s", rbd_dev->obj);
+ " for image %s\n",
+ rbd_dev->image_name);
goto out_dh;
}
@@ -1628,7 +1650,7 @@ out_dh:
/*
* create a snapshot
*/
-static int rbd_header_add_snap(struct rbd_device *dev,
+static int rbd_header_add_snap(struct rbd_device *rbd_dev,
const char *snap_name,
gfp_t gfp_flags)
{
@@ -1636,16 +1658,15 @@ static int rbd_header_add_snap(struct rbd_device *dev,
u64 new_snapid;
int ret;
void *data, *p, *e;
- u64 ver;
struct ceph_mon_client *monc;
/* we should create a snapshot only if we're pointing at the head */
- if (dev->snap_id != CEPH_NOSNAP)
+ if (rbd_dev->snap_id != CEPH_NOSNAP)
return -EINVAL;
- monc = &dev->rbd_client->client->monc;
- ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
- dout("created snapid=%lld\n", new_snapid);
+ monc = &rbd_dev->rbd_client->client->monc;
+ ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid);
+ dout("created snapid=%llu\n", (unsigned long long) new_snapid);
if (ret < 0)
return ret;
@@ -1659,19 +1680,13 @@ static int rbd_header_add_snap(struct rbd_device *dev,
ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
ceph_encode_64_safe(&p, e, new_snapid, bad);
- ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
- data, p - data, &ver);
+ ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+ "rbd", "snap_add",
+ data, p - data, NULL);
kfree(data);
- if (ret < 0)
- return ret;
-
- down_write(&dev->header_rwsem);
- dev->header.snapc->seq = new_snapid;
- up_write(&dev->header_rwsem);
-
- return 0;
+ return ret < 0 ? ret : 0;
bad:
return -ERANGE;
}
@@ -1679,52 +1694,52 @@ bad:
static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
{
struct rbd_snap *snap;
+ struct rbd_snap *next;
- while (!list_empty(&rbd_dev->snaps)) {
- snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
- __rbd_remove_snap_dev(rbd_dev, snap);
- }
+ list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node)
+ __rbd_remove_snap_dev(snap);
}
/*
* only read the first part of the ondisk header, without the snaps info
*/
-static int __rbd_refresh_header(struct rbd_device *rbd_dev)
+static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
{
int ret;
struct rbd_image_header h;
- u64 snap_seq;
- int follow_seq = 0;
ret = rbd_read_header(rbd_dev, &h);
if (ret < 0)
return ret;
- /* resized? */
- set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
-
down_write(&rbd_dev->header_rwsem);
- snap_seq = rbd_dev->header.snapc->seq;
- if (rbd_dev->header.total_snaps &&
- rbd_dev->header.snapc->snaps[0] == snap_seq)
- /* pointing at the head, will need to follow that
- if head moves */
- follow_seq = 1;
+ /* resized? */
+ if (rbd_dev->snap_id == CEPH_NOSNAP) {
+ sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
- kfree(rbd_dev->header.snapc);
- kfree(rbd_dev->header.snap_names);
+ dout("setting size to %llu sectors", (unsigned long long) size);
+ set_capacity(rbd_dev->disk, size);
+ }
+
+ /* rbd_dev->header.object_prefix shouldn't change */
kfree(rbd_dev->header.snap_sizes);
+ kfree(rbd_dev->header.snap_names);
+ /* osd requests may still refer to snapc */
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ if (hver)
+ *hver = h.obj_version;
+ rbd_dev->header.obj_version = h.obj_version;
+ rbd_dev->header.image_size = h.image_size;
rbd_dev->header.total_snaps = h.total_snaps;
rbd_dev->header.snapc = h.snapc;
rbd_dev->header.snap_names = h.snap_names;
rbd_dev->header.snap_names_len = h.snap_names_len;
rbd_dev->header.snap_sizes = h.snap_sizes;
- if (follow_seq)
- rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
- else
- rbd_dev->header.snapc->seq = snap_seq;
+ /* Free the extra copy of the object prefix */
+ WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
+ kfree(h.object_prefix);
ret = __rbd_init_snaps_header(rbd_dev);
@@ -1733,6 +1748,17 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev)
return ret;
}
+static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
+{
+ int ret;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ ret = __rbd_refresh_header(rbd_dev, hver);
+ mutex_unlock(&ctl_mutex);
+
+ return ret;
+}
+
static int rbd_init_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk;
@@ -1762,7 +1788,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
goto out;
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
- rbd_dev->id);
+ rbd_dev->dev_id);
disk->major = rbd_dev->major;
disk->first_minor = 0;
disk->fops = &rbd_bd_ops;
@@ -1819,8 +1845,13 @@ static ssize_t rbd_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ sector_t size;
+
+ down_read(&rbd_dev->header_rwsem);
+ size = get_capacity(rbd_dev->disk);
+ up_read(&rbd_dev->header_rwsem);
- return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
+ return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
}
static ssize_t rbd_major_show(struct device *dev,
@@ -1848,12 +1879,20 @@ static ssize_t rbd_pool_show(struct device *dev,
return sprintf(buf, "%s\n", rbd_dev->pool_name);
}
+static ssize_t rbd_pool_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ return sprintf(buf, "%d\n", rbd_dev->pool_id);
+}
+
static ssize_t rbd_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- return sprintf(buf, "%s\n", rbd_dev->obj);
+ return sprintf(buf, "%s\n", rbd_dev->image_name);
}
static ssize_t rbd_snap_show(struct device *dev,
@@ -1871,23 +1910,18 @@ static ssize_t rbd_image_refresh(struct device *dev,
size_t size)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- int rc;
- int ret = size;
-
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ int ret;
- rc = __rbd_refresh_header(rbd_dev);
- if (rc < 0)
- ret = rc;
+ ret = rbd_refresh_header(rbd_dev, NULL);
- mutex_unlock(&ctl_mutex);
- return ret;
+ return ret < 0 ? ret : size;
}
static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
@@ -1898,6 +1932,7 @@ static struct attribute *rbd_attrs[] = {
&dev_attr_major.attr,
&dev_attr_client_id.attr,
&dev_attr_pool.attr,
+ &dev_attr_pool_id.attr,
&dev_attr_name.attr,
&dev_attr_current_snap.attr,
&dev_attr_refresh.attr,
@@ -1977,15 +2012,13 @@ static struct device_type rbd_snap_device_type = {
.release = rbd_snap_dev_release,
};
-static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
- struct rbd_snap *snap)
+static void __rbd_remove_snap_dev(struct rbd_snap *snap)
{
list_del(&snap->node);
device_unregister(&snap->dev);
}
-static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
- struct rbd_snap *snap,
+static int rbd_register_snap_dev(struct rbd_snap *snap,
struct device *parent)
{
struct device *dev = &snap->dev;
@@ -2000,29 +2033,36 @@ static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
return ret;
}
-static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
- int i, const char *name,
- struct rbd_snap **snapp)
+static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
+ int i, const char *name)
{
+ struct rbd_snap *snap;
int ret;
- struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
+
+ snap = kzalloc(sizeof (*snap), GFP_KERNEL);
if (!snap)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+
+ ret = -ENOMEM;
snap->name = kstrdup(name, GFP_KERNEL);
+ if (!snap->name)
+ goto err;
+
snap->size = rbd_dev->header.snap_sizes[i];
snap->id = rbd_dev->header.snapc->snaps[i];
if (device_is_registered(&rbd_dev->dev)) {
- ret = rbd_register_snap_dev(rbd_dev, snap,
- &rbd_dev->dev);
+ ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
if (ret < 0)
goto err;
}
- *snapp = snap;
- return 0;
+
+ return snap;
+
err:
kfree(snap->name);
kfree(snap);
- return ret;
+
+ return ERR_PTR(ret);
}
/*
@@ -2055,7 +2095,6 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
const char *name, *first_name;
int i = rbd_dev->header.total_snaps;
struct rbd_snap *snap, *old_snap = NULL;
- int ret;
struct list_head *p, *n;
first_name = rbd_dev->header.snap_names;
@@ -2070,8 +2109,15 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
cur_id = rbd_dev->header.snapc->snaps[i - 1];
if (!i || old_snap->id < cur_id) {
- /* old_snap->id was skipped, thus was removed */
- __rbd_remove_snap_dev(rbd_dev, old_snap);
+ /*
+ * old_snap->id was skipped, thus was
+ * removed. If this rbd_dev is mapped to
+ * the removed snapshot, record that it no
+ * longer exists, to prevent further I/O.
+ */
+ if (rbd_dev->snap_id == old_snap->id)
+ rbd_dev->snap_exists = false;
+ __rbd_remove_snap_dev(old_snap);
continue;
}
if (old_snap->id == cur_id) {
@@ -2091,9 +2137,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
if (cur_id >= old_snap->id)
break;
/* a new snapshot */
- ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
- if (ret < 0)
- return ret;
+ snap = __rbd_add_snap_dev(rbd_dev, i - 1, name);
+ if (IS_ERR(snap))
+ return PTR_ERR(snap);
/* note that we add it backward so using n and not p */
list_add(&snap->node, n);
@@ -2107,9 +2153,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
WARN_ON(1);
return -EINVAL;
}
- ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
- if (ret < 0)
- return ret;
+ snap = __rbd_add_snap_dev(rbd_dev, i - 1, name);
+ if (IS_ERR(snap))
+ return PTR_ERR(snap);
list_add(&snap->node, &rbd_dev->snaps);
}
@@ -2129,14 +2175,13 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
dev->type = &rbd_device_type;
dev->parent = &rbd_root_dev;
dev->release = rbd_dev_release;
- dev_set_name(dev, "%d", rbd_dev->id);
+ dev_set_name(dev, "%d", rbd_dev->dev_id);
ret = device_register(dev);
if (ret < 0)
goto out;
list_for_each_entry(snap, &rbd_dev->snaps, node) {
- ret = rbd_register_snap_dev(rbd_dev, snap,
- &rbd_dev->dev);
+ ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
if (ret < 0)
break;
}
@@ -2155,12 +2200,9 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
int ret, rc;
do {
- ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
- rbd_dev->header.obj_version);
+ ret = rbd_req_sync_watch(rbd_dev);
if (ret == -ERANGE) {
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_refresh_header(rbd_dev);
- mutex_unlock(&ctl_mutex);
+ rc = rbd_refresh_header(rbd_dev, NULL);
if (rc < 0)
return rc;
}
@@ -2177,7 +2219,7 @@ static atomic64_t rbd_id_max = ATOMIC64_INIT(0);
*/
static void rbd_id_get(struct rbd_device *rbd_dev)
{
- rbd_dev->id = atomic64_inc_return(&rbd_id_max);
+ rbd_dev->dev_id = atomic64_inc_return(&rbd_id_max);
spin_lock(&rbd_dev_list_lock);
list_add_tail(&rbd_dev->node, &rbd_dev_list);
@@ -2191,7 +2233,7 @@ static void rbd_id_get(struct rbd_device *rbd_dev)
static void rbd_id_put(struct rbd_device *rbd_dev)
{
struct list_head *tmp;
- int rbd_id = rbd_dev->id;
+ int rbd_id = rbd_dev->dev_id;
int max_id;
BUG_ON(rbd_id < 1);
@@ -2282,19 +2324,58 @@ static inline size_t copy_token(const char **buf,
}
/*
- * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
+ * Finds the next token in *buf, dynamically allocates a buffer big
+ * enough to hold a copy of it, and copies the token into the new
+ * buffer. The copy is guaranteed to be terminated with '\0'. Note
+ * that a duplicate buffer is created even for a zero-length token.
+ *
+ * Returns a pointer to the newly-allocated duplicate, or a null
+ * pointer if memory for the duplicate was not available. If
+ * the lenp argument is a non-null pointer, the length of the token
+ * (not including the '\0') is returned in *lenp.
+ *
+ * If successful, the *buf pointer will be updated to point beyond
+ * the end of the found token.
+ *
+ * Note: uses GFP_KERNEL for allocation.
+ */
+static inline char *dup_token(const char **buf, size_t *lenp)
+{
+ char *dup;
+ size_t len;
+
+ len = next_token(buf);
+ dup = kmalloc(len + 1, GFP_KERNEL);
+ if (!dup)
+ return NULL;
+
+ memcpy(dup, *buf, len);
+ *(dup + len) = '\0';
+ *buf += len;
+
+ if (lenp)
+ *lenp = len;
+
+ return dup;
+}
+
+/*
+ * This fills in the pool_name, image_name, image_name_len, snap_name,
* rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
* on the list of monitor addresses and other options provided via
* /sys/bus/rbd/add.
+ *
+ * Note: rbd_dev is assumed to have been initially zero-filled.
*/
static int rbd_add_parse_args(struct rbd_device *rbd_dev,
const char *buf,
const char **mon_addrs,
size_t *mon_addrs_size,
char *options,
- size_t options_size)
+ size_t options_size)
{
- size_t len;
+ size_t len;
+ int ret;
/* The first four tokens are required */
@@ -2310,56 +2391,74 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev,
if (!len || len >= options_size)
return -EINVAL;
- len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
- if (!len || len >= sizeof (rbd_dev->pool_name))
- return -EINVAL;
-
- len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
- if (!len || len >= sizeof (rbd_dev->obj))
- return -EINVAL;
+ ret = -ENOMEM;
+ rbd_dev->pool_name = dup_token(&buf, NULL);
+ if (!rbd_dev->pool_name)
+ goto out_err;
- /* We have the object length in hand, save it. */
+ rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len);
+ if (!rbd_dev->image_name)
+ goto out_err;
- rbd_dev->obj_len = len;
+ /* Create the name of the header object */
- BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
- < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
- sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);
+ rbd_dev->header_name = kmalloc(rbd_dev->image_name_len
+ + sizeof (RBD_SUFFIX),
+ GFP_KERNEL);
+ if (!rbd_dev->header_name)
+ goto out_err;
+ sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX);
/*
- * The snapshot name is optional, but it's an error if it's
- * too long. If no snapshot is supplied, fill in the default.
+ * The snapshot name is optional. If none is supplied,
+ * we use the default value.
*/
- len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
- if (!len)
+ rbd_dev->snap_name = dup_token(&buf, &len);
+ if (!rbd_dev->snap_name)
+ goto out_err;
+ if (!len) {
+ /* Replace the empty name with the default */
+ kfree(rbd_dev->snap_name);
+ rbd_dev->snap_name
+ = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL);
+ if (!rbd_dev->snap_name)
+ goto out_err;
+
memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
sizeof (RBD_SNAP_HEAD_NAME));
- else if (len >= sizeof (rbd_dev->snap_name))
- return -EINVAL;
+ }
return 0;
+
+out_err:
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->image_name);
+ kfree(rbd_dev->pool_name);
+ rbd_dev->pool_name = NULL;
+
+ return ret;
}
static ssize_t rbd_add(struct bus_type *bus,
const char *buf,
size_t count)
{
- struct rbd_device *rbd_dev;
+ char *options;
+ struct rbd_device *rbd_dev = NULL;
const char *mon_addrs = NULL;
size_t mon_addrs_size = 0;
- char *options = NULL;
struct ceph_osd_client *osdc;
int rc = -ENOMEM;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
- if (!rbd_dev)
- goto err_nomem;
options = kmalloc(count, GFP_KERNEL);
if (!options)
goto err_nomem;
+ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ goto err_nomem;
/* static rbd_device initialization */
spin_lock_init(&rbd_dev->lock);
@@ -2367,15 +2466,13 @@ static ssize_t rbd_add(struct bus_type *bus,
INIT_LIST_HEAD(&rbd_dev->snaps);
init_rwsem(&rbd_dev->header_rwsem);
- init_rwsem(&rbd_dev->header_rwsem);
-
/* generate unique id: find highest unique id, add one */
rbd_id_get(rbd_dev);
/* Fill in the device name, now that we have its id. */
BUILD_BUG_ON(DEV_NAME_LEN
< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
- sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
+ sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
/* parse add command */
rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
@@ -2395,7 +2492,7 @@ static ssize_t rbd_add(struct bus_type *bus,
rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
if (rc < 0)
goto err_out_client;
- rbd_dev->poolid = rc;
+ rbd_dev->pool_id = rc;
/* register our block device */
rc = register_blkdev(0, rbd_dev->name);
@@ -2435,10 +2532,16 @@ err_out_blkdev:
err_out_client:
rbd_put_client(rbd_dev);
err_put_id:
+ if (rbd_dev->pool_name) {
+ kfree(rbd_dev->snap_name);
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->image_name);
+ kfree(rbd_dev->pool_name);
+ }
rbd_id_put(rbd_dev);
err_nomem:
- kfree(options);
kfree(rbd_dev);
+ kfree(options);
dout("Error adding device %s\n", buf);
module_put(THIS_MODULE);
@@ -2446,7 +2549,7 @@ err_nomem:
return (ssize_t) rc;
}
-static struct rbd_device *__rbd_get_dev(unsigned long id)
+static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
{
struct list_head *tmp;
struct rbd_device *rbd_dev;
@@ -2454,7 +2557,7 @@ static struct rbd_device *__rbd_get_dev(unsigned long id)
spin_lock(&rbd_dev_list_lock);
list_for_each(tmp, &rbd_dev_list) {
rbd_dev = list_entry(tmp, struct rbd_device, node);
- if (rbd_dev->id == id) {
+ if (rbd_dev->dev_id == dev_id) {
spin_unlock(&rbd_dev_list_lock);
return rbd_dev;
}
@@ -2474,7 +2577,7 @@ static void rbd_dev_release(struct device *dev)
rbd_dev->watch_request);
}
if (rbd_dev->watch_event)
- rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
+ rbd_req_sync_unwatch(rbd_dev);
rbd_put_client(rbd_dev);
@@ -2483,6 +2586,10 @@ static void rbd_dev_release(struct device *dev)
unregister_blkdev(rbd_dev->major, rbd_dev->name);
/* done with the id, and with the rbd_dev */
+ kfree(rbd_dev->snap_name);
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->pool_name);
+ kfree(rbd_dev->image_name);
rbd_id_put(rbd_dev);
kfree(rbd_dev);
@@ -2544,7 +2651,7 @@ static ssize_t rbd_snap_add(struct device *dev,
if (ret < 0)
goto err_unlock;
- ret = __rbd_refresh_header(rbd_dev);
+ ret = __rbd_refresh_header(rbd_dev, NULL);
if (ret < 0)
goto err_unlock;
@@ -2553,7 +2660,7 @@ static ssize_t rbd_snap_add(struct device *dev,
mutex_unlock(&ctl_mutex);
/* make a best effort, don't error if failed */
- rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);
+ rbd_req_sync_notify(rbd_dev);
ret = count;
kfree(name);
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index 950708688f1..0924e9e41a6 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -31,7 +31,6 @@
#define RBD_MIN_OBJ_ORDER 16
#define RBD_MAX_OBJ_ORDER 30
-#define RBD_MAX_OBJ_NAME_LEN 96
#define RBD_MAX_SEG_NAME_LEN 128
#define RBD_COMP_NONE 0
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 9a72277a31d..eb0d8216f55 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -513,42 +513,19 @@ static void process_page(unsigned long data)
}
}
-struct mm_plug_cb {
- struct blk_plug_cb cb;
- struct cardinfo *card;
-};
-
-static void mm_unplug(struct blk_plug_cb *cb)
+static void mm_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
- struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb);
+ struct cardinfo *card = cb->data;
- spin_lock_irq(&mmcb->card->lock);
- activate(mmcb->card);
- spin_unlock_irq(&mmcb->card->lock);
- kfree(mmcb);
+ spin_lock_irq(&card->lock);
+ activate(card);
+ spin_unlock_irq(&card->lock);
+ kfree(cb);
}
static int mm_check_plugged(struct cardinfo *card)
{
- struct blk_plug *plug = current->plug;
- struct mm_plug_cb *mmcb;
-
- if (!plug)
- return 0;
-
- list_for_each_entry(mmcb, &plug->cb_list, cb.list) {
- if (mmcb->cb.callback == mm_unplug && mmcb->card == card)
- return 1;
- }
- /* Not currently on the callback list */
- mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC);
- if (!mmcb)
- return 0;
-
- mmcb->card = card;
- mmcb->cb.callback = mm_unplug;
- list_add(&mmcb->cb.list, &plug->cb_list);
- return 1;
+ return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
}
static void mm_make_request(struct request_queue *q, struct bio *bio)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 693187df760..c0bbeb47075 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
struct virtio_blk
{
- spinlock_t lock;
-
struct virtio_device *vdev;
struct virtqueue *vq;
@@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
unsigned int len;
unsigned long flags;
- spin_lock_irqsave(&vblk->lock, flags);
+ spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
int error;
@@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
}
/* In case queue is stopped waiting for more buffers. */
blk_start_queue(vblk->disk->queue);
- spin_unlock_irqrestore(&vblk->lock, flags);
+ spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
}
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -397,6 +395,83 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
return 0;
}
+static int virtblk_get_cache_mode(struct virtio_device *vdev)
+{
+ u8 writeback;
+ int err;
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
+ offsetof(struct virtio_blk_config, wce),
+ &writeback);
+ if (err)
+ writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
+
+ return writeback;
+}
+
+static void virtblk_update_cache_mode(struct virtio_device *vdev)
+{
+ u8 writeback = virtblk_get_cache_mode(vdev);
+ struct virtio_blk *vblk = vdev->priv;
+
+ if (writeback)
+ blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
+ else
+ blk_queue_flush(vblk->disk->queue, 0);
+
+ revalidate_disk(vblk->disk);
+}
+
+static const char *const virtblk_cache_types[] = {
+ "write through", "write back"
+};
+
+static ssize_t
+virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct virtio_blk *vblk = disk->private_data;
+ struct virtio_device *vdev = vblk->vdev;
+ int i;
+ u8 writeback;
+
+ BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
+ for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
+ if (sysfs_streq(buf, virtblk_cache_types[i]))
+ break;
+
+ if (i < 0)
+ return -EINVAL;
+
+ writeback = i;
+ vdev->config->set(vdev,
+ offsetof(struct virtio_blk_config, wce),
+ &writeback, sizeof(writeback));
+
+ virtblk_update_cache_mode(vdev);
+ return count;
+}
+
+static ssize_t
+virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct virtio_blk *vblk = disk->private_data;
+ u8 writeback = virtblk_get_cache_mode(vblk->vdev);
+
+ BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
+ return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
+}
+
+static const struct device_attribute dev_attr_cache_type_ro =
+ __ATTR(cache_type, S_IRUGO,
+ virtblk_cache_type_show, NULL);
+static const struct device_attribute dev_attr_cache_type_rw =
+ __ATTR(cache_type, S_IRUGO|S_IWUSR,
+ virtblk_cache_type_show, virtblk_cache_type_store);
+
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@@ -431,7 +506,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
- spin_lock_init(&vblk->lock);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
@@ -456,7 +530,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_mempool;
}
- q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+ q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
@@ -474,8 +548,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
vblk->index = index;
/* configure queue flush support */
- if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
- blk_queue_flush(q, REQ_FLUSH);
+ virtblk_update_cache_mode(vdev);
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
@@ -553,6 +626,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_del_disk;
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
+ err = device_create_file(disk_to_dev(vblk->disk),
+ &dev_attr_cache_type_rw);
+ else
+ err = device_create_file(disk_to_dev(vblk->disk),
+ &dev_attr_cache_type_ro);
+ if (err)
+ goto out_del_disk;
return 0;
out_del_disk:
@@ -576,30 +657,20 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
int index = vblk->index;
- struct virtblk_req *vbr;
- unsigned long flags;
/* Prevent config work handler from accessing the device. */
mutex_lock(&vblk->config_lock);
vblk->config_enable = false;
mutex_unlock(&vblk->config_lock);
+ del_gendisk(vblk->disk);
+ blk_cleanup_queue(vblk->disk->queue);
+
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
flush_work(&vblk->config_work);
- del_gendisk(vblk->disk);
-
- /* Abort requests dispatched to driver. */
- spin_lock_irqsave(&vblk->lock, flags);
- while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
- __blk_end_request_all(vbr->req, -EIO);
- mempool_free(vbr, vblk->pool);
- }
- spin_unlock_irqrestore(&vblk->lock, flags);
-
- blk_cleanup_queue(vblk->disk->queue);
put_disk(vblk->disk);
mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev);
@@ -655,7 +726,7 @@ static const struct virtio_device_id id_table[] = {
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
- VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
+ VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
};
/*
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 73f196ca713..c6decb901e5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req)
invcount++;
}
- ret = gnttab_unmap_refs(unmap, pages, invcount, false);
+ ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
BUG_ON(ret);
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e4fb3374dcd..2c2d2e5c159 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -888,9 +888,8 @@ static int setup_blkring(struct xenbus_device *dev,
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(info->evtchn,
- blkif_interrupt,
- IRQF_SAMPLE_RANDOM, "blkif", info);
+ err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
+ "blkif", info);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");