From 9478554ae5d21d65e948a3eff4ee2a8ad30d70e9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 9 Oct 2012 13:50:17 -0700 Subject: rbd: define rbd_update_mapping_size() Encapsulate the code that handles updating the size of a mapping after an rbd image has been refreshed. This is done in anticipation of the next patch, which will make this common code for format 1 and 2 images. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bb3d9be3b1b..b64125d1d7b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1716,6 +1716,19 @@ static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) __rbd_remove_snap_dev(snap); } +static void rbd_update_mapping_size(struct rbd_device *rbd_dev) +{ + sector_t size; + + if (rbd_dev->mapping.snap_id != CEPH_NOSNAP) + return; + + size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; + dout("setting size to %llu sectors", (unsigned long long) size); + rbd_dev->mapping.size = (u64) size; + set_capacity(rbd_dev->disk, size); +} + /* * only read the first part of the ondisk header, without the snaps info */ @@ -1730,17 +1743,9 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) down_write(&rbd_dev->header_rwsem); - /* resized? */ - if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) { - sector_t size = (sector_t) h.image_size / SECTOR_SIZE; - - if (size != (sector_t) rbd_dev->mapping.size) { - dout("setting size to %llu sectors", - (unsigned long long) size); - rbd_dev->mapping.size = (u64) size; - set_capacity(rbd_dev->disk, size); - } - } + /* Update image size, and check for resize of mapped image */ + rbd_dev->header.image_size = h.image_size; + rbd_update_mapping_size(rbd_dev); /* rbd_dev->header.object_prefix shouldn't change */ kfree(rbd_dev->header.snap_sizes); -- cgit v1.2.3-70-g09d2 From 117973fb4c91f3fd913127577e9f71b3aa6cb556 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 31 Aug 2012 17:29:55 -0500 Subject: rbd: define rbd_dev_v2_refresh() Define a new function rbd_dev_v2_refresh() to update/refresh the snapshot context for a format version 2 rbd image. This function will update anything that is not fixed for the life of an rbd image--at the moment this is mainly the snapshot context and (for a base mapping) the size. Update rbd_refresh_header() so it selects which function to use based on the image format. Rename __rbd_refresh_header() to be rbd_dev_v1_refresh() to be consistent with the naming of its version 2 counterpart. Similarly rename rbd_refresh_header() to be rbd_dev_refresh(). Unrelated--we use rbd_image_format_valid() here. Delete the other use of it, which was primarily put in place to ensure that function was referenced at the time it was defined. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b64125d1d7b..f11b839166e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -268,7 +268,8 @@ static void rbd_put_dev(struct rbd_device *rbd_dev) put_device(&rbd_dev->dev); } -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -1304,7 +1305,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); - rc = rbd_refresh_header(rbd_dev, &hver); + rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " " update snaps: %d\n", rbd_dev->major, rc); @@ -1732,7 +1733,7 @@ static void rbd_update_mapping_size(struct rbd_device *rbd_dev) /* * only read the first part of the ondisk header, without the snaps info */ -static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; struct rbd_image_header h; @@ -1773,12 +1774,16 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) return ret; } -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - ret = __rbd_refresh_header(rbd_dev, hver); + if (rbd_dev->image_format == 1) + ret = rbd_dev_v1_refresh(rbd_dev, hver); + else + ret = rbd_dev_v2_refresh(rbd_dev, hver); mutex_unlock(&ctl_mutex); return ret; @@ -1938,7 +1943,7 @@ static ssize_t rbd_image_refresh(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; - ret = rbd_refresh_header(rbd_dev, NULL); + ret = rbd_dev_refresh(rbd_dev, NULL); return ret < 0 ? ret : size; } @@ -2402,6 +2407,41 @@ static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, return ERR_PTR(-EINVAL); } +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) +{ + int ret; + __u8 obj_order; + + down_write(&rbd_dev->header_rwsem); + + /* Grab old order first, to see if it changes */ + + obj_order = rbd_dev->header.obj_order, + ret = rbd_dev_v2_image_size(rbd_dev); + if (ret) + goto out; + if (rbd_dev->header.obj_order != obj_order) { + ret = -EIO; + goto out; + } + rbd_update_mapping_size(rbd_dev); + + ret = rbd_dev_v2_snap_context(rbd_dev, hver); + dout("rbd_dev_v2_snap_context returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_update(rbd_dev); + dout("rbd_dev_snaps_update returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_register(rbd_dev); + dout("rbd_dev_snaps_register returned %d\n", ret); +out: + up_write(&rbd_dev->header_rwsem); + + return ret; +} + /* * Scan the rbd device's current snapshot list and compare it to the * newly-received snapshot context. Remove any existing snapshots @@ -2564,7 +2604,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) do { ret = rbd_req_sync_watch(rbd_dev); if (ret == -ERANGE) { - rc = rbd_refresh_header(rbd_dev, NULL); + rc = rbd_dev_refresh(rbd_dev, NULL); if (rc < 0) return rc; } @@ -3045,7 +3085,6 @@ static ssize_t rbd_add(struct bus_type *bus, rc = rbd_dev_probe(rbd_dev); if (rc < 0) goto err_out_client; - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); /* no need to lock here, as rbd_dev is not registered yet */ rc = rbd_dev_snaps_update(rbd_dev); -- cgit v1.2.3-70-g09d2 From d889140c4a1c5edb6a7bd90392b9d878bfaccfb6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 9 Oct 2012 13:50:17 -0700 Subject: rbd: implement feature checks Version 2 images have two sets of feature bit fields. The first indicates features possibly used by the image. The second indicates features that the client *must* support in order to use the image. When an image (or snapshot) is first examined, we need to make sure that the local implementation supports the image's required features. If not, fail the probe for the image. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f11b839166e..0f260a6e97c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -70,6 +70,14 @@ #define RBD_IMAGE_ID_LEN_MAX 64 #define RBD_OBJ_PREFIX_LEN_MAX 64 +/* Feature bits */ + +#define RBD_FEATURE_LAYERING 1 + +/* Features supported by this (client software) implementation. */ + +#define RBD_FEATURES_ALL (0) + /* * An RBD device name will be "rbd#", where the "rbd" comes from * RBD_DRV_NAME above, and # is a unique integer identifier. @@ -2226,6 +2234,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, __le64 features; __le64 incompat; } features_buf = { 0 }; + u64 incompat; int ret; ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, @@ -2236,6 +2245,11 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) return ret; + + incompat = le64_to_cpu(features_buf.incompat); + if (incompat & ~RBD_FEATURES_ALL) + return -ENOTSUPP; + *snap_features = le64_to_cpu(features_buf.features); dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", @@ -2977,7 +2991,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; - /* Get the features for the image */ + /* Get the and check features for the image */ ret = rbd_dev_v2_features(rbd_dev); if (ret < 0) -- cgit v1.2.3-70-g09d2 From 35152979e6181b1fbb4b61c3ff641c14df53ad66 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 31 Aug 2012 17:29:55 -0500 Subject: rbd: activate v2 image support Now that v2 images support is fully implemented, have rbd_dev_v2_probe() return 0 to indicate a successful probe. (Note that an image that implements layering will fail the probe early because of the feature chekc.) Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 0f260a6e97c..8f56d37637a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3014,7 +3014,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) dout("discovered version 2 image, header name is %s\n", rbd_dev->header_name); - return -ENOTSUPP; + return 0; out_err: kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; -- cgit v1.2.3-70-g09d2 From b213e0b1a62637b2a9395a34349b13d73ca2b90a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Oct 2012 21:19:13 -0700 Subject: rbd: fix bug in rbd_dev_id_put() In rbd_dev_id_put(), there's a loop that's intended to determine the maximum device id in use. But it isn't doing that at all, the effect of how it's written is to simply use the just-put id number, which ignores whole purpose of this function. Fix the bug. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8f56d37637a..4a16464ad5b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2680,8 +2680,8 @@ static void rbd_dev_id_put(struct rbd_device *rbd_dev) struct rbd_device *rbd_dev; rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_id > max_id) - max_id = rbd_id; + if (rbd_dev->dev_id > max_id) + max_id = rbd_dev->dev_id; } spin_unlock(&rbd_dev_list_lock); -- cgit v1.2.3-70-g09d2 From a0ea3a40fd20b8c66381f747c454f89d6d1f50d4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Oct 2012 21:19:13 -0700 Subject: rbd: zero return code in rbd_dev_image_id() When rbd_dev_probe() calls rbd_dev_image_id() it expects to get a 0 return code if successful, but it is getting a positive value. The reason is that rbd_dev_image_id() returns the value it gets from rbd_req_sync_exec(), which returns the number of bytes read in as a result of the request. (This ultimately comes from ceph_copy_from_page_vector() in rbd_req_sync_op()). Force the return value to 0 when successful in rbd_dev_image_id(). Do the same in rbd_dev_v2_object_prefix(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin Reviewed-by: Dan Mick --- drivers/block/rbd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4a16464ad5b..94d613208c4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2207,6 +2207,7 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) goto out; + ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2900,6 +2901,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) goto out; + ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; rbd_dev->image_id = ceph_extract_encoded_string(&p, -- cgit v1.2.3-70-g09d2 From be466c1cc36621590ef17b05a6d342dfd33f7280 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 22 Oct 2012 11:31:26 -0500 Subject: rbd: fix read-only option name The name of the "read-only" mapping option was inadvertently changed in this commit: f84344f3 rbd: separate mapping info in rbd_dev Revert that hunk to return it to what it should be. Signed-off-by: Alex Elder Reviewed-by: Dan Mick Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 94d613208c4..5d9e2cc5c51 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -397,7 +397,7 @@ enum { static match_table_t rbd_opts_tokens = { /* int args above */ /* string args above */ - {Opt_read_only, "mapping.read_only"}, + {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ -- cgit v1.2.3-70-g09d2 From 13f4042c05b6a1a638ccab3f0cabdb84993803a2 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Oct 2012 18:59:29 -0700 Subject: rbd: kill rbd_req_{read,write}() Both rbd_req_read() and rbd_req_write() are simple wrapper routines for rbd_do_op(), and each is only called once. Replace each wrapper call with a direct call to rbd_do_op(), and get rid of the wrapper functions. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 60 ++++++++++++----------------------------------------- 1 file changed, 13 insertions(+), 47 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5d9e2cc5c51..8f2c39ad3bb 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1210,41 +1210,6 @@ done: return ret; } -/* - * Request async osd write - */ -static int rbd_req_write(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ofs, len, bio, coll, coll_index); -} - -/* - * Request async osd read - */ -static int rbd_req_read(struct request *rq, - struct rbd_device *rbd_dev, - u64 snapid, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - return rbd_do_op(rq, rbd_dev, NULL, - snapid, - CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ, - ofs, len, bio, coll, coll_index); -} - /* * Request sync osd read */ @@ -1550,21 +1515,22 @@ static void rbd_rq_fn(struct request_queue *q) goto next_seg; } - /* init OSD command: write or read */ if (do_write) - rbd_req_write(rq, rbd_dev, - snapc, - ofs, - op_size, bio, - coll, cur_seg); + (void) rbd_do_op(rq, rbd_dev, + snapc, CEPH_NOSNAP, + CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_WRITE | + CEPH_OSD_FLAG_ONDISK, + ofs, op_size, bio, + coll, cur_seg); else - rbd_req_read(rq, rbd_dev, - rbd_dev->mapping.snap_id, - ofs, - op_size, bio, - coll, cur_seg); - + (void) rbd_do_op(rq, rbd_dev, + NULL, rbd_dev->mapping.snap_id, + CEPH_OSD_OP_READ, + CEPH_OSD_FLAG_READ, + ofs, op_size, bio, + coll, cur_seg); next_seg: size -= op_size; ofs += op_size; -- cgit v1.2.3-70-g09d2 From ff2e4bb5b32f89c455979a4222a9b78007cde254 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Oct 2012 18:59:29 -0700 Subject: rbd: drop rbd_do_op() opcode and flags The only callers of rbd_do_op() are in rbd_rq_fn(), where call one is used for writes and the other used for reads. The request passed to rbd_do_op() already encodes the I/O direction, and that information can be used inside the function to set the opcode and flags value (rather than passing them in as arguments). So get rid of the opcode and flags arguments to rbd_do_op(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8f2c39ad3bb..a29c6d2a49a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1164,7 +1164,6 @@ static int rbd_do_op(struct request *rq, struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, u64 snapid, - int opcode, int flags, u64 ofs, u64 len, struct bio *bio, struct rbd_req_coll *coll, @@ -1176,6 +1175,8 @@ static int rbd_do_op(struct request *rq, int ret; struct ceph_osd_req_op *ops; u32 payload_len; + int opcode; + int flags; seg_name = rbd_segment_name(rbd_dev, ofs); if (!seg_name) @@ -1183,7 +1184,15 @@ static int rbd_do_op(struct request *rq, seg_len = rbd_segment_length(rbd_dev, ofs, len); seg_ofs = rbd_segment_offset(rbd_dev, ofs); - payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); + if (rq_data_dir(rq) == WRITE) { + opcode = CEPH_OSD_OP_WRITE; + flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; + payload_len = seg_len; + } else { + opcode = CEPH_OSD_OP_READ; + flags = CEPH_OSD_FLAG_READ; + payload_len = 0; + } ret = -ENOMEM; ops = rbd_create_rw_ops(1, opcode, payload_len); @@ -1519,16 +1528,11 @@ static void rbd_rq_fn(struct request_queue *q) if (do_write) (void) rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, ofs, op_size, bio, coll, cur_seg); else (void) rbd_do_op(rq, rbd_dev, NULL, rbd_dev->mapping.snap_id, - CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ, ofs, op_size, bio, coll, cur_seg); next_seg: -- cgit v1.2.3-70-g09d2 From 4634246db8cb2e5117ef7c682efcc383fa3354f8 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Oct 2012 18:59:29 -0700 Subject: rbd: consolidate rbd_do_op() calls The two calls to rbd_do_op() from rbd_rq_fn() differ only in the value passed for the snapshot id and the snapshot context. For reads the snapshot always comes from the mapping, and for writes the snapshot id is always CEPH_NOSNAP. The snapshot context is always null for reads. For writes, the snapshot context always comes from the rbd header, but it is acquired under protection of header semaphore and could change thereafter, so we can't simply use what's available inside rbd_do_op(). Eliminate the snapid parameter from rbd_do_op(), and set it based on the I/O direction inside that function instead. Always pass the snapshot context acquired in the caller, but reset it to a null pointer inside rbd_do_op() if the operation is a read. As a result, there is no difference in the read and write calls to rbd_do_op() made in rbd_rq_fn(), so just call it unconditionally. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a29c6d2a49a..d0328835bbd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1163,7 +1163,6 @@ done: static int rbd_do_op(struct request *rq, struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, - u64 snapid, u64 ofs, u64 len, struct bio *bio, struct rbd_req_coll *coll, @@ -1177,6 +1176,7 @@ static int rbd_do_op(struct request *rq, u32 payload_len; int opcode; int flags; + u64 snapid; seg_name = rbd_segment_name(rbd_dev, ofs); if (!seg_name) @@ -1187,10 +1187,13 @@ static int rbd_do_op(struct request *rq, if (rq_data_dir(rq) == WRITE) { opcode = CEPH_OSD_OP_WRITE; flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; + snapid = CEPH_NOSNAP; payload_len = seg_len; } else { opcode = CEPH_OSD_OP_READ; flags = CEPH_OSD_FLAG_READ; + snapc = NULL; + snapid = rbd_dev->mapping.snap_id; payload_len = 0; } @@ -1518,24 +1521,13 @@ static void rbd_rq_fn(struct request_queue *q) kref_get(&coll->kref); bio = bio_chain_clone(&rq_bio, &next_bio, &bp, op_size, GFP_ATOMIC); - if (!bio) { + if (bio) + (void) rbd_do_op(rq, rbd_dev, snapc, + ofs, op_size, + bio, coll, cur_seg); + else rbd_coll_end_req_index(rq, coll, cur_seg, -ENOMEM, op_size); - goto next_seg; - } - - /* init OSD command: write or read */ - if (do_write) - (void) rbd_do_op(rq, rbd_dev, - snapc, CEPH_NOSNAP, - ofs, op_size, bio, - coll, cur_seg); - else - (void) rbd_do_op(rq, rbd_dev, - NULL, rbd_dev->mapping.snap_id, - ofs, op_size, bio, - coll, cur_seg); -next_seg: size -= op_size; ofs += op_size; -- cgit v1.2.3-70-g09d2 From db2388b6ee40a949084e4cdddc3b0a4357068a62 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 20 Oct 2012 22:17:27 -0500 Subject: rbd: verify rbd image order value This adds a verification that an rbd image's object order is within the upper and lower bounds supported by this implementation. It must be at least 9 (SECTOR_SHIFT), because the Linux bio system assumes that minimum granularity. It also must be less than 32 (at the moment anyway) because there exist spots in the code that store the size of a "segment" (object backing an rbd image) in a signed int variable, which can be 32 bits including the sign. We should be able to relax this limit once we've verified the code uses 64-bit types where needed. Note that the CLI tool already limits the order to the range 12-25. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d0328835bbd..4734446c3b5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -533,6 +533,16 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) return false; + /* The bio layer requires at least sector-sized I/O */ + + if (ondisk->options.order < SECTOR_SHIFT) + return false; + + /* If we use u64 in a few spots we may be able to loosen this */ + + if (ondisk->options.order > 8 * sizeof (int) - 1) + return false; + /* * The size of a snapshot header has to fit in a size_t, and * that limits the number of snapshots. -- cgit v1.2.3-70-g09d2 From d4b125e9eb43babd14538ba61718e3db71a98d29 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 3 Jul 2012 16:01:19 -0500 Subject: rbd: increase maximum snapshot name length Change RBD_MAX_SNAP_NAME_LEN to be based on NAME_MAX. That is a practical limit for the length of a snapshot name (based on the presence of a directory using the name under /sys/bus/rbd to represent the snapshot). The /sys entry is created by prefixing it with "snap_"; define that prefix symbolically, and take its length into account in defining the snapshot name length limit. Enforce the limit in rbd_add_parse_args(). Also delete a dout() call in that function that was not meant to be committed. Signed-off-by: Alex Elder Reviewed-by: Dan Mick Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4734446c3b5..4858d925b95 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -61,7 +61,10 @@ #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ -#define RBD_MAX_SNAP_NAME_LEN 32 +#define RBD_SNAP_DEV_NAME_PREFIX "snap_" +#define RBD_MAX_SNAP_NAME_LEN \ + (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) + #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ #define RBD_MAX_OPT_LEN 1024 @@ -2063,7 +2066,7 @@ static int rbd_register_snap_dev(struct rbd_snap *snap, dev->type = &rbd_snap_device_type; dev->parent = parent; dev->release = rbd_snap_dev_release; - dev_set_name(dev, "snap_%s", snap->name); + dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); dout("%s: registering device for snapshot %s\n", __func__, snap->name); ret = device_register(dev); @@ -2797,8 +2800,13 @@ static char *rbd_add_parse_args(struct rbd_device *rbd_dev, if (!rbd_dev->image_name) goto out_err; - /* Snapshot name is optional */ + /* Snapshot name is optional; default is to use "head" */ + len = next_token(&buf); + if (len > RBD_MAX_SNAP_NAME_LEN) { + err_ptr = ERR_PTR(-ENAMETOOLONG); + goto out_err; + } if (!len) { buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ len = sizeof (RBD_SNAP_HEAD_NAME) - 1; @@ -2809,8 +2817,6 @@ static char *rbd_add_parse_args(struct rbd_device *rbd_dev, memcpy(snap_name, buf, len); *(snap_name + len) = '\0'; -dout(" SNAP_NAME is <%s>, len is %zd\n", snap_name, len); - return snap_name; out_err: -- cgit v1.2.3-70-g09d2 From e5cfeed281a842a37c9da84bad2911c9b470347e Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 20 Oct 2012 22:17:27 -0500 Subject: rbd: simplify rbd_merge_bvec() The aim of this patch is to make what's going on rbd_merge_bvec() a bit more obvious than it was before. This was an issue when a recent btrfs bug led us to question whether the merge function was working correctly. Use "obj" rather than "chunk" to indicate the units whose boundaries we care about we call (rados) "objects". Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- drivers/block/rbd.c | 51 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4858d925b95..76fbfa12006 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1566,22 +1566,41 @@ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) { struct rbd_device *rbd_dev = q->queuedata; - unsigned int chunk_sectors; - sector_t sector; - unsigned int bio_sectors; - int max; - - chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); - sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); - bio_sectors = bmd->bi_size >> SECTOR_SHIFT; - - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << SECTOR_SHIFT; - if (max < 0) - max = 0; /* bio_add cannot handle a negative return */ - if (max <= bvec->bv_len && bio_sectors == 0) - return bvec->bv_len; - return max; + sector_t sector_offset; + sector_t sectors_per_obj; + sector_t obj_sector_offset; + int ret; + + /* + * Find how far into its rbd object the partition-relative + * bio start sector is to offset relative to the enclosing + * device. + */ + sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; + sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); + obj_sector_offset = sector_offset & (sectors_per_obj - 1); + + /* + * Compute the number of bytes from that offset to the end + * of the object. Account for what's already used by the bio. + */ + ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; + if (ret > bmd->bi_size) + ret -= bmd->bi_size; + else + ret = 0; + + /* + * Don't send back more than was asked for. And if the bio + * was empty, let the whole thing through because: "Note + * that a block device *must* allow a single page to be + * added to an empty bio." + */ + rbd_assert(bvec->bv_len <= PAGE_SIZE); + if (ret > (int) bvec->bv_len || !bmd->bi_size) + ret = (int) bvec->bv_len; + + return ret; } static void rbd_free_disk(struct rbd_device *rbd_dev) -- cgit v1.2.3-70-g09d2 From 069a4b5690a952e74157fd489833c71c73f012b3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 22 Oct 2012 11:31:27 -0500 Subject: rbd: kill rbd_device->rbd_opts The rbd_device structure has an embedded rbd_options structure. Such a structure is needed to work with the generic ceph argument parsing code, but there's no need to keep it around once argument parsing is done. Use a local variable to hold the rbd options used in parsing in rbd_get_client(), and just transfer its content (it's just a read_only flag) into the field in the rbd_mapping sub-structure that requires that information. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin Reviewed-by: Dan Mick --- drivers/block/rbd.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 76fbfa12006..c800047f583 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -184,7 +184,6 @@ struct rbd_device { struct gendisk *disk; /* blkdev's gendisk and rq */ u32 image_format; /* Either 1 or 2 */ - struct rbd_options rbd_opts; struct rbd_client *rbd_client; char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ @@ -456,18 +455,24 @@ static int parse_rbd_opts_token(char *c, void *private) static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, size_t mon_addr_len, char *options) { - struct rbd_options *rbd_opts = &rbd_dev->rbd_opts; + struct rbd_options rbd_opts; struct ceph_options *ceph_opts; struct rbd_client *rbdc; - rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + /* Initialize all rbd options to the defaults */ + + rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; ceph_opts = ceph_parse_options(options, mon_addr, mon_addr + mon_addr_len, - parse_rbd_opts_token, rbd_opts); + parse_rbd_opts_token, &rbd_opts); if (IS_ERR(ceph_opts)) return PTR_ERR(ceph_opts); + /* Record the parsed rbd options */ + + rbd_dev->mapping.read_only = rbd_opts.read_only; + rbdc = rbd_client_find(ceph_opts); if (rbdc) { /* using an existing client */ @@ -685,7 +690,6 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; rbd_dev->mapping.snap_exists = false; - rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only; ret = 0; } else { ret = snap_by_name(rbd_dev, snap_name); -- cgit v1.2.3-70-g09d2 From f7760dad286829682a8d36f4563ab20a65732414 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 20 Oct 2012 22:17:27 -0500 Subject: rbd: simplify rbd_rq_fn() When processing a request, rbd_rq_fn() makes clones of the bio's in the request's bio chain and submits the results to osd's to be satisfied. If a request bio straddles the boundary between objects backing the rbd image, it must be represented by two cloned bio's, one for the first part (at the end of one object) and one for the second (at the beginning of the next object). This has been handled by a function bio_chain_clone(), which includes an interface only a mother could love, and which has been found to have other problems. This patch defines two new fairly generic bio functions (one which replaces bio_chain_clone()) to help out the situation, and then revises rbd_rq_fn() to make use of them. First, bio_clone_range() clones a portion of a single bio, starting at a given offset within the bio and including only as many bytes as requested. As a convenience, a request to clone the entire bio is passed directly to bio_clone(). Second, bio_chain_clone_range() performs a similar function, producing a chain of cloned bio's covering a sub-range of the source chain. No bio_pair structures are used, and if successful the result will represent exactly the specified range. Using bio_chain_clone_range() makes bio_rq_fn() a little easier to understand, because it avoids the need to pass very much state information between consecutive calls. By avoiding the need to track a bio_pair structure, it also eliminates the problem described here: http://tracker.newdream.net/issues/2933 Note that a block request (and therefore the complete length of a bio chain processed in rbd_rq_fn()) is an unsigned int, while the result of rbd_segment_length() is u64. This change makes this range trunctation explicit, and trips a bug if the the segment boundary is too far off. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 231 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 152 insertions(+), 79 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c800047f583..cc06c55875b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -826,77 +826,144 @@ static void zero_bio_chain(struct bio *chain, int start_ofs) } /* - * bio_chain_clone - clone a chain of bios up to a certain length. - * might return a bio_pair that will need to be released. + * Clone a portion of a bio, starting at the given byte offset + * and continuing for the number of bytes indicated. */ -static struct bio *bio_chain_clone(struct bio **old, struct bio **next, - struct bio_pair **bp, - int len, gfp_t gfpmask) -{ - struct bio *old_chain = *old; - struct bio *new_chain = NULL; - struct bio *tail; - int total = 0; - - if (*bp) { - bio_pair_release(*bp); - *bp = NULL; - } +static struct bio *bio_clone_range(struct bio *bio_src, + unsigned int offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio_vec *bv; + unsigned int resid; + unsigned short idx; + unsigned int voff; + unsigned short end_idx; + unsigned short vcnt; + struct bio *bio; - while (old_chain && (total < len)) { - struct bio *tmp; + /* Handle the easy case for the caller */ - tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); - if (!tmp) - goto err_out; - gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ + if (!offset && len == bio_src->bi_size) + return bio_clone(bio_src, gfpmask); - if (total + old_chain->bi_size > len) { - struct bio_pair *bp; + if (WARN_ON_ONCE(!len)) + return NULL; + if (WARN_ON_ONCE(len > bio_src->bi_size)) + return NULL; + if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) + return NULL; - /* - * this split can only happen with a single paged bio, - * split_bio will BUG_ON if this is not the case - */ - dout("bio_chain_clone split! total=%d remaining=%d" - "bi_size=%u\n", - total, len - total, old_chain->bi_size); + /* Find first affected segment... */ - /* split the bio. We'll release it either in the next - call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); - if (!bp) - goto err_out; + resid = offset; + __bio_for_each_segment(bv, bio_src, idx, 0) { + if (resid < bv->bv_len) + break; + resid -= bv->bv_len; + } + voff = resid; - __bio_clone(tmp, &bp->bio1); + /* ...and the last affected segment */ - *next = &bp->bio2; - } else { - __bio_clone(tmp, old_chain); - *next = old_chain->bi_next; - } + resid += len; + __bio_for_each_segment(bv, bio_src, end_idx, idx) { + if (resid <= bv->bv_len) + break; + resid -= bv->bv_len; + } + vcnt = end_idx - idx + 1; + + /* Build the clone */ + + bio = bio_alloc(gfpmask, (unsigned int) vcnt); + if (!bio) + return NULL; /* ENOMEM */ - tmp->bi_bdev = NULL; - tmp->bi_next = NULL; - if (new_chain) - tail->bi_next = tmp; - else - new_chain = tmp; - tail = tmp; - old_chain = old_chain->bi_next; + bio->bi_bdev = bio_src->bi_bdev; + bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); + bio->bi_rw = bio_src->bi_rw; + bio->bi_flags |= 1 << BIO_CLONED; - total += tmp->bi_size; + /* + * Copy over our part of the bio_vec, then update the first + * and last (or only) entries. + */ + memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], + vcnt * sizeof (struct bio_vec)); + bio->bi_io_vec[0].bv_offset += voff; + if (vcnt > 1) { + bio->bi_io_vec[0].bv_len -= voff; + bio->bi_io_vec[vcnt - 1].bv_len = resid; + } else { + bio->bi_io_vec[0].bv_len = len; } - rbd_assert(total == len); + bio->bi_vcnt = vcnt; + bio->bi_size = len; + bio->bi_idx = 0; + + return bio; +} + +/* + * Clone a portion of a bio chain, starting at the given byte offset + * into the first bio in the source chain and continuing for the + * number of bytes indicated. The result is another bio chain of + * exactly the given length, or a null pointer on error. + * + * The bio_src and offset parameters are both in-out. On entry they + * refer to the first source bio and the offset into that bio where + * the start of data to be cloned is located. + * + * On return, bio_src is updated to refer to the bio in the source + * chain that contains first un-cloned byte, and *offset will + * contain the offset of that byte within that bio. + */ +static struct bio *bio_chain_clone_range(struct bio **bio_src, + unsigned int *offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio *bi = *bio_src; + unsigned int off = *offset; + struct bio *chain = NULL; + struct bio **end; + + /* Build up a chain of clone bios up to the limit */ + + if (!bi || off >= bi->bi_size || !len) + return NULL; /* Nothing to clone */ - *old = old_chain; + end = &chain; + while (len) { + unsigned int bi_size; + struct bio *bio; + + if (!bi) + goto out_err; /* EINVAL; ran out of bio's */ + bi_size = min_t(unsigned int, bi->bi_size - off, len); + bio = bio_clone_range(bi, off, bi_size, gfpmask); + if (!bio) + goto out_err; /* ENOMEM */ + + *end = bio; + end = &bio->bi_next; + + off += bi_size; + if (off == bi->bi_size) { + bi = bi->bi_next; + off = 0; + } + len -= bi_size; + } + *bio_src = bi; + *offset = off; - return new_chain; + return chain; +out_err: + bio_chain_put(chain); -err_out: - dout("bio_chain_clone with err\n"); - bio_chain_put(new_chain); return NULL; } @@ -1014,8 +1081,9 @@ static int rbd_do_request(struct request *rq, req_data->coll_index = coll_index; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, - (unsigned long long) ofs, (unsigned long long) len); + dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", + object_name, (unsigned long long) ofs, + (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, @@ -1463,18 +1531,16 @@ static void rbd_rq_fn(struct request_queue *q) { struct rbd_device *rbd_dev = q->queuedata; struct request *rq; - struct bio_pair *bp = NULL; while ((rq = blk_fetch_request(q))) { struct bio *bio; - struct bio *rq_bio, *next_bio = NULL; bool do_write; unsigned int size; - u64 op_size = 0; u64 ofs; int num_segs, cur_seg = 0; struct rbd_req_coll *coll; struct ceph_snap_context *snapc; + unsigned int bio_offset; dout("fetched request\n"); @@ -1486,10 +1552,6 @@ static void rbd_rq_fn(struct request_queue *q) /* deduce our operation (read, write) */ do_write = (rq_data_dir(rq) == WRITE); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - rq_bio = rq->bio; if (do_write && rbd_dev->mapping.read_only) { __blk_end_request_all(rq, -EROFS); continue; @@ -1512,6 +1574,10 @@ static void rbd_rq_fn(struct request_queue *q) up_read(&rbd_dev->header_rwsem); + size = blk_rq_bytes(rq); + ofs = blk_rq_pos(rq) * SECTOR_SIZE; + bio = rq->bio; + dout("%s 0x%x bytes at 0x%llx\n", do_write ? "write" : "read", size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); @@ -1531,30 +1597,37 @@ static void rbd_rq_fn(struct request_queue *q) continue; } + bio_offset = 0; do { - /* a bio clone to be passed down to OSD req */ + u64 limit = rbd_segment_length(rbd_dev, ofs, size); + unsigned int chain_size; + struct bio *bio_chain; + + BUG_ON(limit > (u64) UINT_MAX); + chain_size = (unsigned int) limit; dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - op_size = rbd_segment_length(rbd_dev, ofs, size); + kref_get(&coll->kref); - bio = bio_chain_clone(&rq_bio, &next_bio, &bp, - op_size, GFP_ATOMIC); - if (bio) + + /* Pass a cloned bio chain via an osd request */ + + bio_chain = bio_chain_clone_range(&bio, + &bio_offset, chain_size, + GFP_ATOMIC); + if (bio_chain) (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, op_size, - bio, coll, cur_seg); + ofs, chain_size, + bio_chain, coll, cur_seg); else rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, op_size); - size -= op_size; - ofs += op_size; + -ENOMEM, chain_size); + size -= chain_size; + ofs += chain_size; cur_seg++; - rq_bio = next_bio; } while (size > 0); kref_put(&coll->kref, rbd_coll_release); - if (bp) - bio_pair_release(bp); spin_lock_irq(q->queue_lock); ceph_put_snap_context(snapc); @@ -1564,7 +1637,7 @@ static void rbd_rq_fn(struct request_queue *q) /* * a queue callback. Makes sure that we don't create a bio that spans across * multiple osd objects. One exception would be with a single page bios, - * which we handle later at bio_chain_clone + * which we handle later at bio_chain_clone_range() */ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) -- cgit v1.2.3-70-g09d2 From 41f38c2b2f8b66b176a0e548ef06294343a7bfa2 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:40 -0500 Subject: rbd: remove snapshots on error in rbd_add() If rbd_dev_snaps_update() has ever been called for an rbd device structure there could be snapshot structures on its snaps list. In rbd_add(), this function is called but a subsequent error path neglected to clean up any of these snapshots. Add a call to rbd_remove_all_snaps() in the appropriate spot to remedy this. Change a couple of error labels to be a little clearer while there. Drop the leading underscores from the function name; there's nothing special about that function that they might signify. As suggested in review, the leading underscores in __rbd_remove_snap_dev() have been removed as well. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index cc06c55875b..c7681d46cf8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -228,7 +228,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); static void rbd_dev_release(struct device *dev); -static void __rbd_remove_snap_dev(struct rbd_snap *snap); +static void rbd_remove_snap_dev(struct rbd_snap *snap); static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count); @@ -1787,13 +1787,13 @@ static int rbd_read_header(struct rbd_device *rbd_dev, return ret; } -static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) +static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) { struct rbd_snap *snap; struct rbd_snap *next; list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) - __rbd_remove_snap_dev(snap); + rbd_remove_snap_dev(snap); } static void rbd_update_mapping_size(struct rbd_device *rbd_dev) @@ -2146,7 +2146,7 @@ static bool rbd_snap_registered(struct rbd_snap *snap) return ret; } -static void __rbd_remove_snap_dev(struct rbd_snap *snap) +static void rbd_remove_snap_dev(struct rbd_snap *snap) { list_del(&snap->node); if (device_is_registered(&snap->dev)) @@ -2569,7 +2569,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) if (rbd_dev->mapping.snap_id == snap->id) rbd_dev->mapping.snap_exists = false; - __rbd_remove_snap_dev(snap); + rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->mapping.snap_id == snap->id ? "mapped " : "", @@ -3179,11 +3179,11 @@ static ssize_t rbd_add(struct bus_type *bus, /* no need to lock here, as rbd_dev is not registered yet */ rc = rbd_dev_snaps_update(rbd_dev); if (rc) - goto err_out_header; + goto err_out_probe; rc = rbd_dev_set_mapping(rbd_dev, snap_name); if (rc) - goto err_out_header; + goto err_out_snaps; /* generate unique id: find highest unique id, add one */ rbd_dev_id_get(rbd_dev); @@ -3247,7 +3247,9 @@ err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_id: rbd_dev_id_put(rbd_dev); -err_out_header: +err_out_snaps: + rbd_remove_all_snaps(rbd_dev); +err_out_probe: rbd_header_free(&rbd_dev->header); err_out_client: kfree(rbd_dev->header_name); @@ -3345,7 +3347,7 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - __rbd_remove_all_snaps(rbd_dev); + rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); done: -- cgit v1.2.3-70-g09d2 From 86992098e7fdb97d01feb51495a952b264a55b7c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: make pool_id a 64 bit value If a format 2 image has a parent, its pool id will be specified using a 64-bit value. Change the pool id we save for an image to match that. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c7681d46cf8..ef82e9091f4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -197,7 +197,7 @@ struct rbd_device { size_t image_name_len; char *header_name; char *pool_name; - int pool_id; + u64 pool_id; struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; @@ -1113,7 +1113,7 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); + layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->pool_id); ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); rbd_assert(ret == 0); @@ -1982,7 +1982,7 @@ static ssize_t rbd_pool_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%d\n", rbd_dev->pool_id); + return sprintf(buf, "%llu\n", (unsigned long long) rbd_dev->pool_id); } static ssize_t rbd_name_show(struct device *dev, @@ -3170,7 +3170,7 @@ static ssize_t rbd_add(struct bus_type *bus, rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); if (rc < 0) goto err_out_client; - rbd_dev->pool_id = rc; + rbd_dev->pool_id = (u64) rc; rc = rbd_dev_probe(rbd_dev); if (rc < 0) -- cgit v1.2.3-70-g09d2 From 971f839a7670197366c04e99472943532caeb0dc Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: move snap info out of rbd_mapping struct Moving the snap_id and snap_name fields into the separate rbd_mapping structure was misguided. (And in time, perhaps we'll do away with that structure altogether...) Move these fields back into struct rbd_device. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ef82e9091f4..7d28ce33056 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -166,8 +166,6 @@ struct rbd_snap { }; struct rbd_mapping { - char *snap_name; - u64 snap_id; u64 size; u64 features; bool snap_exists; @@ -199,6 +197,9 @@ struct rbd_device { char *pool_name; u64 pool_id; + char *snap_name; + u64 snap_id; + struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; @@ -669,7 +670,7 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) list_for_each_entry(snap, &rbd_dev->snaps, node) { if (!strcmp(snap_name, snap->name)) { - rbd_dev->mapping.snap_id = snap->id; + rbd_dev->snap_id = snap->id; rbd_dev->mapping.size = snap->size; rbd_dev->mapping.features = snap->features; @@ -686,7 +687,7 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME, sizeof (RBD_SNAP_HEAD_NAME))) { - rbd_dev->mapping.snap_id = CEPH_NOSNAP; + rbd_dev->snap_id = CEPH_NOSNAP; rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; rbd_dev->mapping.snap_exists = false; @@ -698,7 +699,7 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) rbd_dev->mapping.snap_exists = true; rbd_dev->mapping.read_only = true; } - rbd_dev->mapping.snap_name = snap_name; + rbd_dev->snap_name = snap_name; done: return ret; } @@ -1278,7 +1279,7 @@ static int rbd_do_op(struct request *rq, opcode = CEPH_OSD_OP_READ; flags = CEPH_OSD_FLAG_READ; snapc = NULL; - snapid = rbd_dev->mapping.snap_id; + snapid = rbd_dev->snap_id; payload_len = 0; } @@ -1561,7 +1562,7 @@ static void rbd_rq_fn(struct request_queue *q) down_read(&rbd_dev->header_rwsem); - if (rbd_dev->mapping.snap_id != CEPH_NOSNAP && + if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->mapping.snap_exists) { up_read(&rbd_dev->header_rwsem); dout("request for non-existent snapshot"); @@ -1800,7 +1801,7 @@ static void rbd_update_mapping_size(struct rbd_device *rbd_dev) { sector_t size; - if (rbd_dev->mapping.snap_id != CEPH_NOSNAP) + if (rbd_dev->snap_id != CEPH_NOSNAP) return; size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; @@ -2011,7 +2012,7 @@ static ssize_t rbd_snap_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name); + return sprintf(buf, "%s\n", rbd_dev->snap_name); } static ssize_t rbd_image_refresh(struct device *dev, @@ -2567,12 +2568,11 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) /* Existing snapshot not in the new snap context */ - if (rbd_dev->mapping.snap_id == snap->id) + if (rbd_dev->snap_id == snap->id) rbd_dev->mapping.snap_exists = false; rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", - rbd_dev->mapping.snap_id == snap->id ? - "mapped " : "", + rbd_dev->snap_id == snap->id ? "mapped " : "", (unsigned long long) snap->id); /* Done with this list entry; advance */ @@ -3256,7 +3256,7 @@ err_out_client: rbd_put_client(rbd_dev); kfree(rbd_dev->image_id); err_out_args: - kfree(rbd_dev->mapping.snap_name); + kfree(rbd_dev->snap_name); kfree(rbd_dev->image_name); kfree(rbd_dev->pool_name); err_out_mem: @@ -3309,7 +3309,7 @@ static void rbd_dev_release(struct device *dev) rbd_header_free(&rbd_dev->header); /* done with the id, and with the rbd_dev */ - kfree(rbd_dev->mapping.snap_name); + kfree(rbd_dev->snap_name); kfree(rbd_dev->image_id); kfree(rbd_dev->header_name); kfree(rbd_dev->pool_name); -- cgit v1.2.3-70-g09d2 From daba5fdb4c469838dcee4b8dd4fecf7be69fa218 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 26 Oct 2012 17:25:23 -0500 Subject: rbd: rename snap_exists field A Boolean field "snap_exists" in an rbd mapping is used to indicate whether a mapped snapshot has been removed from an image's snapshot context, to stop sending requests for that snapshot as soon as we know it's gone. Generalize the interpretation of this field so it applies to non-snapshot (i.e. "head") mappings. That is, define its value to be false until the mapping has been set, and then define it to be true for both snapshot mappings or head mappings. Rename the field "exists" to reflect the broader interpretation. The rbd_mapping structure is on its way out, so move the field back into the rbd_device structure. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7d28ce33056..589f56542df 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -168,7 +168,6 @@ struct rbd_snap { struct rbd_mapping { u64 size; u64 features; - bool snap_exists; bool read_only; }; @@ -189,6 +188,7 @@ struct rbd_device { spinlock_t lock; /* queue lock */ struct rbd_image_header header; + bool exists; char *image_id; size_t image_id_len; char *image_name; @@ -690,16 +690,15 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) rbd_dev->snap_id = CEPH_NOSNAP; rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; - rbd_dev->mapping.snap_exists = false; ret = 0; } else { ret = snap_by_name(rbd_dev, snap_name); if (ret < 0) goto done; - rbd_dev->mapping.snap_exists = true; rbd_dev->mapping.read_only = true; } rbd_dev->snap_name = snap_name; + rbd_dev->exists = true; done: return ret; } @@ -1562,8 +1561,8 @@ static void rbd_rq_fn(struct request_queue *q) down_read(&rbd_dev->header_rwsem); - if (rbd_dev->snap_id != CEPH_NOSNAP && - !rbd_dev->mapping.snap_exists) { + if (!rbd_dev->exists) { + rbd_assert(rbd_dev->snap_id != CEPH_NOSNAP); up_read(&rbd_dev->header_rwsem); dout("request for non-existent snapshot"); spin_lock_irq(q->queue_lock); @@ -2569,7 +2568,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) /* Existing snapshot not in the new snap context */ if (rbd_dev->snap_id == snap->id) - rbd_dev->mapping.snap_exists = false; + rbd_dev->exists = false; rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->snap_id == snap->id ? "mapped " : "", -- cgit v1.2.3-70-g09d2 From 78cea76e0580befaf561c6989f4fc985bc66c8f7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: move ceph_parse_options() call up Move option parsing out of rbd_get_client() and into its caller. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 589f56542df..e83bddcca34 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -453,27 +453,11 @@ static int parse_rbd_opts_token(char *c, void *private) * Get a ceph client with specific addr and configuration, if one does * not exist create it. */ -static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, - size_t mon_addr_len, char *options) +static int rbd_get_client(struct rbd_device *rbd_dev, + struct ceph_options *ceph_opts) { - struct rbd_options rbd_opts; - struct ceph_options *ceph_opts; struct rbd_client *rbdc; - /* Initialize all rbd options to the defaults */ - - rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; - - ceph_opts = ceph_parse_options(options, mon_addr, - mon_addr + mon_addr_len, - parse_rbd_opts_token, &rbd_opts); - if (IS_ERR(ceph_opts)) - return PTR_ERR(ceph_opts); - - /* Record the parsed rbd options */ - - rbd_dev->mapping.read_only = rbd_opts.read_only; - rbdc = rbd_client_find(ceph_opts); if (rbdc) { /* using an existing client */ @@ -3132,9 +3116,11 @@ static ssize_t rbd_add(struct bus_type *bus, struct rbd_device *rbd_dev = NULL; const char *mon_addrs = NULL; size_t mon_addrs_size = 0; + char *snap_name; + struct rbd_options rbd_opts; + struct ceph_options *ceph_opts; struct ceph_osd_client *osdc; int rc = -ENOMEM; - char *snap_name; if (!try_module_get(THIS_MODULE)) return -ENODEV; @@ -3160,9 +3146,26 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_mem; } - rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options); - if (rc < 0) + /* Initialize all rbd options to the defaults */ + + rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; + + ceph_opts = ceph_parse_options(options, mon_addrs, + mon_addrs + mon_addrs_size - 1, + parse_rbd_opts_token, &rbd_opts); + if (IS_ERR(ceph_opts)) { + rc = PTR_ERR(ceph_opts); goto err_out_args; + } + + /* Record the parsed rbd options */ + + rbd_dev->mapping.read_only = rbd_opts.read_only; + + rc = rbd_get_client(rbd_dev, ceph_opts); + if (rc < 0) + goto err_out_opts; + ceph_opts = NULL; /* ceph_opts now owned by rbd_dev client */ /* pick the pool */ osdc = &rbd_dev->rbd_client->client->osdc; @@ -3254,6 +3257,9 @@ err_out_client: kfree(rbd_dev->header_name); rbd_put_client(rbd_dev); kfree(rbd_dev->image_id); +err_out_opts: + if (ceph_opts) + ceph_destroy_options(ceph_opts); err_out_args: kfree(rbd_dev->snap_name); kfree(rbd_dev->image_name); -- cgit v1.2.3-70-g09d2 From 0ddebc0c6c518ae42c376151e34d9d4b84443ba5 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: do all argument parsing in one place This patch makes rbd_add_parse_args() be the single place all argument parsing occurs for an image map request: - Move the ceph_parse_options() call into that function - Use local variables rather than parameters to hold the list of monitor addresses supplied - Rather than returning it, pass the snapshot name (and its length) back via parameters - Have the function return a ceph_options structure pointer Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 77 +++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 40 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e83bddcca34..68447d83288 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2845,24 +2845,27 @@ static inline char *dup_token(const char **buf, size_t *lenp) * * Note: rbd_dev is assumed to have been initially zero-filled. */ -static char *rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf, - const char **mon_addrs, - size_t *mon_addrs_size, - char *options, - size_t options_size) +static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, + const char *buf, + char *options, + size_t options_size, + char **snap_name, + size_t *snap_name_len) { size_t len; - char *err_ptr = ERR_PTR(-EINVAL); - char *snap_name; + const char *mon_addrs; + size_t mon_addrs_size; + struct rbd_options rbd_opts; + struct ceph_options *ceph_opts; + struct ceph_options *err_ptr = ERR_PTR(-EINVAL); /* The first four tokens are required */ len = next_token(&buf); if (!len) return err_ptr; - *mon_addrs_size = len + 1; - *mon_addrs = buf; + mon_addrs_size = len + 1; + mon_addrs = buf; buf += len; @@ -2890,14 +2893,27 @@ static char *rbd_add_parse_args(struct rbd_device *rbd_dev, buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ len = sizeof (RBD_SNAP_HEAD_NAME) - 1; } - snap_name = kmalloc(len + 1, GFP_KERNEL); - if (!snap_name) + *snap_name = kmalloc(len + 1, GFP_KERNEL); + if (!*snap_name) goto out_err; - memcpy(snap_name, buf, len); - *(snap_name + len) = '\0'; + memcpy(*snap_name, buf, len); + *(*snap_name + len) = '\0'; + *snap_name_len = len; + /* Initialize all rbd options to the defaults */ - return snap_name; + rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; + ceph_opts = ceph_parse_options(options, mon_addrs, + mon_addrs + mon_addrs_size - 1, + parse_rbd_opts_token, &rbd_opts); + + /* Record the parsed rbd options */ + + if (!IS_ERR(ceph_opts)) { + rbd_dev->mapping.read_only = rbd_opts.read_only; + } + + return ceph_opts; out_err: kfree(rbd_dev->image_name); rbd_dev->image_name = NULL; @@ -3114,10 +3130,8 @@ static ssize_t rbd_add(struct bus_type *bus, { char *options; struct rbd_device *rbd_dev = NULL; - const char *mon_addrs = NULL; - size_t mon_addrs_size = 0; char *snap_name; - struct rbd_options rbd_opts; + size_t snap_name_len = 0; struct ceph_options *ceph_opts; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3139,32 +3153,16 @@ static ssize_t rbd_add(struct bus_type *bus, init_rwsem(&rbd_dev->header_rwsem); /* parse add command */ - snap_name = rbd_add_parse_args(rbd_dev, buf, - &mon_addrs, &mon_addrs_size, options, count); - if (IS_ERR(snap_name)) { - rc = PTR_ERR(snap_name); - goto err_out_mem; - } - - /* Initialize all rbd options to the defaults */ - - rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; - - ceph_opts = ceph_parse_options(options, mon_addrs, - mon_addrs + mon_addrs_size - 1, - parse_rbd_opts_token, &rbd_opts); + ceph_opts = rbd_add_parse_args(rbd_dev, buf, options, count, + &snap_name, &snap_name_len); if (IS_ERR(ceph_opts)) { rc = PTR_ERR(ceph_opts); - goto err_out_args; + goto err_out_mem; } - /* Record the parsed rbd options */ - - rbd_dev->mapping.read_only = rbd_opts.read_only; - rc = rbd_get_client(rbd_dev, ceph_opts); if (rc < 0) - goto err_out_opts; + goto err_out_args; ceph_opts = NULL; /* ceph_opts now owned by rbd_dev client */ /* pick the pool */ @@ -3257,10 +3255,9 @@ err_out_client: kfree(rbd_dev->header_name); rbd_put_client(rbd_dev); kfree(rbd_dev->image_id); -err_out_opts: +err_out_args: if (ceph_opts) ceph_destroy_options(ceph_opts); -err_out_args: kfree(rbd_dev->snap_name); kfree(rbd_dev->image_name); kfree(rbd_dev->pool_name); -- cgit v1.2.3-70-g09d2 From e5c35534042f4b5957a32bba651222c91247beba Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: get rid of snap_name_len The value returned in the "snap_name_len" argument to rbd_add_parse_args() is never actually used, so get rid of it. The snap_name_len recorded in rbd_dev_v2_snap_name() is not useful either, so get rid of that too. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 68447d83288..7bd23139b94 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2408,7 +2408,6 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) int ret; void *p; void *end; - size_t snap_name_len; char *snap_name; size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; @@ -2428,9 +2427,7 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) p = reply_buf; end = (char *) reply_buf + size; - snap_name_len = 0; - snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len, - GFP_KERNEL); + snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(snap_name)) { ret = PTR_ERR(snap_name); goto out; @@ -2849,8 +2846,7 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, const char *buf, char *options, size_t options_size, - char **snap_name, - size_t *snap_name_len) + char **snap_name) { size_t len; const char *mon_addrs; @@ -2898,7 +2894,7 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, goto out_err; memcpy(*snap_name, buf, len); *(*snap_name + len) = '\0'; - *snap_name_len = len; + /* Initialize all rbd options to the defaults */ rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; @@ -3131,7 +3127,6 @@ static ssize_t rbd_add(struct bus_type *bus, char *options; struct rbd_device *rbd_dev = NULL; char *snap_name; - size_t snap_name_len = 0; struct ceph_options *ceph_opts; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3154,7 +3149,7 @@ static ssize_t rbd_add(struct bus_type *bus, /* parse add command */ ceph_opts = rbd_add_parse_args(rbd_dev, buf, options, count, - &snap_name, &snap_name_len); + &snap_name); if (IS_ERR(ceph_opts)) { rc = PTR_ERR(ceph_opts); goto err_out_mem; -- cgit v1.2.3-70-g09d2 From f28e565a1b15eef62618db4011d9e320089a4214 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: remove options args from rbd_add_parse_args() They "options" argument to rbd_add_parse_args() (and it's partner options_size) is now only needed within the function, so there's no need to have the caller allocate and pass the options buffer. Just allocate the options buffer within the function using dup_token(). Also distinguish between failures due to failed memory allocation and failing because a required argument was missing. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7bd23139b94..62df67a1132 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2844,54 +2844,58 @@ static inline char *dup_token(const char **buf, size_t *lenp) */ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, const char *buf, - char *options, - size_t options_size, char **snap_name) { size_t len; const char *mon_addrs; size_t mon_addrs_size; + char *options; + struct ceph_options *err_ptr = ERR_PTR(-EINVAL); struct rbd_options rbd_opts; struct ceph_options *ceph_opts; - struct ceph_options *err_ptr = ERR_PTR(-EINVAL); /* The first four tokens are required */ len = next_token(&buf); if (!len) - return err_ptr; - mon_addrs_size = len + 1; + return err_ptr; /* Missing monitor address(es) */ mon_addrs = buf; - + mon_addrs_size = len + 1; buf += len; - len = copy_token(&buf, options, options_size); - if (!len || len >= options_size) - return err_ptr; + options = dup_token(&buf, NULL); + if (!options) + goto out_mem; + if (!*options) + goto out_err; /* Missing options */ - err_ptr = ERR_PTR(-ENOMEM); rbd_dev->pool_name = dup_token(&buf, NULL); if (!rbd_dev->pool_name) - goto out_err; + goto out_mem; + if (!*rbd_dev->pool_name) + goto out_err; /* Missing pool name */ rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); if (!rbd_dev->image_name) - goto out_err; - - /* Snapshot name is optional; default is to use "head" */ + goto out_mem; + if (!*rbd_dev->image_name) + goto out_err; /* Missing image name */ + /* + * Snapshot name is optional; default is to use "-" + * (indicating the head/no snapshot). + */ len = next_token(&buf); - if (len > RBD_MAX_SNAP_NAME_LEN) { - err_ptr = ERR_PTR(-ENAMETOOLONG); - goto out_err; - } if (!len) { buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ len = sizeof (RBD_SNAP_HEAD_NAME) - 1; + } else if (len > RBD_MAX_SNAP_NAME_LEN) { + err_ptr = ERR_PTR(-ENAMETOOLONG); + goto out_err; } *snap_name = kmalloc(len + 1, GFP_KERNEL); if (!*snap_name) - goto out_err; + goto out_mem; memcpy(*snap_name, buf, len); *(*snap_name + len) = '\0'; @@ -2902,20 +2906,23 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, ceph_opts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, parse_rbd_opts_token, &rbd_opts); + kfree(options); /* Record the parsed rbd options */ - if (!IS_ERR(ceph_opts)) { + if (!IS_ERR(ceph_opts)) rbd_dev->mapping.read_only = rbd_opts.read_only; - } return ceph_opts; +out_mem: + err_ptr = ERR_PTR(-ENOMEM); out_err: kfree(rbd_dev->image_name); rbd_dev->image_name = NULL; rbd_dev->image_name_len = 0; kfree(rbd_dev->pool_name); rbd_dev->pool_name = NULL; + kfree(options); return err_ptr; } @@ -3124,7 +3131,6 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count) { - char *options; struct rbd_device *rbd_dev = NULL; char *snap_name; struct ceph_options *ceph_opts; @@ -3134,9 +3140,6 @@ static ssize_t rbd_add(struct bus_type *bus, if (!try_module_get(THIS_MODULE)) return -ENODEV; - options = kmalloc(count, GFP_KERNEL); - if (!options) - goto err_out_mem; rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) goto err_out_mem; @@ -3148,8 +3151,7 @@ static ssize_t rbd_add(struct bus_type *bus, init_rwsem(&rbd_dev->header_rwsem); /* parse add command */ - ceph_opts = rbd_add_parse_args(rbd_dev, buf, options, count, - &snap_name); + ceph_opts = rbd_add_parse_args(rbd_dev, buf, &snap_name); if (IS_ERR(ceph_opts)) { rc = PTR_ERR(ceph_opts); goto err_out_mem; @@ -3233,7 +3235,6 @@ err_out_bus: /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); - kfree(options); return rc; err_out_disk: @@ -3258,7 +3259,6 @@ err_out_args: kfree(rbd_dev->pool_name); err_out_mem: kfree(rbd_dev); - kfree(options); dout("Error adding device %s\n", buf); module_put(THIS_MODULE); -- cgit v1.2.3-70-g09d2 From 819d52bf72b61a8455024ff7863eed5d681e73c7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: remove snap_name arg from rbd_add_parse_args() The snapshot name returned by rbd_add_parse_args() just gets saved in the rbd_dev eventually. So just do that inside that function and do away with the snap_name argument, both in rbd_add_parse_args() and rbd_dev_set_mapping(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 62df67a1132..ae16cf615f0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -665,23 +665,22 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) return -ENOENT; } -static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) +static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) { int ret; - if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME, + if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, sizeof (RBD_SNAP_HEAD_NAME))) { rbd_dev->snap_id = CEPH_NOSNAP; rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; ret = 0; } else { - ret = snap_by_name(rbd_dev, snap_name); + ret = snap_by_name(rbd_dev, rbd_dev->snap_name); if (ret < 0) goto done; rbd_dev->mapping.read_only = true; } - rbd_dev->snap_name = snap_name; rbd_dev->exists = true; done: return ret; @@ -2843,8 +2842,7 @@ static inline char *dup_token(const char **buf, size_t *lenp) * Note: rbd_dev is assumed to have been initially zero-filled. */ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf, - char **snap_name) + const char *buf) { size_t len; const char *mon_addrs; @@ -2893,11 +2891,11 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, err_ptr = ERR_PTR(-ENAMETOOLONG); goto out_err; } - *snap_name = kmalloc(len + 1, GFP_KERNEL); - if (!*snap_name) + rbd_dev->snap_name = kmalloc(len + 1, GFP_KERNEL); + if (!rbd_dev->snap_name) goto out_mem; - memcpy(*snap_name, buf, len); - *(*snap_name + len) = '\0'; + memcpy(rbd_dev->snap_name, buf, len); + *(rbd_dev->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -3132,7 +3130,6 @@ static ssize_t rbd_add(struct bus_type *bus, size_t count) { struct rbd_device *rbd_dev = NULL; - char *snap_name; struct ceph_options *ceph_opts; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3151,7 +3148,7 @@ static ssize_t rbd_add(struct bus_type *bus, init_rwsem(&rbd_dev->header_rwsem); /* parse add command */ - ceph_opts = rbd_add_parse_args(rbd_dev, buf, &snap_name); + ceph_opts = rbd_add_parse_args(rbd_dev, buf); if (IS_ERR(ceph_opts)) { rc = PTR_ERR(ceph_opts); goto err_out_mem; @@ -3178,7 +3175,7 @@ static ssize_t rbd_add(struct bus_type *bus, if (rc) goto err_out_probe; - rc = rbd_dev_set_mapping(rbd_dev, snap_name); + rc = rbd_dev_set_mapping(rbd_dev); if (rc) goto err_out_snaps; -- cgit v1.2.3-70-g09d2 From 4e9afeba7aa9ca925a79d9ce82f1a8e79e14c291 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: pass and populate rbd_options structure Have the caller pass the address of an rbd_options structure to rbd_add_parse_args(), to be initialized with the information gleaned as a result of the parse. I know, this is another near-reversal of a recent change... Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ae16cf615f0..338cfadad63 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2842,15 +2842,16 @@ static inline char *dup_token(const char **buf, size_t *lenp) * Note: rbd_dev is assumed to have been initially zero-filled. */ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf) + const char *buf, + struct rbd_options **opts) { size_t len; const char *mon_addrs; size_t mon_addrs_size; char *options; struct ceph_options *err_ptr = ERR_PTR(-EINVAL); - struct rbd_options rbd_opts; struct ceph_options *ceph_opts; + struct rbd_options *rbd_opts = NULL; /* The first four tokens are required */ @@ -2899,17 +2900,17 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, /* Initialize all rbd options to the defaults */ - rbd_opts.read_only = RBD_READ_ONLY_DEFAULT; + rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); + if (!rbd_opts) + goto out_mem; + + rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; ceph_opts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, - parse_rbd_opts_token, &rbd_opts); + parse_rbd_opts_token, rbd_opts); kfree(options); - - /* Record the parsed rbd options */ - - if (!IS_ERR(ceph_opts)) - rbd_dev->mapping.read_only = rbd_opts.read_only; + *opts = rbd_opts; return ceph_opts; out_mem: @@ -3131,6 +3132,7 @@ static ssize_t rbd_add(struct bus_type *bus, { struct rbd_device *rbd_dev = NULL; struct ceph_options *ceph_opts; + struct rbd_options *rbd_opts = NULL; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3139,7 +3141,7 @@ static ssize_t rbd_add(struct bus_type *bus, rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) - goto err_out_mem; + return -ENOMEM; /* static rbd_device initialization */ spin_lock_init(&rbd_dev->lock); @@ -3148,11 +3150,12 @@ static ssize_t rbd_add(struct bus_type *bus, init_rwsem(&rbd_dev->header_rwsem); /* parse add command */ - ceph_opts = rbd_add_parse_args(rbd_dev, buf); + ceph_opts = rbd_add_parse_args(rbd_dev, buf, &rbd_opts); if (IS_ERR(ceph_opts)) { rc = PTR_ERR(ceph_opts); goto err_out_mem; } + rbd_dev->mapping.read_only = rbd_opts->read_only; rc = rbd_get_client(rbd_dev, ceph_opts); if (rc < 0) @@ -3219,6 +3222,8 @@ static ssize_t rbd_add(struct bus_type *bus, if (rc) goto err_out_bus; + kfree(rbd_opts); + /* Everything's ready. Announce the disk to the world. */ add_disk(rbd_dev->disk); @@ -3232,6 +3237,8 @@ err_out_bus: /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); + kfree(rbd_opts); + return rc; err_out_disk: @@ -3254,6 +3261,7 @@ err_out_args: kfree(rbd_dev->snap_name); kfree(rbd_dev->image_name); kfree(rbd_dev->pool_name); + kfree(rbd_opts); err_out_mem: kfree(rbd_dev); -- cgit v1.2.3-70-g09d2 From dc79b113d6db48ee6ee7decf0216feee48757f01 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: have rbd_add_parse_args() return error Change the interface to rbd_add_parse_args() so it returns an error code rather than a pointer. Return the ceph_options result via a pointer whose address is passed as an argument. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 338cfadad63..ab6e6d8364e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2841,30 +2841,31 @@ static inline char *dup_token(const char **buf, size_t *lenp) * * Note: rbd_dev is assumed to have been initially zero-filled. */ -static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf, - struct rbd_options **opts) +static int rbd_add_parse_args(struct rbd_device *rbd_dev, + const char *buf, + struct ceph_options **ceph_opts, + struct rbd_options **opts) { size_t len; const char *mon_addrs; size_t mon_addrs_size; char *options; - struct ceph_options *err_ptr = ERR_PTR(-EINVAL); - struct ceph_options *ceph_opts; struct rbd_options *rbd_opts = NULL; + int ret; /* The first four tokens are required */ len = next_token(&buf); if (!len) - return err_ptr; /* Missing monitor address(es) */ + return -EINVAL; /* Missing monitor address(es) */ mon_addrs = buf; mon_addrs_size = len + 1; buf += len; + ret = -EINVAL; options = dup_token(&buf, NULL); if (!options) - goto out_mem; + return -ENOMEM; if (!*options) goto out_err; /* Missing options */ @@ -2889,7 +2890,7 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ len = sizeof (RBD_SNAP_HEAD_NAME) - 1; } else if (len > RBD_MAX_SNAP_NAME_LEN) { - err_ptr = ERR_PTR(-ENAMETOOLONG); + ret = -ENAMETOOLONG; goto out_err; } rbd_dev->snap_name = kmalloc(len + 1, GFP_KERNEL); @@ -2906,15 +2907,19 @@ static struct ceph_options *rbd_add_parse_args(struct rbd_device *rbd_dev, rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; - ceph_opts = ceph_parse_options(options, mon_addrs, + *ceph_opts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, parse_rbd_opts_token, rbd_opts); kfree(options); + if (IS_ERR(*ceph_opts)) { + ret = PTR_ERR(*ceph_opts); + goto out_err; + } *opts = rbd_opts; - return ceph_opts; + return 0; out_mem: - err_ptr = ERR_PTR(-ENOMEM); + ret = -ENOMEM; out_err: kfree(rbd_dev->image_name); rbd_dev->image_name = NULL; @@ -2923,7 +2928,7 @@ out_err: rbd_dev->pool_name = NULL; kfree(options); - return err_ptr; + return ret; } /* @@ -3131,7 +3136,7 @@ static ssize_t rbd_add(struct bus_type *bus, size_t count) { struct rbd_device *rbd_dev = NULL; - struct ceph_options *ceph_opts; + struct ceph_options *ceph_opts = NULL; struct rbd_options *rbd_opts = NULL; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3150,11 +3155,9 @@ static ssize_t rbd_add(struct bus_type *bus, init_rwsem(&rbd_dev->header_rwsem); /* parse add command */ - ceph_opts = rbd_add_parse_args(rbd_dev, buf, &rbd_opts); - if (IS_ERR(ceph_opts)) { - rc = PTR_ERR(ceph_opts); + rc = rbd_add_parse_args(rbd_dev, buf, &ceph_opts, &rbd_opts); + if (rc < 0) goto err_out_mem; - } rbd_dev->mapping.read_only = rbd_opts->read_only; rc = rbd_get_client(rbd_dev, ceph_opts); -- cgit v1.2.3-70-g09d2 From 0d7dbfce9d6e3a57a6946fadf7f92b1792b8acc0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:41 -0500 Subject: rbd: define image specification structure Group the fields that uniquely specify an rbd image into a new reference-counted rbd_spec structure. This structure will be used to describe the desired image when mapping an image, and when probing parent images in layered rbd devices. Replace the set of fields in the rbd device structure with a pointer to a dynamically allocated rbd_spec. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 158 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 90 insertions(+), 68 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ab6e6d8364e..2049810fcdc 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -112,6 +112,27 @@ struct rbd_image_header { u64 obj_version; }; +/* + * An rbd image specification. + * + * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely + * identify an image. + */ +struct rbd_spec { + u64 pool_id; + char *pool_name; + + char *image_id; + size_t image_id_len; + char *image_name; + size_t image_name_len; + + u64 snap_id; + char *snap_name; + + struct kref kref; +}; + struct rbd_options { bool read_only; }; @@ -189,16 +210,9 @@ struct rbd_device { struct rbd_image_header header; bool exists; - char *image_id; - size_t image_id_len; - char *image_name; - size_t image_name_len; - char *header_name; - char *pool_name; - u64 pool_id; + struct rbd_spec *spec; - char *snap_name; - u64 snap_id; + char *header_name; struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; @@ -654,7 +668,7 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) list_for_each_entry(snap, &rbd_dev->snaps, node) { if (!strcmp(snap_name, snap->name)) { - rbd_dev->snap_id = snap->id; + rbd_dev->spec->snap_id = snap->id; rbd_dev->mapping.size = snap->size; rbd_dev->mapping.features = snap->features; @@ -669,14 +683,14 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) { int ret; - if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, + if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, sizeof (RBD_SNAP_HEAD_NAME))) { - rbd_dev->snap_id = CEPH_NOSNAP; + rbd_dev->spec->snap_id = CEPH_NOSNAP; rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; ret = 0; } else { - ret = snap_by_name(rbd_dev, rbd_dev->snap_name); + ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); if (ret < 0) goto done; rbd_dev->mapping.read_only = true; @@ -1096,7 +1110,7 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->pool_id); + layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); rbd_assert(ret == 0); @@ -1261,7 +1275,7 @@ static int rbd_do_op(struct request *rq, opcode = CEPH_OSD_OP_READ; flags = CEPH_OSD_FLAG_READ; snapc = NULL; - snapid = rbd_dev->snap_id; + snapid = rbd_dev->spec->snap_id; payload_len = 0; } @@ -1545,7 +1559,7 @@ static void rbd_rq_fn(struct request_queue *q) down_read(&rbd_dev->header_rwsem); if (!rbd_dev->exists) { - rbd_assert(rbd_dev->snap_id != CEPH_NOSNAP); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); up_read(&rbd_dev->header_rwsem); dout("request for non-existent snapshot"); spin_lock_irq(q->queue_lock); @@ -1726,13 +1740,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) ret = -ENXIO; pr_warning("short header read for image %s" " (want %zd got %d)\n", - rbd_dev->image_name, size, ret); + rbd_dev->spec->image_name, size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; pr_warning("invalid header for image %s\n", - rbd_dev->image_name); + rbd_dev->spec->image_name); goto out_err; } @@ -1783,7 +1797,7 @@ static void rbd_update_mapping_size(struct rbd_device *rbd_dev) { sector_t size; - if (rbd_dev->snap_id != CEPH_NOSNAP) + if (rbd_dev->spec->snap_id != CEPH_NOSNAP) return; size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; @@ -1957,7 +1971,7 @@ static ssize_t rbd_pool_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->pool_name); + return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); } static ssize_t rbd_pool_id_show(struct device *dev, @@ -1965,7 +1979,8 @@ static ssize_t rbd_pool_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%llu\n", (unsigned long long) rbd_dev->pool_id); + return sprintf(buf, "%llu\n", + (unsigned long long) rbd_dev->spec->pool_id); } static ssize_t rbd_name_show(struct device *dev, @@ -1973,7 +1988,7 @@ static ssize_t rbd_name_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->image_name); + return sprintf(buf, "%s\n", rbd_dev->spec->image_name); } static ssize_t rbd_image_id_show(struct device *dev, @@ -1981,7 +1996,7 @@ static ssize_t rbd_image_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->image_id); + return sprintf(buf, "%s\n", rbd_dev->spec->image_id); } /* @@ -1994,7 +2009,7 @@ static ssize_t rbd_snap_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->snap_name); + return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); } static ssize_t rbd_image_refresh(struct device *dev, @@ -2547,11 +2562,12 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) /* Existing snapshot not in the new snap context */ - if (rbd_dev->snap_id == snap->id) + if (rbd_dev->spec->snap_id == snap->id) rbd_dev->exists = false; rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", - rbd_dev->snap_id == snap->id ? "mapped " : "", + rbd_dev->spec->snap_id == snap->id ? + "mapped " : "", (unsigned long long) snap->id); /* Done with this list entry; advance */ @@ -2869,16 +2885,17 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, if (!*options) goto out_err; /* Missing options */ - rbd_dev->pool_name = dup_token(&buf, NULL); - if (!rbd_dev->pool_name) + rbd_dev->spec->pool_name = dup_token(&buf, NULL); + if (!rbd_dev->spec->pool_name) goto out_mem; - if (!*rbd_dev->pool_name) + if (!*rbd_dev->spec->pool_name) goto out_err; /* Missing pool name */ - rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); - if (!rbd_dev->image_name) + rbd_dev->spec->image_name = + dup_token(&buf, &rbd_dev->spec->image_name_len); + if (!rbd_dev->spec->image_name) goto out_mem; - if (!*rbd_dev->image_name) + if (!*rbd_dev->spec->image_name) goto out_err; /* Missing image name */ /* @@ -2893,11 +2910,11 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, ret = -ENAMETOOLONG; goto out_err; } - rbd_dev->snap_name = kmalloc(len + 1, GFP_KERNEL); - if (!rbd_dev->snap_name) + rbd_dev->spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + if (!rbd_dev->spec->snap_name) goto out_mem; - memcpy(rbd_dev->snap_name, buf, len); - *(rbd_dev->snap_name + len) = '\0'; + memcpy(rbd_dev->spec->snap_name, buf, len); + *(rbd_dev->spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -2921,11 +2938,11 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, out_mem: ret = -ENOMEM; out_err: - kfree(rbd_dev->image_name); - rbd_dev->image_name = NULL; - rbd_dev->image_name_len = 0; - kfree(rbd_dev->pool_name); - rbd_dev->pool_name = NULL; + kfree(rbd_dev->spec->image_name); + rbd_dev->spec->image_name = NULL; + rbd_dev->spec->image_name_len = 0; + kfree(rbd_dev->spec->pool_name); + rbd_dev->spec->pool_name = NULL; kfree(options); return ret; @@ -2957,11 +2974,11 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->image_name_len; + size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; - sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->image_name); + sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); dout("rbd id object name is %s\n", object_name); /* Response will be an encoded string, which includes a length */ @@ -2984,15 +3001,15 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; - rbd_dev->image_id = ceph_extract_encoded_string(&p, + rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->image_id_len, + &rbd_dev->spec->image_id_len, GFP_NOIO); - if (IS_ERR(rbd_dev->image_id)) { - ret = PTR_ERR(rbd_dev->image_id); - rbd_dev->image_id = NULL; + if (IS_ERR(rbd_dev->spec->image_id)) { + ret = PTR_ERR(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; } else { - dout("image_id is %s\n", rbd_dev->image_id); + dout("image_id is %s\n", rbd_dev->spec->image_id); } out: kfree(response); @@ -3008,20 +3025,21 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) /* Version 1 images have no id; empty string is used */ - rbd_dev->image_id = kstrdup("", GFP_KERNEL); - if (!rbd_dev->image_id) + rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); + if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->image_id_len = 0; + rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->image_name_len + sizeof (RBD_SUFFIX); + size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; goto out_err; } - sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); + sprintf(rbd_dev->header_name, "%s%s", + rbd_dev->spec->image_name, RBD_SUFFIX); /* Populate rbd image metadata */ @@ -3038,8 +3056,8 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) out_err: kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; - kfree(rbd_dev->image_id); - rbd_dev->image_id = NULL; + kfree(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; return ret; } @@ -3054,12 +3072,12 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; sprintf(rbd_dev->header_name, "%s%s", - RBD_HEADER_PREFIX, rbd_dev->image_id); + RBD_HEADER_PREFIX, rbd_dev->spec->image_id); /* Get the size and object order for the image */ @@ -3147,6 +3165,9 @@ static ssize_t rbd_add(struct bus_type *bus, rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) return -ENOMEM; + rbd_dev->spec = kzalloc(sizeof (*rbd_dev->spec), GFP_KERNEL); + if (!rbd_dev->spec) + goto err_out_mem; /* static rbd_device initialization */ spin_lock_init(&rbd_dev->lock); @@ -3167,10 +3188,10 @@ static ssize_t rbd_add(struct bus_type *bus, /* pick the pool */ osdc = &rbd_dev->rbd_client->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); + rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->spec->pool_name); if (rc < 0) goto err_out_client; - rbd_dev->pool_id = (u64) rc; + rbd_dev->spec->pool_id = (u64) rc; rc = rbd_dev_probe(rbd_dev); if (rc < 0) @@ -3257,15 +3278,16 @@ err_out_probe: err_out_client: kfree(rbd_dev->header_name); rbd_put_client(rbd_dev); - kfree(rbd_dev->image_id); + kfree(rbd_dev->spec->image_id); err_out_args: if (ceph_opts) ceph_destroy_options(ceph_opts); - kfree(rbd_dev->snap_name); - kfree(rbd_dev->image_name); - kfree(rbd_dev->pool_name); + kfree(rbd_dev->spec->snap_name); + kfree(rbd_dev->spec->image_name); + kfree(rbd_dev->spec->pool_name); kfree(rbd_opts); err_out_mem: + kfree(rbd_dev->spec); kfree(rbd_dev); dout("Error adding device %s\n", buf); @@ -3314,11 +3336,11 @@ static void rbd_dev_release(struct device *dev) rbd_header_free(&rbd_dev->header); /* done with the id, and with the rbd_dev */ - kfree(rbd_dev->snap_name); - kfree(rbd_dev->image_id); + kfree(rbd_dev->spec->snap_name); + kfree(rbd_dev->spec->image_id); kfree(rbd_dev->header_name); - kfree(rbd_dev->pool_name); - kfree(rbd_dev->image_name); + kfree(rbd_dev->spec->pool_name); + kfree(rbd_dev->spec->image_name); rbd_dev_id_put(rbd_dev); kfree(rbd_dev); -- cgit v1.2.3-70-g09d2 From 8b8fb99c5c93a0bdfe7b0c0c9fd2d41a3244555e Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 26 Oct 2012 17:25:24 -0500 Subject: rbd: add reference counting to rbd_spec With layered images we'll share rbd_spec structures, so add a reference count to it. It neatens up some code also. A silly get/put pair is added to the alloc routine just to avoid "defined but not used" warnings. It will go away soon. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 52 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2049810fcdc..86206a75017 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2134,6 +2134,45 @@ static struct device_type rbd_snap_device_type = { .release = rbd_snap_dev_release, }; +static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) +{ + kref_get(&spec->kref); + + return spec; +} + +static void rbd_spec_free(struct kref *kref); +static void rbd_spec_put(struct rbd_spec *spec) +{ + if (spec) + kref_put(&spec->kref, rbd_spec_free); +} + +static struct rbd_spec *rbd_spec_alloc(void) +{ + struct rbd_spec *spec; + + spec = kzalloc(sizeof (*spec), GFP_KERNEL); + if (!spec) + return NULL; + kref_init(&spec->kref); + + rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ + + return spec; +} + +static void rbd_spec_free(struct kref *kref) +{ + struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); + + kfree(spec->pool_name); + kfree(spec->image_id); + kfree(spec->image_name); + kfree(spec->snap_name); + kfree(spec); +} + static bool rbd_snap_registered(struct rbd_snap *snap) { bool ret = snap->dev.type == &rbd_snap_device_type; @@ -3165,7 +3204,7 @@ static ssize_t rbd_add(struct bus_type *bus, rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) return -ENOMEM; - rbd_dev->spec = kzalloc(sizeof (*rbd_dev->spec), GFP_KERNEL); + rbd_dev->spec = rbd_spec_alloc(); if (!rbd_dev->spec) goto err_out_mem; @@ -3278,16 +3317,12 @@ err_out_probe: err_out_client: kfree(rbd_dev->header_name); rbd_put_client(rbd_dev); - kfree(rbd_dev->spec->image_id); err_out_args: if (ceph_opts) ceph_destroy_options(ceph_opts); - kfree(rbd_dev->spec->snap_name); - kfree(rbd_dev->spec->image_name); - kfree(rbd_dev->spec->pool_name); kfree(rbd_opts); err_out_mem: - kfree(rbd_dev->spec); + rbd_spec_put(rbd_dev->spec); kfree(rbd_dev); dout("Error adding device %s\n", buf); @@ -3336,12 +3371,9 @@ static void rbd_dev_release(struct device *dev) rbd_header_free(&rbd_dev->header); /* done with the id, and with the rbd_dev */ - kfree(rbd_dev->spec->snap_name); - kfree(rbd_dev->spec->image_id); kfree(rbd_dev->header_name); - kfree(rbd_dev->spec->pool_name); - kfree(rbd_dev->spec->image_name); rbd_dev_id_put(rbd_dev); + rbd_spec_put(rbd_dev->spec); kfree(rbd_dev); /* release module ref */ -- cgit v1.2.3-70-g09d2 From 859c31df9cee9d1e1308b3b024b61355e6a629a5 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: fill rbd_spec in rbd_add_parse_args() Pass the address of an rbd_spec structure to rbd_add_parse_args(). Use it to hold the information defining the rbd image to be mapped in an rbd_add() call. Use the result in the caller to initialize the rbd_dev->id field. This means rbd_dev is no longer needed in rbd_add_parse_args(), so get rid of it. Now that this transformation of rbd_add_parse_args() is complete, correct and expand on the its header documentation to reflect the new reality. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 113 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 38 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 86206a75017..be85d925dfd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2887,25 +2887,58 @@ static inline char *dup_token(const char **buf, size_t *lenp) } /* - * This fills in the pool_name, image_name, image_name_len, rbd_dev, - * rbd_md_name, and name fields of the given rbd_dev, based on the - * list of monitor addresses and other options provided via - * /sys/bus/rbd/add. Returns a pointer to a dynamically-allocated - * copy of the snapshot name to map if successful, or a - * pointer-coded error otherwise. + * Parse the options provided for an "rbd add" (i.e., rbd image + * mapping) request. These arrive via a write to /sys/bus/rbd/add, + * and the data written is passed here via a NUL-terminated buffer. + * Returns 0 if successful or an error code otherwise. * - * Note: rbd_dev is assumed to have been initially zero-filled. + * The information extracted from these options is recorded in + * the other parameters which return dynamically-allocated + * structures: + * ceph_opts + * The address of a pointer that will refer to a ceph options + * structure. Caller must release the returned pointer using + * ceph_destroy_options() when it is no longer needed. + * rbd_opts + * Address of an rbd options pointer. Fully initialized by + * this function; caller must release with kfree(). + * spec + * Address of an rbd image specification pointer. Fully + * initialized by this function based on parsed options. + * Caller must release with rbd_spec_put(). + * + * The options passed take this form: + * [] + * where: + * + * A comma-separated list of one or more monitor addresses. + * A monitor address is an ip address, optionally followed + * by a port number (separated by a colon). + * I.e.: ip1[:port1][,ip2[:port2]...] + * + * A comma-separated list of ceph and/or rbd options. + * + * The name of the rados pool containing the rbd image. + * + * The name of the image in that pool to map. + * + * An optional snapshot id. If provided, the mapping will + * present data from the image at the time that snapshot was + * created. The image head is used if no snapshot id is + * provided. Snapshot mappings are always read-only. */ -static int rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf, +static int rbd_add_parse_args(const char *buf, struct ceph_options **ceph_opts, - struct rbd_options **opts) + struct rbd_options **opts, + struct rbd_spec **rbd_spec) { size_t len; + char *options; const char *mon_addrs; size_t mon_addrs_size; - char *options; + struct rbd_spec *spec = NULL; struct rbd_options *rbd_opts = NULL; + struct ceph_options *copts; int ret; /* The first four tokens are required */ @@ -2924,17 +2957,20 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, if (!*options) goto out_err; /* Missing options */ - rbd_dev->spec->pool_name = dup_token(&buf, NULL); - if (!rbd_dev->spec->pool_name) + spec = rbd_spec_alloc(); + if (!spec) goto out_mem; - if (!*rbd_dev->spec->pool_name) + + spec->pool_name = dup_token(&buf, NULL); + if (!spec->pool_name) + goto out_mem; + if (!*spec->pool_name) goto out_err; /* Missing pool name */ - rbd_dev->spec->image_name = - dup_token(&buf, &rbd_dev->spec->image_name_len); - if (!rbd_dev->spec->image_name) + spec->image_name = dup_token(&buf, &spec->image_name_len); + if (!spec->image_name) goto out_mem; - if (!*rbd_dev->spec->image_name) + if (!*spec->image_name) goto out_err; /* Missing image name */ /* @@ -2949,11 +2985,11 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, ret = -ENAMETOOLONG; goto out_err; } - rbd_dev->spec->snap_name = kmalloc(len + 1, GFP_KERNEL); - if (!rbd_dev->spec->snap_name) + spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + if (!spec->snap_name) goto out_mem; - memcpy(rbd_dev->spec->snap_name, buf, len); - *(rbd_dev->spec->snap_name + len) = '\0'; + memcpy(spec->snap_name, buf, len); + *(spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -2963,25 +2999,25 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; - *ceph_opts = ceph_parse_options(options, mon_addrs, + copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, parse_rbd_opts_token, rbd_opts); - kfree(options); - if (IS_ERR(*ceph_opts)) { - ret = PTR_ERR(*ceph_opts); + if (IS_ERR(copts)) { + ret = PTR_ERR(copts); goto out_err; } + kfree(options); + + *ceph_opts = copts; *opts = rbd_opts; + *rbd_spec = spec; return 0; out_mem: ret = -ENOMEM; out_err: - kfree(rbd_dev->spec->image_name); - rbd_dev->spec->image_name = NULL; - rbd_dev->spec->image_name_len = 0; - kfree(rbd_dev->spec->pool_name); - rbd_dev->spec->pool_name = NULL; + kfree(rbd_opts); + rbd_spec_put(spec); kfree(options); return ret; @@ -3195,6 +3231,7 @@ static ssize_t rbd_add(struct bus_type *bus, struct rbd_device *rbd_dev = NULL; struct ceph_options *ceph_opts = NULL; struct rbd_options *rbd_opts = NULL; + struct rbd_spec *spec = NULL; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3204,9 +3241,6 @@ static ssize_t rbd_add(struct bus_type *bus, rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); if (!rbd_dev) return -ENOMEM; - rbd_dev->spec = rbd_spec_alloc(); - if (!rbd_dev->spec) - goto err_out_mem; /* static rbd_device initialization */ spin_lock_init(&rbd_dev->lock); @@ -3215,9 +3249,10 @@ static ssize_t rbd_add(struct bus_type *bus, init_rwsem(&rbd_dev->header_rwsem); /* parse add command */ - rc = rbd_add_parse_args(rbd_dev, buf, &ceph_opts, &rbd_opts); + rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); if (rc < 0) goto err_out_mem; + rbd_dev->mapping.read_only = rbd_opts->read_only; rc = rbd_get_client(rbd_dev, ceph_opts); @@ -3227,10 +3262,12 @@ static ssize_t rbd_add(struct bus_type *bus, /* pick the pool */ osdc = &rbd_dev->rbd_client->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->spec->pool_name); + rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); if (rc < 0) goto err_out_client; - rbd_dev->spec->pool_id = (u64) rc; + spec->pool_id = (u64) rc; + + rbd_dev->spec = spec; rc = rbd_dev_probe(rbd_dev); if (rc < 0) @@ -3321,8 +3358,8 @@ err_out_args: if (ceph_opts) ceph_destroy_options(ceph_opts); kfree(rbd_opts); + rbd_spec_put(spec); err_out_mem: - rbd_spec_put(rbd_dev->spec); kfree(rbd_dev); dout("Error adding device %s\n", buf); -- cgit v1.2.3-70-g09d2 From 9d3997fdf4c82adfb37a4886a21eaa513ee071b6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: don't pass rbd_dev to rbd_get_client() The only reason rbd_dev is passed to rbd_get_client() is so its rbd_client field can get assigned. Instead, just return the rbd_client pointer as a result and have the caller do the assignment. Change rbd_put_client() so it takes an rbd_client structure, so follows the more typical symmetry with rbd_get_client(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index be85d925dfd..a528d4ca7a6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -467,23 +467,17 @@ static int parse_rbd_opts_token(char *c, void *private) * Get a ceph client with specific addr and configuration, if one does * not exist create it. */ -static int rbd_get_client(struct rbd_device *rbd_dev, - struct ceph_options *ceph_opts) +static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) { struct rbd_client *rbdc; rbdc = rbd_client_find(ceph_opts); - if (rbdc) { - /* using an existing client */ + if (rbdc) /* using an existing client */ ceph_destroy_options(ceph_opts); - } else { + else rbdc = rbd_client_create(ceph_opts); - if (IS_ERR(rbdc)) - return PTR_ERR(rbdc); - } - rbd_dev->rbd_client = rbdc; - return 0; + return rbdc; } /* @@ -508,10 +502,9 @@ static void rbd_client_release(struct kref *kref) * Drop reference to ceph client node. If it's not referenced anymore, release * it. */ -static void rbd_put_client(struct rbd_device *rbd_dev) +static void rbd_put_client(struct rbd_client *rbdc) { - kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - rbd_dev->rbd_client = NULL; + kref_put(&rbdc->kref, rbd_client_release); } /* @@ -3232,6 +3225,7 @@ static ssize_t rbd_add(struct bus_type *bus, struct ceph_options *ceph_opts = NULL; struct rbd_options *rbd_opts = NULL; struct rbd_spec *spec = NULL; + struct rbd_client *rbdc; struct ceph_osd_client *osdc; int rc = -ENOMEM; @@ -3255,13 +3249,16 @@ static ssize_t rbd_add(struct bus_type *bus, rbd_dev->mapping.read_only = rbd_opts->read_only; - rc = rbd_get_client(rbd_dev, ceph_opts); - if (rc < 0) + rbdc = rbd_get_client(ceph_opts); + if (IS_ERR(rbdc)) { + rc = PTR_ERR(rbdc); goto err_out_args; + } + rbd_dev->rbd_client = rbdc; ceph_opts = NULL; /* ceph_opts now owned by rbd_dev client */ /* pick the pool */ - osdc = &rbd_dev->rbd_client->client->osdc; + osdc = &rbdc->client->osdc; rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); if (rc < 0) goto err_out_client; @@ -3353,7 +3350,7 @@ err_out_probe: rbd_header_free(&rbd_dev->header); err_out_client: kfree(rbd_dev->header_name); - rbd_put_client(rbd_dev); + rbd_put_client(rbdc); err_out_args: if (ceph_opts) ceph_destroy_options(ceph_opts); @@ -3398,7 +3395,7 @@ static void rbd_dev_release(struct device *dev) if (rbd_dev->watch_event) rbd_req_sync_unwatch(rbd_dev); - rbd_put_client(rbd_dev); + rbd_put_client(rbd_dev->rbd_client); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); -- cgit v1.2.3-70-g09d2 From bd4ba6554dcbae652b8b27a44f5a7795c9f3178a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: consolidate rbd_dev init in rbd_add() Group the allocation and initialization of fields of the rbd device structure created in rbd_add(). Move the grouped code down later in the function, just prior to the call to rbd_dev_probe(). This is for the most part simple code movement. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a528d4ca7a6..4771de2fba8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3232,29 +3232,16 @@ static ssize_t rbd_add(struct bus_type *bus, if (!try_module_get(THIS_MODULE)) return -ENODEV; - rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); - if (!rbd_dev) - return -ENOMEM; - - /* static rbd_device initialization */ - spin_lock_init(&rbd_dev->lock); - INIT_LIST_HEAD(&rbd_dev->node); - INIT_LIST_HEAD(&rbd_dev->snaps); - init_rwsem(&rbd_dev->header_rwsem); - /* parse add command */ rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); if (rc < 0) - goto err_out_mem; - - rbd_dev->mapping.read_only = rbd_opts->read_only; + goto err_out_module; rbdc = rbd_get_client(ceph_opts); if (IS_ERR(rbdc)) { rc = PTR_ERR(rbdc); goto err_out_args; } - rbd_dev->rbd_client = rbdc; ceph_opts = NULL; /* ceph_opts now owned by rbd_dev client */ /* pick the pool */ @@ -3264,11 +3251,22 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; + rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); + if (!rbd_dev) + goto err_out_client; + + spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->node); + INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header_rwsem); + rbd_dev->rbd_client = rbdc; rbd_dev->spec = spec; + rbd_dev->mapping.read_only = rbd_opts->read_only; + rc = rbd_dev_probe(rbd_dev); if (rc < 0) - goto err_out_client; + goto err_out_mem; /* no need to lock here, as rbd_dev is not registered yet */ rc = rbd_dev_snaps_update(rbd_dev); @@ -3348,19 +3346,20 @@ err_out_snaps: rbd_remove_all_snaps(rbd_dev); err_out_probe: rbd_header_free(&rbd_dev->header); -err_out_client: kfree(rbd_dev->header_name); +err_out_mem: + kfree(rbd_dev); +err_out_client: rbd_put_client(rbdc); err_out_args: if (ceph_opts) ceph_destroy_options(ceph_opts); kfree(rbd_opts); rbd_spec_put(spec); -err_out_mem: - kfree(rbd_dev); +err_out_module: + module_put(THIS_MODULE); dout("Error adding device %s\n", buf); - module_put(THIS_MODULE); return (ssize_t) rc; } -- cgit v1.2.3-70-g09d2 From c53d589337e9a211413484a604c76072e8474dc0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: define rbd_dev_{create,destroy}() helpers Encapsulate the creation/initialization and destruction of rbd device structures. The rbd_client and the rbd_spec structures provided on creation hold references whose ownership is transferred to the new rbd_device structure. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 62 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4771de2fba8..a8ad8f8370b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -504,7 +504,8 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_client *rbdc) { - kref_put(&rbdc->kref, rbd_client_release); + if (rbdc) + kref_put(&rbdc->kref, rbd_client_release); } /* @@ -2166,6 +2167,34 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } +struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, + struct rbd_spec *spec) +{ + struct rbd_device *rbd_dev; + + rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); + if (!rbd_dev) + return NULL; + + spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->node); + INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header_rwsem); + + rbd_dev->spec = spec; + rbd_dev->rbd_client = rbdc; + + return rbd_dev; +} + +static void rbd_dev_destroy(struct rbd_device *rbd_dev) +{ + kfree(rbd_dev->header_name); + rbd_put_client(rbd_dev->rbd_client); + rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev); +} + static bool rbd_snap_registered(struct rbd_snap *snap) { bool ret = snap->dev.type == &rbd_snap_device_type; @@ -3242,7 +3271,7 @@ static ssize_t rbd_add(struct bus_type *bus, rc = PTR_ERR(rbdc); goto err_out_args; } - ceph_opts = NULL; /* ceph_opts now owned by rbd_dev client */ + ceph_opts = NULL; /* rbd_dev client now owns this */ /* pick the pool */ osdc = &rbdc->client->osdc; @@ -3251,22 +3280,19 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; - rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); + rbd_dev = rbd_dev_create(rbdc, spec); if (!rbd_dev) goto err_out_client; - - spin_lock_init(&rbd_dev->lock); - INIT_LIST_HEAD(&rbd_dev->node); - INIT_LIST_HEAD(&rbd_dev->snaps); - init_rwsem(&rbd_dev->header_rwsem); - rbd_dev->rbd_client = rbdc; - rbd_dev->spec = spec; + rbdc = NULL; /* rbd_dev now owns this */ + spec = NULL; /* rbd_dev now owns this */ rbd_dev->mapping.read_only = rbd_opts->read_only; + kfree(rbd_opts); + rbd_opts = NULL; /* done with this */ rc = rbd_dev_probe(rbd_dev); if (rc < 0) - goto err_out_mem; + goto err_out_rbd_dev; /* no need to lock here, as rbd_dev is not registered yet */ rc = rbd_dev_snaps_update(rbd_dev); @@ -3317,8 +3343,6 @@ static ssize_t rbd_add(struct bus_type *bus, if (rc) goto err_out_bus; - kfree(rbd_opts); - /* Everything's ready. Announce the disk to the world. */ add_disk(rbd_dev->disk); @@ -3332,7 +3356,6 @@ err_out_bus: /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); - kfree(rbd_opts); return rc; @@ -3346,9 +3369,8 @@ err_out_snaps: rbd_remove_all_snaps(rbd_dev); err_out_probe: rbd_header_free(&rbd_dev->header); - kfree(rbd_dev->header_name); -err_out_mem: - kfree(rbd_dev); +err_out_rbd_dev: + rbd_dev_destroy(rbd_dev); err_out_client: rbd_put_client(rbdc); err_out_args: @@ -3394,7 +3416,6 @@ static void rbd_dev_release(struct device *dev) if (rbd_dev->watch_event) rbd_req_sync_unwatch(rbd_dev); - rbd_put_client(rbd_dev->rbd_client); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3404,10 +3425,9 @@ static void rbd_dev_release(struct device *dev) rbd_header_free(&rbd_dev->header); /* done with the id, and with the rbd_dev */ - kfree(rbd_dev->header_name); rbd_dev_id_put(rbd_dev); - rbd_spec_put(rbd_dev->spec); - kfree(rbd_dev); + rbd_assert(rbd_dev->rbd_client != NULL); + rbd_dev_destroy(rbd_dev); /* release module ref */ module_put(THIS_MODULE); -- cgit v1.2.3-70-g09d2 From 83a06263625b823afa3a842ddbf53473c22f24b2 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 15:47:17 -0500 Subject: rbd: encapsulate last part of probe Group the activities that now take place after an rbd_dev_probe() call into a single function, and move the call to that function into rbd_dev_probe() itself. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 161 ++++++++++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 75 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a8ad8f8370b..8d26c0f2be1 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3221,6 +3221,84 @@ out_err: return ret; } +static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) +{ + int ret; + + /* no need to lock here, as rbd_dev is not registered yet */ + ret = rbd_dev_snaps_update(rbd_dev); + if (ret) + return ret; + + ret = rbd_dev_set_mapping(rbd_dev); + if (ret) + goto err_out_snaps; + + /* generate unique id: find highest unique id, add one */ + rbd_dev_id_get(rbd_dev); + + /* Fill in the device name, now that we have its id. */ + BUILD_BUG_ON(DEV_NAME_LEN + < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); + sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); + + /* Get our block major device number. */ + + ret = register_blkdev(0, rbd_dev->name); + if (ret < 0) + goto err_out_id; + rbd_dev->major = ret; + + /* Set up the blkdev mapping. */ + + ret = rbd_init_disk(rbd_dev); + if (ret) + goto err_out_blkdev; + + ret = rbd_bus_add_dev(rbd_dev); + if (ret) + goto err_out_disk; + + /* + * At this point cleanup in the event of an error is the job + * of the sysfs code (initiated by rbd_bus_del_dev()). + */ + down_write(&rbd_dev->header_rwsem); + ret = rbd_dev_snaps_register(rbd_dev); + up_write(&rbd_dev->header_rwsem); + if (ret) + goto err_out_bus; + + ret = rbd_init_watch_dev(rbd_dev); + if (ret) + goto err_out_bus; + + /* Everything's ready. Announce the disk to the world. */ + + add_disk(rbd_dev->disk); + + pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, + (unsigned long long) rbd_dev->mapping.size); + + return ret; +err_out_bus: + /* this will also clean up rest of rbd_dev stuff */ + + rbd_bus_del_dev(rbd_dev); + + return ret; +err_out_disk: + rbd_free_disk(rbd_dev); +err_out_blkdev: + unregister_blkdev(rbd_dev->major, rbd_dev->name); +err_out_id: + rbd_dev_id_put(rbd_dev); +err_out_snaps: + rbd_remove_all_snaps(rbd_dev); + + return ret; +} + /* * Probe for the existence of the header object for the given rbd * device. For format 2 images this includes determining the image @@ -3240,9 +3318,16 @@ static int rbd_dev_probe(struct rbd_device *rbd_dev) ret = rbd_dev_v1_probe(rbd_dev); else ret = rbd_dev_v2_probe(rbd_dev); - if (ret) + if (ret) { dout("probe failed, returning %d\n", ret); + return ret; + } + + ret = rbd_dev_probe_finish(rbd_dev); + if (ret) + rbd_header_free(&rbd_dev->header); + return ret; } @@ -3294,81 +3379,7 @@ static ssize_t rbd_add(struct bus_type *bus, if (rc < 0) goto err_out_rbd_dev; - /* no need to lock here, as rbd_dev is not registered yet */ - rc = rbd_dev_snaps_update(rbd_dev); - if (rc) - goto err_out_probe; - - rc = rbd_dev_set_mapping(rbd_dev); - if (rc) - goto err_out_snaps; - - /* generate unique id: find highest unique id, add one */ - rbd_dev_id_get(rbd_dev); - - /* Fill in the device name, now that we have its id. */ - BUILD_BUG_ON(DEV_NAME_LEN - < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); - sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); - - /* Get our block major device number. */ - - rc = register_blkdev(0, rbd_dev->name); - if (rc < 0) - goto err_out_id; - rbd_dev->major = rc; - - /* Set up the blkdev mapping. */ - - rc = rbd_init_disk(rbd_dev); - if (rc) - goto err_out_blkdev; - - rc = rbd_bus_add_dev(rbd_dev); - if (rc) - goto err_out_disk; - - /* - * At this point cleanup in the event of an error is the job - * of the sysfs code (initiated by rbd_bus_del_dev()). - */ - - down_write(&rbd_dev->header_rwsem); - rc = rbd_dev_snaps_register(rbd_dev); - up_write(&rbd_dev->header_rwsem); - if (rc) - goto err_out_bus; - - rc = rbd_init_watch_dev(rbd_dev); - if (rc) - goto err_out_bus; - - /* Everything's ready. Announce the disk to the world. */ - - add_disk(rbd_dev->disk); - - pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, - (unsigned long long) rbd_dev->mapping.size); - return count; - -err_out_bus: - /* this will also clean up rest of rbd_dev stuff */ - - rbd_bus_del_dev(rbd_dev); - - return rc; - -err_out_disk: - rbd_free_disk(rbd_dev); -err_out_blkdev: - unregister_blkdev(rbd_dev->major, rbd_dev->name); -err_out_id: - rbd_dev_id_put(rbd_dev); -err_out_snaps: - rbd_remove_all_snaps(rbd_dev); -err_out_probe: - rbd_header_free(&rbd_dev->header); err_out_rbd_dev: rbd_dev_destroy(rbd_dev); err_out_client: -- cgit v1.2.3-70-g09d2 From 2c0d0a10ea89456781218f458f6bf72e99d87d2a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 19:40:33 -0500 Subject: rbd: allow null image name We will know the image id for format 2 parent images, but won't initially know its image name. Avoid making the query for an image id in rbd_dev_image_id() if it's already known. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8d26c0f2be1..a8521338bf4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3067,6 +3067,14 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) void *response; void *p; + /* + * When probing a parent image, the image id is already + * known (and the image name likely is not). There's no + * need to fetch the image id again in this case. + */ + if (rbd_dev->spec->image_id) + return 0; + /* * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. -- cgit v1.2.3-70-g09d2 From a92ffdf8a9b09f8fae9a8f418f87f30a5e459570 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 19:40:33 -0500 Subject: rbd: allow null image name Format 2 parent images are partially identified by their image id, but it may not be possible to determine their image name. The name is not strictly needed for correct operation, so we won't be treating it as an error if we don't know it. Handle this case gracefully in rbd_name_show(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a8521338bf4..28052ff679c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1982,7 +1982,10 @@ static ssize_t rbd_name_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->spec->image_name); + if (rbd_dev->spec->image_name) + return sprintf(buf, "%s\n", rbd_dev->spec->image_name); + + return sprintf(buf, "(unknown)\n"); } static ssize_t rbd_image_id_show(struct device *dev, -- cgit v1.2.3-70-g09d2 From 86b00e0da6be7bbc16412f126c5b548ac5d91d50 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: get parent spec for version 2 images Add support for getting the the information identifying the parent image for rbd images that have them. The child image holds a reference to its parent image specification structure. Create a new entry "parent" in /sys/bus/rbd/image/N/ to report the identifying information for the parent image, if any. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- Documentation/ABI/testing/sysfs-bus-rbd | 4 + drivers/block/rbd.c | 131 ++++++++++++++++++++++++++++++++ include/linux/ceph/rados.h | 2 + 3 files changed, 137 insertions(+) (limited to 'drivers/block') diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd index 1cf2adf46b1..cd9213ccf3d 100644 --- a/Documentation/ABI/testing/sysfs-bus-rbd +++ b/Documentation/ABI/testing/sysfs-bus-rbd @@ -70,6 +70,10 @@ snap_* A directory per each snapshot +parent + + Information identifying the pool, image, and snapshot id for + the parent image in a layered rbd image (format 2 only). Entries under /sys/bus/rbd/devices//snap_ ------------------------------------------------------------- diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 28052ff679c..bce1fcfb518 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -217,6 +217,9 @@ struct rbd_device { struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; + struct rbd_spec *parent_spec; + u64 parent_overlap; + /* protects updating the header */ struct rw_semaphore header_rwsem; @@ -2009,6 +2012,49 @@ static ssize_t rbd_snap_show(struct device *dev, return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); } +/* + * For an rbd v2 image, shows the pool id, image id, and snapshot id + * for the parent image. If there is no parent, simply shows + * "(no parent image)". + */ +static ssize_t rbd_parent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + struct rbd_spec *spec = rbd_dev->parent_spec; + int count; + char *bufp = buf; + + if (!spec) + return sprintf(buf, "(no parent image)\n"); + + count = sprintf(bufp, "pool_id %llu\npool_name %s\n", + (unsigned long long) spec->pool_id, spec->pool_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, + spec->image_name ? spec->image_name : "(unknown)"); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", + (unsigned long long) spec->snap_id, spec->snap_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); + if (count < 0) + return count; + bufp += count; + + return (ssize_t) (bufp - buf); +} + static ssize_t rbd_image_refresh(struct device *dev, struct device_attribute *attr, const char *buf, @@ -2032,6 +2078,7 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); +static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); static struct attribute *rbd_attrs[] = { &dev_attr_size.attr, @@ -2043,6 +2090,7 @@ static struct attribute *rbd_attrs[] = { &dev_attr_name.attr, &dev_attr_image_id.attr, &dev_attr_current_snap.attr, + &dev_attr_parent.attr, &dev_attr_refresh.attr, NULL }; @@ -2192,6 +2240,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, static void rbd_dev_destroy(struct rbd_device *rbd_dev) { + rbd_spec_put(rbd_dev->parent_spec); kfree(rbd_dev->header_name); rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); @@ -2400,6 +2449,71 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev) &rbd_dev->header.features); } +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +{ + struct rbd_spec *parent_spec; + size_t size; + void *reply_buf = NULL; + __le64 snapid; + void *p; + void *end; + char *image_id; + u64 overlap; + size_t len = 0; + int ret; + + parent_spec = rbd_spec_alloc(); + if (!parent_spec) + return -ENOMEM; + + size = sizeof (__le64) + /* pool_id */ + sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ + sizeof (__le64) + /* snap_id */ + sizeof (__le64); /* overlap */ + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) { + ret = -ENOMEM; + goto out_err; + } + + snapid = cpu_to_le64(CEPH_NOSNAP); + ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + "rbd", "get_parent", + (char *) &snapid, sizeof (snapid), + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + if (ret < 0) + goto out_err; + + ret = -ERANGE; + p = reply_buf; + end = (char *) reply_buf + size; + ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); + if (parent_spec->pool_id == CEPH_NOPOOL) + goto out; /* No parent? No problem. */ + + image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_id)) { + ret = PTR_ERR(image_id); + goto out_err; + } + parent_spec->image_id = image_id; + ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); + ceph_decode_64_safe(&p, end, overlap, out_err); + + rbd_dev->parent_overlap = overlap; + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ +out: + ret = 0; +out_err: + kfree(reply_buf); + rbd_spec_put(parent_spec); + + return ret; +} + static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) { size_t size; @@ -3154,6 +3268,12 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) ret = rbd_read_header(rbd_dev, &rbd_dev->header); if (ret < 0) goto out_err; + + /* Version 1 images have no parent (no layering) */ + + rbd_dev->parent_spec = NULL; + rbd_dev->parent_overlap = 0; + rbd_dev->image_format = 1; dout("discovered version 1 image, header name is %s\n", @@ -3205,6 +3325,14 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; + /* If the image supports layering, get the parent info */ + + if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret < 0) + goto out_err; + } + /* crypto and compression type aren't (yet) supported for v2 images */ rbd_dev->header.crypt_type = 0; @@ -3224,6 +3352,9 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) return 0; out_err: + rbd_dev->parent_overlap = 0; + rbd_spec_put(rbd_dev->parent_spec); + rbd_dev->parent_spec = NULL; kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; kfree(rbd_dev->header.object_prefix); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 0a99099801a..15077db662e 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -87,6 +87,8 @@ struct ceph_pg { * * lpgp_num -- as above. */ +#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ + #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 #define CEPH_PG_POOL_VERSION 2 -- cgit v1.2.3-70-g09d2 From 9e15b77d9af3b63dfbff14e695336dfca88c22b2 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 19:40:33 -0500 Subject: rbd: get additional info in parent spec When a layered rbd image has a parent, that parent is identified only by its pool id, image id, and snapshot id. Images that have been mapped also record *names* for those three id's. Add code to look up these names for parent images so they match mapped images more closely. Skip doing this for an image if it already has its pool name defined (this will be the case for images mapped by the user). It is possible that an the name of a parent image can't be determined, even if the image id is valid. If this occurs it does not preclude correct operation, so don't treat this as an error. On the other hand, defined pools will always have both an id and a name. And any snapshot of an image identified as a parent for a clone image will exist, and will have a name (if not it indicates some other internal error). So treat failure to get these bits of information as errors. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bce1fcfb518..842caf4aab4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -70,7 +70,10 @@ #define RBD_SNAP_HEAD_NAME "-" +/* This allows a single page to hold an image name sent by OSD */ +#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1) #define RBD_IMAGE_ID_LEN_MAX 64 + #define RBD_OBJ_PREFIX_LEN_MAX 64 /* Feature bits */ @@ -658,6 +661,20 @@ out_err: return -ENOMEM; } +static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) +{ + struct rbd_snap *snap; + + if (snap_id == CEPH_NOSNAP) + return RBD_SNAP_HEAD_NAME; + + list_for_each_entry(snap, &rbd_dev->snaps, node) + if (snap_id == snap->id) + return snap->name; + + return NULL; +} + static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) { @@ -2499,6 +2516,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) goto out_err; } parent_spec->image_id = image_id; + parent_spec->image_id_len = len; ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, out_err); @@ -2514,6 +2532,117 @@ out_err: return ret; } +static char *rbd_dev_image_name(struct rbd_device *rbd_dev) +{ + size_t image_id_size; + char *image_id; + void *p; + void *end; + size_t size; + void *reply_buf = NULL; + size_t len = 0; + char *image_name = NULL; + int ret; + + rbd_assert(!rbd_dev->spec->image_name); + + image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + image_id = kmalloc(image_id_size, GFP_KERNEL); + if (!image_id) + return NULL; + + p = image_id; + end = (char *) image_id + image_id_size; + ceph_encode_string(&p, end, rbd_dev->spec->image_id, + (u32) rbd_dev->spec->image_id_len); + + size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) + goto out; + + ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + "rbd", "dir_get_name", + image_id, image_id_size, + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + if (ret < 0) + goto out; + p = reply_buf; + end = (char *) reply_buf + size; + image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_name)) + image_name = NULL; + else + dout("%s: name is %s len is %zd\n", __func__, image_name, len); +out: + kfree(reply_buf); + kfree(image_id); + + return image_name; +} + +/* + * When a parent image gets probed, we only have the pool, image, + * and snapshot ids but not the names of any of them. This call + * is made later to fill in those names. It has to be done after + * rbd_dev_snaps_update() has completed because some of the + * information (in particular, snapshot name) is not available + * until then. + */ +static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc; + const char *name; + void *reply_buf = NULL; + int ret; + + if (rbd_dev->spec->pool_name) + return 0; /* Already have the names */ + + /* Look up the pool name */ + + osdc = &rbd_dev->rbd_client->client->osdc; + name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); + if (!name) + return -EIO; /* pool id too large (>= 2^31) */ + + rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); + if (!rbd_dev->spec->pool_name) + return -ENOMEM; + + /* Fetch the image name; tolerate failure here */ + + name = rbd_dev_image_name(rbd_dev); + if (name) { + rbd_dev->spec->image_name_len = strlen(name); + rbd_dev->spec->image_name = (char *) name; + } else { + pr_warning(RBD_DRV_NAME "%d " + "unable to get image name for image id %s\n", + rbd_dev->major, rbd_dev->spec->image_id); + } + + /* Look up the snapshot name. */ + + name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); + if (!name) { + ret = -EIO; + goto out_err; + } + rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL); + if(!rbd_dev->spec->snap_name) + goto out_err; + + return 0; +out_err: + kfree(reply_buf); + kfree(rbd_dev->spec->pool_name); + rbd_dev->spec->pool_name = NULL; + + return ret; +} + static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) { size_t size; @@ -3372,6 +3501,10 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) if (ret) return ret; + ret = rbd_dev_probe_update_spec(rbd_dev); + if (ret) + goto err_out_snaps; + ret = rbd_dev_set_mapping(rbd_dev); if (ret) goto err_out_snaps; -- cgit v1.2.3-70-g09d2 From 42382b709bd1d143b9f0fa93e0a3a1f2f4210707 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 16 Nov 2012 09:29:16 -0600 Subject: rbd: do not allow remove of mounted-on image There is no check in rbd_remove() to see if anybody holds open the image being removed. That's not cool. Add a simple open count that goes up and down with opens and closes (releases) of the device, and don't allow an rbd image to be removed if the count is non-zero. Protect the updates of the open count value with ctl_mutex to ensure the underlying rbd device doesn't get removed while concurrently being opened. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- drivers/block/rbd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 842caf4aab4..c7bf9613ad4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -235,6 +235,7 @@ struct rbd_device { /* sysfs related */ struct device dev; + unsigned long open_count; }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -309,8 +310,11 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); rbd_get_dev(rbd_dev); set_device_ro(bdev, rbd_dev->mapping.read_only); + rbd_dev->open_count++; + mutex_unlock(&ctl_mutex); return 0; } @@ -319,7 +323,11 @@ static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + rbd_assert(rbd_dev->open_count > 0); + rbd_dev->open_count--; rbd_put_dev(rbd_dev); + mutex_unlock(&ctl_mutex); return 0; } @@ -3745,6 +3753,11 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } + if (rbd_dev->open_count) { + ret = -EBUSY; + goto done; + } + rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); -- cgit v1.2.3-70-g09d2 From 2fd82b9e92c2a718ae81fc987b4468ceeee6979b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 9 Nov 2012 15:05:54 -0600 Subject: rbd: get rid of RBD_MAX_SEG_NAME_LEN RBD_MAX_SEG_NAME_LEN represents the maximum length of an rbd object name (i.e., one of the objects providing storage backing an rbd image). Another symbol, MAX_OBJ_NAME_SIZE, is used in the osd client code to define the maximum length of any object name in an osd request. Right now they disagree, with RBD_MAX_SEG_NAME_LEN being too big. There's no real benefit at this point to defining the rbd object name length limit separate from any other object name, so just get rid of RBD_MAX_SEG_NAME_LEN and use MAX_OBJ_NAME_SIZE in its place. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- drivers/block/rbd.c | 6 +++--- drivers/block/rbd_types.h | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c7bf9613ad4..ce26b749ede 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -740,13 +740,13 @@ static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) u64 segment; int ret; - name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO); if (!name) return NULL; segment = offset >> rbd_dev->header.obj_order; - ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", + ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx", rbd_dev->header.object_prefix, segment); - if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { + if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { pr_err("error formatting segment name for #%llu (%d)\n", segment, ret); kfree(name); diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index cbe77fa105b..49d77cbcf8b 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -46,8 +46,6 @@ #define RBD_MIN_OBJ_ORDER 16 #define RBD_MAX_OBJ_ORDER 30 -#define RBD_MAX_SEG_NAME_LEN 128 - #define RBD_COMP_NONE 0 #define RBD_CRYPT_NONE 0 -- cgit v1.2.3-70-g09d2 From b8f5c6edca34ff441e1ccdec68828e933a1b905b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 1 Nov 2012 08:39:26 -0500 Subject: rbd: don't use ENOTSUPP ENOTSUPP is not a standard errno (it shows up as "Unknown error 524" in an error message). This is what was getting produced when the the local rbd code does not implement features required by a discovered rbd image. Change the error code returned in this case to ENXIO. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ce26b749ede..4daa400c13a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2456,7 +2456,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, incompat = le64_to_cpu(features_buf.incompat); if (incompat & ~RBD_FEATURES_ALL) - return -ENOTSUPP; + return -ENXIO; *snap_features = le64_to_cpu(features_buf.features); -- cgit v1.2.3-70-g09d2 From c3e946ce7276faf0b302acd25c7b874edbeba661 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 16 Nov 2012 09:29:16 -0600 Subject: rbd: get rid of rbd_{get,put}_dev() The functions rbd_get_dev() and rbd_put_dev() are trivial wrappers that add no value, and their existence suggests they may do more than what they do. Get rid of them. Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- drivers/block/rbd.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4daa400c13a..89576a0b3f2 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -290,16 +290,6 @@ static struct device rbd_root_dev = { # define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ -static struct device *rbd_get_dev(struct rbd_device *rbd_dev) -{ - return get_device(&rbd_dev->dev); -} - -static void rbd_put_dev(struct rbd_device *rbd_dev) -{ - put_device(&rbd_dev->dev); -} - static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); @@ -311,7 +301,7 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) return -EROFS; mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbd_get_dev(rbd_dev); + (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); rbd_dev->open_count++; mutex_unlock(&ctl_mutex); @@ -326,7 +316,7 @@ static int rbd_release(struct gendisk *disk, fmode_t mode) mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); rbd_assert(rbd_dev->open_count > 0); rbd_dev->open_count--; - rbd_put_dev(rbd_dev); + put_device(&rbd_dev->dev); mutex_unlock(&ctl_mutex); return 0; -- cgit v1.2.3-70-g09d2