diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 11:09:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 11:09:13 -0700 |
commit | 240cd6a817bd855e3f1e615ed9ae16407f8cfce6 (patch) | |
tree | da7d6267d549cd0fbdff3f30032720b416d1ff3d /fs/ceph/export.c | |
parent | 3021112598d2b722eee54d8a662fea2089abbdbc (diff) | |
parent | a30be7cb2ccb995ad5e67fd4b548f11fe37fc8b1 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
"The biggest chunk is a series of patches from Ilya that add support
for new Ceph osd and crush map features, including some new tunables,
primary affinity, and the new encoding that is needed for erasure
coding support. This brings things into parity with the server side
and the looming firefly release. There is also support for allocation
hints in RBD that help limit fragmentation on the server side.
There is also a series of patches from Zheng fixing NFS reexport,
directory fragmentation support, flock vs fnctl behavior, and some
issues with clustered MDS.
Finally, there are some miscellaneous fixes from Yunchuan Wen for
fscache, Fabian Frederick for ACLs, and from me for fsync(dirfd)
behavior"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (79 commits)
ceph: skip invalid dentry during dcache readdir
libceph: dump pool {read,write}_tier to debugfs
libceph: output primary affinity values on osdmap updates
ceph: flush cap release queue when trimming session caps
ceph: don't grabs open file reference for aborted request
ceph: drop extra open file reference in ceph_atomic_open()
ceph: preallocate buffer for readdir reply
libceph: enable PRIMARY_AFFINITY feature bit
libceph: redo ceph_calc_pg_primary() in terms of ceph_calc_pg_acting()
libceph: add support for osd primary affinity
libceph: add support for primary_temp mappings
libceph: return primary from ceph_calc_pg_acting()
libceph: switch ceph_calc_pg_acting() to new helpers
libceph: introduce apply_temps() helper
libceph: introduce pg_to_raw_osds() and raw_to_up_osds() helpers
libceph: ceph_can_shift_osds(pool) and pool type defines
libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions
libceph: enable OSDMAP_ENC feature bit
libceph: primary_affinity decode bits
libceph: primary_affinity infrastructure
...
Diffstat (limited to 'fs/ceph/export.c')
-rw-r--r-- | fs/ceph/export.c | 267 |
1 files changed, 120 insertions, 147 deletions
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 16796be53ca..00d6af6a32e 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -8,23 +8,6 @@ #include "mds_client.h" /* - * NFS export support - * - * NFS re-export of a ceph mount is, at present, only semireliable. - * The basic issue is that the Ceph architectures doesn't lend itself - * well to generating filehandles that will remain valid forever. - * - * So, we do our best. If you're lucky, your inode will be in the - * client's cache. If it's not, and you have a connectable fh, then - * the MDS server may be able to find it for you. Otherwise, you get - * ESTALE. - * - * There are ways to this more reliable, but in the non-connectable fh - * case, we won't every work perfectly, and in the connectable case, - * some changes are needed on the MDS side to work better. - */ - -/* * Basic fh */ struct ceph_nfs_fh { @@ -32,22 +15,12 @@ struct ceph_nfs_fh { } __attribute__ ((packed)); /* - * Larger 'connectable' fh that includes parent ino and name hash. - * Use this whenever possible, as it works more reliably. + * Larger fh that includes parent ino. */ struct ceph_nfs_confh { u64 ino, parent_ino; - u32 parent_name_hash; } __attribute__ ((packed)); -/* - * The presence of @parent_inode here tells us whether NFS wants a - * connectable file handle. However, we want to make a connectionable - * file handle unconditionally so that the MDS gets as much of a hint - * as possible. That means we only use @parent_dentry to indicate - * whether nfsd wants a connectable fh, and whether we should indicate - * failure from a too-small @max_len. - */ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { @@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct ceph_nfs_confh *cfh = (void *)rawfh; int connected_handle_length = sizeof(*cfh)/4; int handle_length = sizeof(*fh)/4; - struct dentry *dentry; - struct dentry *parent; /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; - dentry = d_find_alias(inode); + if (parent_inode && (*max_len < connected_handle_length)) { + *max_len = connected_handle_length; + return FILEID_INVALID; + } else if (*max_len < handle_length) { + *max_len = handle_length; + return FILEID_INVALID; + } - /* if we found an alias, generate a connectable fh */ - if (*max_len >= connected_handle_length && dentry) { - dout("encode_fh %p connectable\n", dentry); - spin_lock(&dentry->d_lock); - parent = dentry->d_parent; + if (parent_inode) { + dout("encode_fh %llx with parent %llx\n", + ceph_ino(inode), ceph_ino(parent_inode)); cfh->ino = ceph_ino(inode); - cfh->parent_ino = ceph_ino(parent->d_inode); - cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, - dentry); + cfh->parent_ino = ceph_ino(parent_inode); *max_len = connected_handle_length; - type = 2; - spin_unlock(&dentry->d_lock); - } else if (*max_len >= handle_length) { - if (parent_inode) { - /* nfsd wants connectable */ - *max_len = connected_handle_length; - type = FILEID_INVALID; - } else { - dout("encode_fh %p\n", dentry); - fh->ino = ceph_ino(inode); - *max_len = handle_length; - type = 1; - } + type = FILEID_INO32_GEN_PARENT; } else { + dout("encode_fh %llx\n", ceph_ino(inode)); + fh->ino = ceph_ino(inode); *max_len = handle_length; - type = FILEID_INVALID; + type = FILEID_INO32_GEN; } - if (dentry) - dput(dentry); return type; } -/* - * convert regular fh to dentry - * - * FIXME: we should try harder by querying the mds for the ino. - */ -static struct dentry *__fh_to_dentry(struct super_block *sb, - struct ceph_nfs_fh *fh, int fh_len) +static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; @@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, struct ceph_vino vino; int err; - if (fh_len < sizeof(*fh) / 4) - return ERR_PTR(-ESTALE); - - dout("__fh_to_dentry %llx\n", fh->ino); - vino.ino = fh->ino; + vino.ino = ino; vino.snap = CEPH_NOSNAP; inode = ceph_find_inode(sb, vino); if (!inode) { @@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, dentry = d_obtain_alias(inode); if (IS_ERR(dentry)) { - pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", - fh->ino, inode); iput(inode); return dentry; } err = ceph_init_dentry(dentry); if (err < 0) { - iput(inode); + dput(dentry); return ERR_PTR(err); } - dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry); + dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry); return dentry; } /* - * convert connectable fh to dentry + * convert regular fh to dentry */ -static struct dentry *__cfh_to_dentry(struct super_block *sb, - struct ceph_nfs_confh *cfh, int fh_len) +static struct dentry *ceph_fh_to_dentry(struct super_block *sb, + struct fid *fid, + int fh_len, int fh_type) +{ + struct ceph_nfs_fh *fh = (void *)fid->raw; + + if (fh_type != FILEID_INO32_GEN && + fh_type != FILEID_INO32_GEN_PARENT) + return NULL; + if (fh_len < sizeof(*fh) / 4) + return NULL; + + dout("fh_to_dentry %llx\n", fh->ino); + return __fh_to_dentry(sb, fh->ino); +} + +static struct dentry *__get_parent(struct super_block *sb, + struct dentry *child, u64 ino) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_request *req; struct inode *inode; struct dentry *dentry; - struct ceph_vino vino; int err; - if (fh_len < sizeof(*cfh) / 4) - return ERR_PTR(-ESTALE); - - dout("__cfh_to_dentry %llx (%llx/%x)\n", - cfh->ino, cfh->parent_ino, cfh->parent_name_hash); - - vino.ino = cfh->ino; - vino.snap = CEPH_NOSNAP; - inode = ceph_find_inode(sb, vino); - if (!inode) { - struct ceph_mds_request *req; - - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, - USE_ANY_MDS); - if (IS_ERR(req)) - return ERR_CAST(req); + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT, + USE_ANY_MDS); + if (IS_ERR(req)) + return ERR_CAST(req); - req->r_ino1 = vino; - req->r_ino2.ino = cfh->parent_ino; - req->r_ino2.snap = CEPH_NOSNAP; - req->r_path2 = kmalloc(16, GFP_NOFS); - snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); - req->r_num_caps = 1; - err = ceph_mdsc_do_request(mdsc, NULL, req); - inode = req->r_target_inode; - if (inode) - ihold(inode); - ceph_mdsc_put_request(req); - if (!inode) - return ERR_PTR(err ? err : -ESTALE); + if (child) { + req->r_inode = child->d_inode; + ihold(child->d_inode); + } else { + req->r_ino1 = (struct ceph_vino) { + .ino = ino, + .snap = CEPH_NOSNAP, + }; } + req->r_num_caps = 1; + err = ceph_mdsc_do_request(mdsc, NULL, req); + inode = req->r_target_inode; + if (inode) + ihold(inode); + ceph_mdsc_put_request(req); + if (!inode) + return ERR_PTR(-ENOENT); dentry = d_obtain_alias(inode); if (IS_ERR(dentry)) { - pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", - cfh->ino, inode); iput(inode); return dentry; } err = ceph_init_dentry(dentry); if (err < 0) { - iput(inode); + dput(dentry); return ERR_PTR(err); } - dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry); + dout("__get_parent ino %llx parent %p ino %llx.%llx\n", + child ? ceph_ino(child->d_inode) : ino, + dentry, ceph_vinop(inode)); return dentry; } -static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) +struct dentry *ceph_get_parent(struct dentry *child) { - if (fh_type == 1) - return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, - fh_len); - else - return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, - fh_len); + /* don't re-export snaps */ + if (ceph_snap(child->d_inode) != CEPH_NOSNAP) + return ERR_PTR(-EINVAL); + + dout("get_parent %p ino %llx.%llx\n", + child, ceph_vinop(child->d_inode)); + return __get_parent(child->d_sb, child, 0); } /* - * get parent, if possible. - * - * FIXME: we could do better by querying the mds to discover the - * parent. + * convert regular fh to parent */ static struct dentry *ceph_fh_to_parent(struct super_block *sb, - struct fid *fid, + struct fid *fid, int fh_len, int fh_type) { struct ceph_nfs_confh *cfh = (void *)fid->raw; - struct ceph_vino vino; - struct inode *inode; struct dentry *dentry; - int err; - if (fh_type == 1) - return ERR_PTR(-ESTALE); + if (fh_type != FILEID_INO32_GEN_PARENT) + return NULL; if (fh_len < sizeof(*cfh) / 4) - return ERR_PTR(-ESTALE); + return NULL; - pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, - cfh->parent_name_hash); + dout("fh_to_parent %llx\n", cfh->parent_ino); + dentry = __get_parent(sb, NULL, cfh->ino); + if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) + dentry = __fh_to_dentry(sb, cfh->parent_ino); + return dentry; +} - vino.ino = cfh->ino; - vino.snap = CEPH_NOSNAP; - inode = ceph_find_inode(sb, vino); - if (!inode) - return ERR_PTR(-ESTALE); +static int ceph_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct ceph_mds_client *mdsc; + struct ceph_mds_request *req; + int err; - dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", - cfh->ino, inode); - iput(inode); - return dentry; - } - err = ceph_init_dentry(dentry); - if (err < 0) { - iput(inode); - return ERR_PTR(err); + mdsc = ceph_inode_to_client(child->d_inode)->mdsc; + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, + USE_ANY_MDS); + if (IS_ERR(req)) + return PTR_ERR(req); + + mutex_lock(&parent->d_inode->i_mutex); + + req->r_inode = child->d_inode; + ihold(child->d_inode); + req->r_ino2 = ceph_vino(parent->d_inode); + req->r_locked_dir = parent->d_inode; + req->r_num_caps = 2; + err = ceph_mdsc_do_request(mdsc, NULL, req); + + mutex_unlock(&parent->d_inode->i_mutex); + + if (!err) { + struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + memcpy(name, rinfo->dname, rinfo->dname_len); + name[rinfo->dname_len] = 0; + dout("get_name %p ino %llx.%llx name %s\n", + child, ceph_vinop(child->d_inode), name); + } else { + dout("get_name %p ino %llx.%llx err %d\n", + child, ceph_vinop(child->d_inode), err); } - dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry); - return dentry; + + ceph_mdsc_put_request(req); + return err; } const struct export_operations ceph_export_ops = { .encode_fh = ceph_encode_fh, .fh_to_dentry = ceph_fh_to_dentry, .fh_to_parent = ceph_fh_to_parent, + .get_parent = ceph_get_parent, + .get_name = ceph_get_name, }; |