diff options
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r-- | fs/ceph/mds_client.c | 236 |
1 files changed, 162 insertions, 74 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fad95f8f260..a1ee8fa3a8e 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1,17 +1,20 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> +#include <linux/fs.h> #include <linux/wait.h> #include <linux/slab.h> #include <linux/sched.h> -#include <linux/smp_lock.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> -#include "mds_client.h" -#include "mon_client.h" #include "super.h" -#include "messenger.h" -#include "decode.h" -#include "auth.h" -#include "pagelist.h" +#include "mds_client.h" + +#include <linux/ceph/messenger.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/pagelist.h> +#include <linux/ceph/auth.h> +#include <linux/ceph/debugfs.h> /* * A cluster of MDS (metadata server) daemons is responsible for @@ -57,7 +60,8 @@ static const struct ceph_connection_operations mds_con_ops; * parse individual inode info */ static int parse_reply_info_in(void **p, void *end, - struct ceph_mds_reply_info_in *info) + struct ceph_mds_reply_info_in *info, + int features) { int err = -EIO; @@ -71,6 +75,12 @@ static int parse_reply_info_in(void **p, void *end, info->symlink = *p; *p += info->symlink_len; + if (features & CEPH_FEATURE_DIRLAYOUTHASH) + ceph_decode_copy_safe(p, end, &info->dir_layout, + sizeof(info->dir_layout), bad); + else + memset(&info->dir_layout, 0, sizeof(info->dir_layout)); + ceph_decode_32_safe(p, end, info->xattr_len, bad); ceph_decode_need(p, end, info->xattr_len, bad); info->xattr_data = *p; @@ -85,12 +95,13 @@ bad: * target inode. */ static int parse_reply_info_trace(void **p, void *end, - struct ceph_mds_reply_info_parsed *info) + struct ceph_mds_reply_info_parsed *info, + int features) { int err; if (info->head->is_dentry) { - err = parse_reply_info_in(p, end, &info->diri); + err = parse_reply_info_in(p, end, &info->diri, features); if (err < 0) goto out_bad; @@ -111,7 +122,7 @@ static int parse_reply_info_trace(void **p, void *end, } if (info->head->is_target) { - err = parse_reply_info_in(p, end, &info->targeti); + err = parse_reply_info_in(p, end, &info->targeti, features); if (err < 0) goto out_bad; } @@ -131,7 +142,8 @@ out_bad: * parse readdir results */ static int parse_reply_info_dir(void **p, void *end, - struct ceph_mds_reply_info_parsed *info) + struct ceph_mds_reply_info_parsed *info, + int features) { u32 num, i = 0; int err; @@ -179,7 +191,7 @@ static int parse_reply_info_dir(void **p, void *end, *p += sizeof(struct ceph_mds_reply_lease); /* inode */ - err = parse_reply_info_in(p, end, &info->dir_in[i]); + err = parse_reply_info_in(p, end, &info->dir_in[i], features); if (err < 0) goto out_bad; i++; @@ -199,10 +211,45 @@ out_bad: } /* + * parse fcntl F_GETLK results + */ +static int parse_reply_info_filelock(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + int features) +{ + if (*p + sizeof(*info->filelock_reply) > end) + goto bad; + + info->filelock_reply = *p; + *p += sizeof(*info->filelock_reply); + + if (unlikely(*p != end)) + goto bad; + return 0; + +bad: + return -EIO; +} + +/* + * parse extra results + */ +static int parse_reply_info_extra(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + int features) +{ + if (info->head->op == CEPH_MDS_OP_GETFILELOCK) + return parse_reply_info_filelock(p, end, info, features); + else + return parse_reply_info_dir(p, end, info, features); +} + +/* * parse entire mds reply */ static int parse_reply_info(struct ceph_msg *msg, - struct ceph_mds_reply_info_parsed *info) + struct ceph_mds_reply_info_parsed *info, + int features) { void *p, *end; u32 len; @@ -215,15 +262,15 @@ static int parse_reply_info(struct ceph_msg *msg, /* trace */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { - err = parse_reply_info_trace(&p, p+len, info); + err = parse_reply_info_trace(&p, p+len, info, features); if (err < 0) goto out_bad; } - /* dir content */ + /* extra */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { - err = parse_reply_info_dir(&p, p+len, info); + err = parse_reply_info_extra(&p, p+len, info, features); if (err < 0) goto out_bad; } @@ -286,8 +333,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); if (atomic_dec_and_test(&s->s_ref)) { if (s->s_authorizer) - s->s_mdsc->client->monc.auth->ops->destroy_authorizer( - s->s_mdsc->client->monc.auth, s->s_authorizer); + s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( + s->s_mdsc->fsc->client->monc.auth, + s->s_authorizer); kfree(s); } } @@ -344,7 +392,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(mdsc->client->msgr, &s->s_con); + ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); s->s_con.private = s; s->s_con.ops = &mds_con_ops; s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; @@ -524,6 +572,9 @@ static void __register_request(struct ceph_mds_client *mdsc, ceph_mdsc_get_request(req); __insert_request(mdsc, req); + req->r_uid = current_fsuid(); + req->r_gid = current_fsgid(); + if (dir) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -599,7 +650,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, } else if (req->r_dentry) { struct inode *dir = req->r_dentry->d_parent->d_inode; - if (dir->i_sb != mdsc->client->sb) { + if (dir->i_sb != mdsc->fsc->sb) { /* not this fs! */ inode = req->r_dentry->d_inode; } else if (ceph_snap(dir) != CEPH_NOSNAP) { @@ -615,7 +666,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, } else { /* dir + name */ inode = dir; - hash = req->r_dentry->d_name.hash; + hash = ceph_dentry_hash(req->r_dentry); is_hash = true; } } @@ -642,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (%d/%d)\n", inode, ceph_vinop(inode), - frag.frag, frag.mds, + frag.frag, mds, (int)r, frag.ndist); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } /* since this file/dir wasn't known to be @@ -657,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (auth)\n", inode, ceph_vinop(inode), frag.frag, mds); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } } } @@ -884,7 +939,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, __ceph_remove_cap(cap); if (!__ceph_is_any_real_caps(ci)) { struct ceph_mds_client *mdsc = - &ceph_sb_to_client(inode->i_sb)->mdsc; + ceph_sb_to_client(inode->i_sb)->mdsc; spin_lock(&mdsc->cap_dirty_lock); if (!list_empty(&ci->i_dirty_item)) { @@ -1146,7 +1201,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, struct ceph_msg *msg, *partial = NULL; struct ceph_mds_cap_release *head; int err = -ENOMEM; - int extra = mdsc->client->mount_args->cap_release_safety; + int extra = mdsc->fsc->mount_options->cap_release_safety; int num; dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, @@ -1447,7 +1502,7 @@ retry: *base = ceph_ino(temp->d_inode); *plen = len; dout("build_path on %p %d built %llx '%.*s'\n", - dentry, atomic_read(&dentry->d_count), *base, len, path); + dentry, dentry->d_count, *base, len, path); return path; } @@ -1583,8 +1638,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); head->op = cpu_to_le32(req->r_op); - head->caller_uid = cpu_to_le32(current_fsuid()); - head->caller_gid = cpu_to_le32(current_fsgid()); + head->caller_uid = cpu_to_le32(req->r_uid); + head->caller_gid = cpu_to_le32(req->r_gid); head->args = req->r_args; ceph_encode_filepath(&p, end, ino1, path1); @@ -1654,7 +1709,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, struct ceph_msg *msg; int flags = 0; - req->r_mds = mds; req->r_attempts++; if (req->r_inode) { struct ceph_cap *cap = @@ -1741,6 +1795,8 @@ static int __do_request(struct ceph_mds_client *mdsc, goto finish; } + put_request_session(req); + mds = __choose_mds(mdsc, req); if (mds < 0 || ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { @@ -1758,6 +1814,8 @@ static int __do_request(struct ceph_mds_client *mdsc, goto finish; } } + req->r_session = get_session(session); + dout("do_request mds%d session %p state %s\n", mds, session, session_state_name(session->s_state)); if (session->s_state != CEPH_MDS_SESSION_OPEN && @@ -1770,7 +1828,6 @@ static int __do_request(struct ceph_mds_client *mdsc, } /* send request */ - req->r_session = get_session(session); req->r_resend_mds = -1; /* forget any previous mds hint */ if (req->r_request_started == 0) /* note request start time */ @@ -1824,7 +1881,6 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) if (req->r_session && req->r_session->s_mds == mds) { dout(" kicking tid %llu\n", req->r_tid); - put_request_session(req); __do_request(mdsc, req); } } @@ -2017,8 +2073,11 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) goto out; } else { struct ceph_inode_info *ci = ceph_inode(req->r_inode); - struct ceph_cap *cap = - ceph_get_cap_for_mds(ci, req->r_mds);; + struct ceph_cap *cap = NULL; + + if (req->r_session) + cap = ceph_get_cap_for_mds(ci, + req->r_session->s_mds); dout("already using auth"); if ((!cap || cap != ci->i_auth_cap) || @@ -2062,12 +2121,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) dout("handle_reply tid %lld result %d\n", tid, result); rinfo = &req->r_reply_info; - err = parse_reply_info(msg, rinfo); + err = parse_reply_info(msg, rinfo, session->s_con.peer_features); mutex_unlock(&mdsc->mutex); mutex_lock(&session->s_mutex); if (err < 0) { - pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); + pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); ceph_msg_dump(msg); goto out_err; } @@ -2085,9 +2144,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* insert trace into our cache */ mutex_lock(&req->r_fill_mutex); - err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); + err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { - if (result == 0 && rinfo->dir_nr) + if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && + rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); } @@ -2361,19 +2421,35 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, if (recon_state->flock) { int num_fcntl_locks, num_flock_locks; - - lock_kernel(); - ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - rec.v2.flock_len = (2*sizeof(u32) + - (num_fcntl_locks+num_flock_locks) * - sizeof(struct ceph_filelock)); - - err = ceph_pagelist_append(pagelist, &rec, reclen); - if (!err) - err = ceph_encode_locks(inode, pagelist, - num_fcntl_locks, - num_flock_locks); - unlock_kernel(); + struct ceph_pagelist_cursor trunc_point; + + ceph_pagelist_set_cursor(pagelist, &trunc_point); + do { + lock_flocks(); + ceph_count_locks(inode, &num_fcntl_locks, + &num_flock_locks); + rec.v2.flock_len = (2*sizeof(u32) + + (num_fcntl_locks+num_flock_locks) * + sizeof(struct ceph_filelock)); + unlock_flocks(); + + /* pre-alloc pagelist */ + ceph_pagelist_truncate(pagelist, &trunc_point); + err = ceph_pagelist_append(pagelist, &rec, reclen); + if (!err) + err = ceph_pagelist_reserve(pagelist, + rec.v2.flock_len); + + /* encode locks */ + if (!err) { + lock_flocks(); + err = ceph_encode_locks(inode, + pagelist, + num_fcntl_locks, + num_flock_locks); + unlock_flocks(); + } + } while (err == -ENOSPC); } else { err = ceph_pagelist_append(pagelist, &rec, reclen); } @@ -2613,7 +2689,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { - struct super_block *sb = mdsc->client->sb; + struct super_block *sb = mdsc->fsc->sb; struct inode *inode; struct ceph_inode_info *ci; struct dentry *parent, *dentry; @@ -2891,10 +2967,16 @@ static void delayed_work(struct work_struct *work) schedule_delayed(mdsc); } +int ceph_mdsc_init(struct ceph_fs_client *fsc) -int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) { - mdsc->client = client; + struct ceph_mds_client *mdsc; + + mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); + if (!mdsc) + return -ENOMEM; + mdsc->fsc = fsc; + fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); if (mdsc->mdsmap == NULL) @@ -2927,7 +3009,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) INIT_LIST_HEAD(&mdsc->dentry_lru); ceph_caps_init(mdsc); - ceph_adjust_min_caps(mdsc, client->min_caps); + ceph_adjust_min_caps(mdsc, fsc->min_caps); return 0; } @@ -2939,7 +3021,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) static void wait_requests(struct ceph_mds_client *mdsc) { struct ceph_mds_request *req; - struct ceph_client *client = mdsc->client; + struct ceph_fs_client *fsc = mdsc->fsc; mutex_lock(&mdsc->mutex); if (__get_oldest_req(mdsc)) { @@ -2947,7 +3029,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) dout("wait_requests waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, - client->mount_args->mount_timeout * HZ); + fsc->client->options->mount_timeout * HZ); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); @@ -3030,7 +3112,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush; - if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) + if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -3053,7 +3135,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) { int i, n = 0; - if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) + if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) return true; mutex_lock(&mdsc->mutex); @@ -3071,8 +3153,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { struct ceph_mds_session *session; int i; - struct ceph_client *client = mdsc->client; - unsigned long timeout = client->mount_args->mount_timeout * HZ; + struct ceph_fs_client *fsc = mdsc->fsc; + unsigned long timeout = fsc->client->options->mount_timeout * HZ; dout("close_sessions\n"); @@ -3119,7 +3201,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("stopped\n"); } -void ceph_mdsc_stop(struct ceph_mds_client *mdsc) +static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ @@ -3129,6 +3211,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) ceph_caps_finalize(mdsc); } +void ceph_mdsc_destroy(struct ceph_fs_client *fsc) +{ + struct ceph_mds_client *mdsc = fsc->mdsc; + + ceph_mdsc_stop(mdsc); + fsc->mdsc = NULL; + kfree(mdsc); +} + /* * handle mds map update. @@ -3145,14 +3236,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); - if (ceph_check_fsid(mdsc->client, &fsid) < 0) + if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) return; epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); dout("handle_map epoch %u len %d\n", epoch, (int)maplen); /* do we need it? */ - ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); + ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); mutex_lock(&mdsc->mutex); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { dout("handle_map epoch %u <= our %u\n", @@ -3176,7 +3267,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) } else { mdsc->mdsmap = newmap; /* first mds map */ } - mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; + mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; __wake_requests(mdsc, &mdsc->waiting_for_map); @@ -3277,7 +3368,7 @@ static int get_authorizer(struct ceph_connection *con, { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - struct ceph_auth_client *ac = mdsc->client->monc.auth; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; int ret = 0; if (force_new && s->s_authorizer) { @@ -3311,7 +3402,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - struct ceph_auth_client *ac = mdsc->client->monc.auth; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); } @@ -3320,12 +3411,12 @@ static int invalidate_authorizer(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - struct ceph_auth_client *ac = mdsc->client->monc.auth; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; if (ac->ops->invalidate_authorizer) ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); - return ceph_monc_validate_auth(&mdsc->client->monc); + return ceph_monc_validate_auth(&mdsc->fsc->client->monc); } static const struct ceph_connection_operations mds_con_ops = { @@ -3338,7 +3429,4 @@ static const struct ceph_connection_operations mds_con_ops = { .peer_reset = peer_reset, }; - - - /* eof */ |