From 6ab00d465a1c8c02c2216f8220727282f3aa50b5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 09:41:59 -0700 Subject: libceph: create messenger with client This simplifies the init/shutdown paths, and makes client->msgr available during the rest of the setup process. Signed-off-by: Sage Weil --- net/ceph/ceph_common.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 2883ea01e68..97f70e50ad3 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -432,9 +432,12 @@ EXPORT_SYMBOL(ceph_client_id); /* * create a fresh client instance */ -struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) +struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, + unsigned supported_features, + unsigned required_features) { struct ceph_client *client; + struct ceph_entity_addr *myaddr = NULL; int err = -ENOMEM; client = kzalloc(sizeof(*client), GFP_KERNEL); @@ -449,15 +452,27 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) client->auth_err = 0; client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; - client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; - - client->msgr = NULL; + client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT | + supported_features; + client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT | + required_features; + + /* msgr */ + if (ceph_test_opt(client, MYIP)) + myaddr = &client->options->my_addr; + client->msgr = ceph_messenger_create(myaddr, + client->supported_features, + client->required_features); + if (IS_ERR(client->msgr)) { + err = PTR_ERR(client->msgr); + goto fail; + } + client->msgr->nocrc = ceph_test_opt(client, NOCRC); /* subsystems */ err = ceph_monc_init(&client->monc, client); if (err < 0) - goto fail; + goto fail_msgr; err = ceph_osdc_init(&client->osdc, client); if (err < 0) goto fail_monc; @@ -466,6 +481,8 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) fail_monc: ceph_monc_stop(&client->monc); +fail_msgr: + ceph_messenger_destroy(client->msgr); fail: kfree(client); return ERR_PTR(err); @@ -490,8 +507,7 @@ void ceph_destroy_client(struct ceph_client *client) ceph_debugfs_client_cleanup(client); - if (client->msgr) - ceph_messenger_destroy(client->msgr); + ceph_messenger_destroy(client->msgr); ceph_destroy_options(client->options); @@ -514,24 +530,9 @@ static int have_mon_and_osd_map(struct ceph_client *client) */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - struct ceph_entity_addr *myaddr = NULL; int err; unsigned long timeout = client->options->mount_timeout * HZ; - /* initialize the messenger */ - if (client->msgr == NULL) { - if (ceph_test_opt(client, MYIP)) - myaddr = &client->options->my_addr; - client->msgr = ceph_messenger_create(myaddr, - client->supported_features, - client->required_features); - if (IS_ERR(client->msgr)) { - client->msgr = NULL; - return PTR_ERR(client->msgr); - } - client->msgr->nocrc = ceph_test_opt(client, NOCRC); - } - /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); if (err < 0) -- cgit v1.2.3-70-g09d2 From f6a2f5be07463ef532b9f4e3cb9e42ab9df85832 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 09:27:44 -0700 Subject: libceph: always preallocate mon connection Allocate the mon connection on init. We already reuse it across reconnects. Remove now unnecessary (and incomplete) NULL checks. Signed-off-by: Sage Weil --- net/ceph/mon_client.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index cbe31fa4550..55672166533 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -116,14 +116,12 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) */ static void __close_session(struct ceph_mon_client *monc) { - if (monc->con) { - dout("__close_session closing mon%d\n", monc->cur_mon); - ceph_con_revoke(monc->con, monc->m_auth); - ceph_con_close(monc->con); - monc->cur_mon = -1; - monc->pending_auth = 0; - ceph_auth_reset(monc->auth); - } + dout("__close_session closing mon%d\n", monc->cur_mon); + ceph_con_revoke(monc->con, monc->m_auth); + ceph_con_close(monc->con); + monc->cur_mon = -1; + monc->pending_auth = 0; + ceph_auth_reset(monc->auth); } /* @@ -302,15 +300,6 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) */ int ceph_monc_open_session(struct ceph_mon_client *monc) { - if (!monc->con) { - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); - if (!monc->con) - return -ENOMEM; - ceph_con_init(monc->client->msgr, monc->con); - monc->con->private = monc; - monc->con->ops = &mon_con_ops; - } - mutex_lock(&monc->mutex); __open_session(monc); __schedule_delayed(monc); @@ -755,7 +744,13 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) if (err) goto out; - monc->con = NULL; + /* connection */ + monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); + if (!monc->con) + goto out_monmap; + ceph_con_init(monc->client->msgr, monc->con); + monc->con->private = monc; + monc->con->ops = &mon_con_ops; /* authentication */ monc->auth = ceph_auth_init(cl->options->name, @@ -772,7 +767,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) sizeof(struct ceph_mon_subscribe_ack), GFP_NOFS); if (!monc->m_subscribe_ack) - goto out_monmap; + goto out_con; monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); if (!monc->m_subscribe) @@ -808,6 +803,8 @@ out_subscribe: ceph_msg_put(monc->m_subscribe); out_subscribe_ack: ceph_msg_put(monc->m_subscribe_ack); +out_con: + monc->con->ops->put(monc->con); out_monmap: kfree(monc->monmap); out: @@ -822,11 +819,11 @@ void ceph_monc_stop(struct ceph_mon_client *monc) mutex_lock(&monc->mutex); __close_session(monc); - if (monc->con) { - monc->con->private = NULL; - monc->con->ops->put(monc->con); - monc->con = NULL; - } + + monc->con->private = NULL; + monc->con->ops->put(monc->con); + monc->con = NULL; + mutex_unlock(&monc->mutex); ceph_auth_destroy(monc->auth); @@ -1000,7 +997,7 @@ static void mon_fault(struct ceph_connection *con) if (!con->private) goto out; - if (monc->con && !monc->hunting) + if (!monc->hunting) pr_info("mon%d %s session lost, " "hunting for new mon\n", monc->cur_mon, ceph_pr_addr(&monc->con->peer_addr.in_addr)); -- cgit v1.2.3-70-g09d2 From b61c27636fffbaf1980e675282777b9467254a40 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 15:03:46 -0700 Subject: libceph: don't complain on msgpool alloc failures The pool allocation failures are masked by the pool; there is no need to spam the console about them. (That's the whole point of having the pool in the first place.) Mark msg allocations whose failure is safely handled as such. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- fs/ceph/mds_client.c | 11 ++++++----- include/linux/ceph/messenger.h | 3 ++- net/ceph/messenger.c | 15 +++++++++++---- net/ceph/mon_client.c | 24 +++++++++++++++--------- net/ceph/msgpool.c | 4 ++-- net/ceph/osd_client.c | 8 ++++---- 7 files changed, 41 insertions(+), 26 deletions(-) (limited to 'net/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8d74ad7ba55..b8731bf3ef1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -945,7 +945,7 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false); if (!msg) return -ENOMEM; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 86c59e16ba7..1d72f15fe9f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -764,7 +764,8 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) struct ceph_msg *msg; struct ceph_mds_session_head *h; - msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS); + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS, + false); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); return NULL; @@ -1240,7 +1241,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, while (session->s_num_cap_releases < session->s_nr_caps + extra) { spin_unlock(&session->s_cap_lock); msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, - GFP_NOFS); + GFP_NOFS, false); if (!msg) goto out_unlocked; dout("add_cap_releases %p msg %p now %d\n", session, msg, @@ -1652,7 +1653,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_old_dentry_drop) len += req->r_old_dentry->d_name.len; - msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS); + msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false); if (!msg) { msg = ERR_PTR(-ENOMEM); goto out_free2; @@ -2518,7 +2519,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, goto fail_nopagelist; ceph_pagelist_init(pagelist); - reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS); + reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false); if (!reply) goto fail_nomsg; @@ -2831,7 +2832,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, dnamelen = dentry->d_name.len; len += dnamelen; - msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS); + msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); if (!msg) return; lease = msg->front.iov_base; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index d7adf151d33..bbd4a4bb2c6 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -238,7 +238,8 @@ extern void ceph_con_keepalive(struct ceph_connection *con); extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); extern void ceph_con_put(struct ceph_connection *con); -extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); +extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, + bool can_fail); extern void ceph_msg_kfree(struct ceph_msg *m); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9918e9eb276..2de711a0c03 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2281,7 +2281,8 @@ EXPORT_SYMBOL(ceph_con_keepalive); * construct a new message with given type, size * the new msg has a ref count of 1. */ -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, + bool can_fail) { struct ceph_msg *m; @@ -2333,7 +2334,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->front.iov_base = kmalloc(front_len, flags); } if (m->front.iov_base == NULL) { - pr_err("msg_new can't allocate %d bytes\n", + dout("ceph_msg_new can't allocate %d bytes\n", front_len); goto out2; } @@ -2348,7 +2349,13 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) out2: ceph_msg_put(m); out: - pr_err("msg_new can't create type %d front %d\n", type, front_len); + if (!can_fail) { + pr_err("msg_new can't create type %d front %d\n", type, + front_len); + } else { + dout("msg_new can't create type %d front %d\n", type, + front_len); + } return NULL; } EXPORT_SYMBOL(ceph_msg_new); @@ -2398,7 +2405,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, } if (!msg) { *skip = 0; - msg = ceph_msg_new(type, front_len, GFP_NOFS); + msg = ceph_msg_new(type, front_len, GFP_NOFS, false); if (!msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 55672166533..af2ef308249 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -517,10 +517,12 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) init_completion(&req->completion); err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); + req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS, + true); if (!req->request) goto out; - req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); + req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS, + true); if (!req->reply) goto out; @@ -615,10 +617,12 @@ int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, init_completion(&req->completion); err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS); + req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS, + true); if (!req->request) goto out; - req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS); + req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS, + true); if (!req->reply) goto out; @@ -765,19 +769,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) err = -ENOMEM; monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, sizeof(struct ceph_mon_subscribe_ack), - GFP_NOFS); + GFP_NOFS, true); if (!monc->m_subscribe_ack) goto out_con; - monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); + monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS, + true); if (!monc->m_subscribe) goto out_subscribe_ack; - monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS, + true); if (!monc->m_auth_reply) goto out_subscribe; - monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); + monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true); monc->pending_auth = 0; if (!monc->m_auth) goto out_auth_reply; @@ -970,7 +976,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: - m = ceph_msg_new(type, front_len, GFP_NOFS); + m = ceph_msg_new(type, front_len, GFP_NOFS, false); break; } diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 1f4cb30a42c..11d5f4196a7 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; struct ceph_msg *msg; - msg = ceph_msg_new(0, pool->front_len, gfp_mask); + msg = ceph_msg_new(0, pool->front_len, gfp_mask, true); if (!msg) { dout("msgpool_alloc %s failed\n", pool->name); } else { @@ -61,7 +61,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, WARN_ON(1); /* try to alloc a fresh message */ - return ceph_msg_new(0, front_len, GFP_NOFS); + return ceph_msg_new(0, front_len, GFP_NOFS, false); } msg = mempool_alloc(pool->pool, GFP_NOFS); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 88ad8a2501b..01ceb59a8b4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -227,7 +227,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, gfp_flags); + OSD_OPREPLY_FRONT_LEN, gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -250,7 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -2032,7 +2032,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", front, (int)req->r_reply->front.iov_len); - m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); + m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false); if (!m) goto out; ceph_msg_put(req->r_reply); @@ -2080,7 +2080,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_OSD_MAP: case CEPH_MSG_WATCH_NOTIFY: - return ceph_msg_new(type, front, GFP_NOFS); + return ceph_msg_new(type, front, GFP_NOFS, false); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: -- cgit v1.2.3-70-g09d2 From f0ed1b7cef1e801ef470efc501f9c663fe10cebd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 15:05:07 -0700 Subject: libceph: warn on msg allocation failures Any non-masked msg allocation failure should generate a warning and stack trace to the console. All of these need to eventually be replaced by safe preallocation or msgpools. Signed-off-by: Sage Weil --- net/ceph/messenger.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ceph') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 2de711a0c03..f56aca30261 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2352,6 +2352,7 @@ out: if (!can_fail) { pr_err("msg_new can't create type %d front %d\n", type, front_len); + WARN_ON(1); } else { dout("msg_new can't create type %d front %d\n", type, front_len); -- cgit v1.2.3-70-g09d2 From 49d9224c047f23089c49a6749609447abd09ee03 Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Mon, 12 Sep 2011 14:51:58 -0700 Subject: ceph: fix ceph_monc_init memory leak failure clean up does not consider ceph_auth_init. Signed-off-by: Noah Watkins Signed-off-by: Sage Weil --- net/ceph/mon_client.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index af2ef308249..0b62deae42b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -759,8 +759,10 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) /* authentication */ monc->auth = ceph_auth_init(cl->options->name, cl->options->key); - if (IS_ERR(monc->auth)) - return PTR_ERR(monc->auth); + if (IS_ERR(monc->auth)) { + err = PTR_ERR(monc->auth); + goto out_con; + } monc->auth->want_keys = CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; @@ -771,7 +773,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) sizeof(struct ceph_mon_subscribe_ack), GFP_NOFS, true); if (!monc->m_subscribe_ack) - goto out_con; + goto out_auth; monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS, true); @@ -809,6 +811,8 @@ out_subscribe: ceph_msg_put(monc->m_subscribe); out_subscribe_ack: ceph_msg_put(monc->m_subscribe_ack); +out_auth: + ceph_auth_destroy(monc->auth); out_con: monc->con->ops->put(monc->con); out_monmap: -- cgit v1.2.3-70-g09d2 From ee3b56f265cafb28c9a1b58faed4f1dbdf51af86 Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Fri, 23 Sep 2011 11:48:42 -0700 Subject: ceph: use kernel DNS resolver Change ceph_parse_ips to take either names given as IP addresses or standard hostnames (e.g. localhost). The DNS lookup is done using the dns_resolver facility similar to its use in AFS, NFS, and CIFS. This patch defines CONFIG_CEPH_LIB_USE_DNS_RESOLVER that controls if this feature is on or off. Signed-off-by: Noah Watkins Signed-off-by: Sage Weil --- net/ceph/Kconfig | 14 +++++++ net/ceph/messenger.c | 114 +++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 116 insertions(+), 12 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index be683f2d401..cc04dd667a1 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -27,3 +27,17 @@ config CEPH_LIB_PRETTYDEBUG If unsure, say N. +config CEPH_LIB_USE_DNS_RESOLVER + bool "Use in-kernel support for DNS lookup" + depends on CEPH_LIB + select DNS_RESOLVER + default n + help + If you say Y here, hostnames (e.g. monitor addresses) will + be resolved using the CONFIG_DNS_RESOLVER facility. + + For information on how to use CONFIG_DNS_RESOLVER consult + Documentation/networking/dns_resolver.txt + + If unsure, say N. + diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f56aca30261..f466930e26f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -1077,6 +1078,101 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) } } +/* + * Unlike other *_pton function semantics, zero indicates success. + */ +static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, + char delim, const char **ipend) +{ + struct sockaddr_in *in4 = (void *)ss; + struct sockaddr_in6 *in6 = (void *)ss; + + memset(ss, 0, sizeof(*ss)); + + if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) { + ss->ss_family = AF_INET; + return 0; + } + + if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { + ss->ss_family = AF_INET6; + return 0; + } + + return -EINVAL; +} + +/* + * Extract hostname string and resolve using kernel DNS facility. + */ +#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER +static int ceph_dns_resolve_name(const char *name, size_t namelen, + struct sockaddr_storage *ss, char delim, const char **ipend) +{ + const char *end, *delim_p; + char *colon_p, *ip_addr = NULL; + int ip_len, ret; + + /* + * The end of the hostname occurs immediately preceding the delimiter or + * the port marker (':') where the delimiter takes precedence. + */ + delim_p = memchr(name, delim, namelen); + colon_p = memchr(name, ':', namelen); + + if (delim_p && colon_p) + end = delim_p < colon_p ? delim_p : colon_p; + else if (!delim_p && colon_p) + end = colon_p; + else { + end = delim_p; + if (!end) /* case: hostname:/ */ + end = name + namelen; + } + + if (end <= name) + return -EINVAL; + + /* do dns_resolve upcall */ + ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); + if (ip_len > 0) + ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); + else + ret = -ESRCH; + + kfree(ip_addr); + + *ipend = end; + + pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, + ret, ret ? "failed" : ceph_pr_addr(ss)); + + return ret; +} +#else +static inline int ceph_dns_resolve_name(const char *name, size_t namelen, + struct sockaddr_storage *ss, char delim, const char **ipend) +{ + return -EINVAL; +} +#endif + +/* + * Parse a server name (IP or hostname). If a valid IP address is not found + * then try to extract a hostname to resolve using userspace DNS upcall. + */ +static int ceph_parse_server_name(const char *name, size_t namelen, + struct sockaddr_storage *ss, char delim, const char **ipend) +{ + int ret; + + ret = ceph_pton(name, namelen, ss, delim, ipend); + if (ret) + ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); + + return ret; +} + /* * Parse an ip[:port] list into an addr array. Use the default * monitor port if a port isn't specified. @@ -1085,15 +1181,13 @@ int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, int max_count, int *count) { - int i; + int i, ret = -EINVAL; const char *p = c; dout("parse_ips on '%.*s'\n", (int)(end-c), c); for (i = 0; i < max_count; i++) { const char *ipend; struct sockaddr_storage *ss = &addr[i].in_addr; - struct sockaddr_in *in4 = (void *)ss; - struct sockaddr_in6 *in6 = (void *)ss; int port; char delim = ','; @@ -1102,15 +1196,11 @@ int ceph_parse_ips(const char *c, const char *end, p++; } - memset(ss, 0, sizeof(*ss)); - if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, - delim, &ipend)) - ss->ss_family = AF_INET; - else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, - delim, &ipend)) - ss->ss_family = AF_INET6; - else + ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); + if (ret) goto bad; + ret = -EINVAL; + p = ipend; if (delim == ']') { @@ -1155,7 +1245,7 @@ int ceph_parse_ips(const char *c, const char *end, bad: pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); - return -EINVAL; + return ret; } EXPORT_SYMBOL(ceph_parse_ips); -- cgit v1.2.3-70-g09d2 From 38d6453ca39ecce442628a93e1bb46d70d547512 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 Oct 2011 13:33:55 -0700 Subject: libceph: force resend of osd requests if we skip an osdmap If we skip over one or more map epochs, we need to resend all osd requests because it is possible they remapped to other servers and then back. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 01ceb59a8b4..733e46008b8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -943,7 +943,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); * Caller should hold map_sem for read and request_mutex. */ static int __map_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) + struct ceph_osd_request *req, int force_resend) { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; @@ -967,7 +967,8 @@ static int __map_request(struct ceph_osd_client *osdc, num = err; } - if ((req->r_osd && req->r_osd->o_osd == o && + if ((!force_resend && + req->r_osd && req->r_osd->o_osd == o && req->r_sent >= req->r_osd->o_incarnation && req->r_num_pg_osds == num && memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || @@ -1289,18 +1290,18 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) * * Caller should hold map_sem for read and request_mutex. */ -static void kick_requests(struct ceph_osd_client *osdc) +static void kick_requests(struct ceph_osd_client *osdc, int force_resend) { struct ceph_osd_request *req, *nreq; struct rb_node *p; int needmap = 0; int err; - dout("kick_requests\n"); + dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_osd_request, r_node); - err = __map_request(osdc, req); + err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ if (req->r_osd == NULL) { @@ -1318,7 +1319,7 @@ static void kick_requests(struct ceph_osd_client *osdc) r_linger_item) { dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); - err = __map_request(osdc, req); + err = __map_request(osdc, req, force_resend); if (err == 0) continue; /* no change and no osd was specified */ if (err < 0) @@ -1395,7 +1396,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = newmap; } - kick_requests(osdc); + kick_requests(osdc, 0); reset_changed_osds(osdc); } else { dout("ignoring incremental map %u len %d\n", @@ -1423,6 +1424,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) "older than our %u\n", epoch, maplen, osdc->osdmap->epoch); } else { + int skipped_map = 0; + dout("taking full map %u len %d\n", epoch, maplen); newmap = osdmap_decode(&p, p+maplen); if (IS_ERR(newmap)) { @@ -1432,9 +1435,12 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) BUG_ON(!newmap); oldmap = osdc->osdmap; osdc->osdmap = newmap; - if (oldmap) + if (oldmap) { + if (oldmap->epoch + 1 < newmap->epoch) + skipped_map = 1; ceph_osdmap_destroy(oldmap); - kick_requests(osdc); + } + kick_requests(osdc, skipped_map); } p += maplen; nr_maps--; @@ -1707,7 +1713,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, * the request still han't been touched yet. */ if (req->r_sent == 0) { - rc = __map_request(osdc, req); + rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { dout("osdc_start_request failed map, " -- cgit v1.2.3-70-g09d2