diff options
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- | fs/ocfs2/dlm/dlmast.c | 22 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 21 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmconvert.c | 11 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmdebug.c | 18 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmfs.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 14 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 227 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 50 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmunlock.c | 11 |
9 files changed, 300 insertions, 77 deletions
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 8d17d28ef91..355593dd8ef 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -307,8 +307,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) if (past->type != DLM_AST && past->type != DLM_BAST) { - mlog(ML_ERROR, "Unknown ast type! %d, cookie=%"MLFu64", " - "name=%.*s\n", past->type, cookie, locklen, name); + mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" + "name=%.*s\n", past->type, + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + locklen, name); ret = DLM_IVLOCKID; goto leave; } @@ -316,9 +319,11 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) res = dlm_lookup_lockres(dlm, name, locklen); if (!res) { mlog(ML_ERROR, "got %sast for unknown lockres! " - "cookie=%"MLFu64", name=%.*s, namelen=%u\n", + "cookie=%u:%llu, name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", - cookie, locklen, name, locklen); + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + locklen, name, locklen); ret = DLM_IVLOCKID; goto leave; } @@ -360,9 +365,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) goto do_ast; } - mlog(ML_ERROR, "got %sast for unknown lock! cookie=%"MLFu64", " - "name=%.*s, namelen=%u\n", - past->type == DLM_AST ? "" : "b", cookie, locklen, name, locklen); + mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, " + "name=%.*s, namelen=%u\n", + past->type == DLM_AST ? "" : "b", + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + locklen, name, locklen); ret = DLM_NORMAL; unlock_out: diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 9c772583744..88cc43df18f 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -630,6 +630,21 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res) return status; } +static inline u8 dlm_get_lock_cookie_node(u64 cookie) +{ + u8 ret; + cookie >>= 56; + ret = (u8)(cookie & 0xffULL); + return ret; +} + +static inline unsigned long long dlm_get_lock_cookie_seq(u64 cookie) +{ + unsigned long long ret; + ret = ((unsigned long long)cookie) & 0x00ffffffffffffffULL; + return ret; +} + struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, struct dlm_lockstatus *lksb); void dlm_lock_get(struct dlm_lock *lock); @@ -658,6 +673,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm); int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); void dlm_wait_for_recovery(struct dlm_ctxt *dlm); +void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); @@ -762,6 +778,11 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); +int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, + u8 nodenum, u8 *real_master); +int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, u8 *real_master); + int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index f66e2d818cc..8285228d9e3 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -284,8 +284,10 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, if (lock->ml.convert_type != LKM_IVMODE) { __dlm_print_one_lock_resource(res); mlog(ML_ERROR, "converting a remote lock that is already " - "converting! (cookie=%"MLFu64", conv=%d)\n", - lock->ml.cookie, lock->ml.convert_type); + "converting! (cookie=%u:%llu, conv=%d)\n", + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), + lock->ml.convert_type); status = DLM_DENIED; goto bail; } @@ -513,8 +515,9 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) leave: if (!lock) mlog(ML_ERROR, "did not find lock to convert on grant queue! " - "cookie=%"MLFu64"\n", - cnv->cookie); + "cookie=%u:%llu\n", + dlm_get_lock_cookie_node(cnv->cookie), + dlm_get_lock_cookie_seq(cnv->cookie)); else dlm_lock_put(lock); diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 54f61b76ab5..c7eae5d3324 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -72,8 +72,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " - "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, + "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", + lock->ml.type, lock->ml.convert_type, lock->ml.node, + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', @@ -85,8 +87,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " - "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, + "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", + lock->ml.type, lock->ml.convert_type, lock->ml.node, + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', @@ -98,8 +102,10 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " - "cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", - lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie, + "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", + lock->ml.type, lock->ml.convert_type, lock->ml.node, + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index dd2d24dc25e..7e88e24b347 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -596,7 +596,8 @@ static int __init init_dlmfs_fs(void) dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", sizeof(struct dlmfs_inode_private), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), dlmfs_init_once, NULL); if (!dlmfs_inode_cache) return -ENOMEM; diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 671d4ff222c..6fea28318d6 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -141,13 +141,23 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, res->lockname.len)) { kick_thread = 1; call_ast = 1; + } else { + mlog(0, "%s: returning DLM_NORMAL to " + "node %u for reco lock\n", dlm->name, + lock->ml.node); } } else { /* for NOQUEUE request, unless we get the * lock right away, return DLM_NOTQUEUED */ - if (flags & LKM_NOQUEUE) + if (flags & LKM_NOQUEUE) { status = DLM_NOTQUEUED; - else { + if (dlm_is_recovery_lock(res->lockname.name, + res->lockname.len)) { + mlog(0, "%s: returning NOTQUEUED to " + "node %u for reco lock\n", dlm->name, + lock->ml.node); + } + } else { dlm_lock_get(lock); list_add_tail(&lock->list, &res->blocked); kick_thread = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 847dd3cc4cf..940be4c13b1 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 target); +static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res); int dlm_is_host_down(int errno) @@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, struct dlm_node_iter iter; unsigned int namelen; int tries = 0; + int bit, wait_on_recovery = 0; BUG_ON(!lockid); @@ -762,6 +765,18 @@ lookup: dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); set_bit(dlm->node_num, mle->maybe_map); list_add(&mle->list, &dlm->master_list); + + /* still holding the dlm spinlock, check the recovery map + * to see if there are any nodes that still need to be + * considered. these will not appear in the mle nodemap + * but they might own this lockres. wait on them. */ + bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + if (bit < O2NM_MAX_NODES) { + mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to" + "recover before lock mastery can begin\n", + dlm->name, namelen, (char *)lockid, bit); + wait_on_recovery = 1; + } } /* at this point there is either a DLM_MLE_BLOCK or a @@ -779,6 +794,39 @@ lookup: spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); + while (wait_on_recovery) { + /* any cluster changes that occurred after dropping the + * dlm spinlock would be detectable be a change on the mle, + * so we only need to clear out the recovery map once. */ + if (dlm_is_recovery_lock(lockid, namelen)) { + mlog(ML_NOTICE, "%s: recovery map is not empty, but " + "must master $RECOVERY lock now\n", dlm->name); + if (!dlm_pre_master_reco_lockres(dlm, res)) + wait_on_recovery = 0; + else { + mlog(0, "%s: waiting 500ms for heartbeat state " + "change\n", dlm->name); + msleep(500); + } + continue; + } + + dlm_kick_recovery_thread(dlm); + msleep(100); + dlm_wait_for_recovery(dlm); + + spin_lock(&dlm->spinlock); + bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + if (bit < O2NM_MAX_NODES) { + mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to" + "recover before lock mastery can begin\n", + dlm->name, namelen, (char *)lockid, bit); + wait_on_recovery = 1; + } else + wait_on_recovery = 0; + spin_unlock(&dlm->spinlock); + } + /* must wait for lock to be mastered elsewhere */ if (blocked) goto wait; @@ -792,7 +840,15 @@ redo_request: mlog_errno(ret); if (mle->master != O2NM_MAX_NODES) { /* found a master ! */ - break; + if (mle->master <= nodenum) + break; + /* if our master request has not reached the master + * yet, keep going until it does. this is how the + * master will know that asserts are needed back to + * the lower nodes. */ + mlog(0, "%s:%.*s: requests only up to %u but master " + "is %u, keep going\n", dlm->name, namelen, + lockid, nodenum, mle->master); } } @@ -860,7 +916,19 @@ recheck: /* check if another node has already become the owner */ spin_lock(&res->spinlock); if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { + mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name, + res->lockname.len, res->lockname.name, res->owner); spin_unlock(&res->spinlock); + /* this will cause the master to re-assert across + * the whole cluster, freeing up mles */ + ret = dlm_do_master_request(mle, res->owner); + if (ret < 0) { + /* give recovery a chance to run */ + mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); + msleep(500); + goto recheck; + } + ret = 0; goto leave; } spin_unlock(&res->spinlock); @@ -1244,13 +1312,14 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data) { u8 response = DLM_MASTER_RESP_MAYBE; struct dlm_ctxt *dlm = data; - struct dlm_lock_resource *res; + struct dlm_lock_resource *res = NULL; struct dlm_master_request *request = (struct dlm_master_request *) msg->buf; struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL; char *name; unsigned int namelen; int found, ret; int set_maybe; + int dispatch_assert = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1287,7 +1356,6 @@ way_up_top: } if (res->owner == dlm->node_num) { - u32 flags = DLM_ASSERT_MASTER_MLE_CLEANUP; spin_unlock(&res->spinlock); // mlog(0, "this node is the master\n"); response = DLM_MASTER_RESP_YES; @@ -1300,16 +1368,7 @@ way_up_top: * caused all nodes up to this one to * create mles. this node now needs to * go back and clean those up. */ - mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", - dlm->node_num, res->lockname.len, res->lockname.name); - ret = dlm_dispatch_assert_master(dlm, res, 1, - request->node_idx, - flags); - if (ret < 0) { - mlog(ML_ERROR, "failed to dispatch assert " - "master work\n"); - response = DLM_MASTER_RESP_ERROR; - } + dispatch_assert = 1; goto send_response; } else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { spin_unlock(&res->spinlock); @@ -1357,9 +1416,13 @@ way_up_top: } } else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) { set_maybe = 0; - if (tmpmle->master == dlm->node_num) + if (tmpmle->master == dlm->node_num) { response = DLM_MASTER_RESP_YES; - else + /* this node will be the owner. + * go back and clean the mles on any + * other nodes */ + dispatch_assert = 1; + } else response = DLM_MASTER_RESP_NO; } else { // mlog(0, "this node is attempting to " @@ -1398,8 +1461,8 @@ way_up_top: mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, GFP_KERNEL); if (!mle) { - // bad bad bad... this sucks. response = DLM_MASTER_RESP_ERROR; + mlog_errno(-ENOMEM); goto send_response; } spin_lock(&dlm->spinlock); @@ -1418,25 +1481,19 @@ way_up_top: // mlog(0, "mle was found\n"); set_maybe = 1; spin_lock(&tmpmle->spinlock); + if (tmpmle->master == dlm->node_num) { + mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n"); + BUG(); + } if (tmpmle->type == DLM_MLE_BLOCK) response = DLM_MASTER_RESP_NO; else if (tmpmle->type == DLM_MLE_MIGRATION) { mlog(0, "migration mle was found (%u->%u)\n", tmpmle->master, tmpmle->new_master); - if (tmpmle->master == dlm->node_num) { - mlog(ML_ERROR, "no lockres, but migration mle " - "says that this node is master!\n"); - BUG(); - } /* real master can respond on its own */ response = DLM_MASTER_RESP_NO; - } else { - if (tmpmle->master == dlm->node_num) { - response = DLM_MASTER_RESP_YES; - set_maybe = 0; - } else - response = DLM_MASTER_RESP_MAYBE; - } + } else + response = DLM_MASTER_RESP_MAYBE; if (set_maybe) set_bit(request->node_idx, tmpmle->maybe_map); spin_unlock(&tmpmle->spinlock); @@ -1449,6 +1506,24 @@ way_up_top: dlm_put_mle(tmpmle); } send_response: + + if (dispatch_assert) { + if (response != DLM_MASTER_RESP_YES) + mlog(ML_ERROR, "invalid response %d\n", response); + if (!res) { + mlog(ML_ERROR, "bad lockres while trying to assert!\n"); + BUG(); + } + mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", + dlm->node_num, res->lockname.len, res->lockname.name); + ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, + DLM_ASSERT_MASTER_MLE_CLEANUP); + if (ret < 0) { + mlog(ML_ERROR, "failed to dispatch assert master work\n"); + response = DLM_MASTER_RESP_ERROR; + } + } + dlm_put(dlm); return response; } @@ -1471,8 +1546,11 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, int to, tmpret; struct dlm_node_iter iter; int ret = 0; + int reassert; BUG_ON(namelen > O2NM_MAX_NAME_LEN); +again: + reassert = 0; /* note that if this nodemap is empty, it returns 0 */ dlm_node_iter_init(nodemap, &iter); @@ -1504,9 +1582,17 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, "got %d.\n", namelen, lockname, to, r); dlm_dump_lock_resources(dlm); BUG(); + } else if (r == EAGAIN) { + mlog(0, "%.*s: node %u create mles on other " + "nodes and requests a re-assert\n", + namelen, lockname, to); + reassert = 1; } } + if (reassert) + goto again; + return ret; } @@ -1528,6 +1614,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) char *name; unsigned int namelen; u32 flags; + int master_request = 0; + int ret = 0; if (!dlm_grab(dlm)) return 0; @@ -1642,11 +1730,22 @@ ok: // mlog(0, "woo! got an assert_master from node %u!\n", // assert->node_idx); if (mle) { - int extra_ref; + int extra_ref = 0; + int nn = -1; spin_lock(&mle->spinlock); - extra_ref = !!(mle->type == DLM_MLE_BLOCK - || mle->type == DLM_MLE_MIGRATION); + if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) + extra_ref = 1; + else { + /* MASTER mle: if any bits set in the response map + * then the calling node needs to re-assert to clear + * up nodes that this node contacted */ + while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, + nn+1)) < O2NM_MAX_NODES) { + if (nn != dlm->node_num && nn != assert->node_idx) + master_request = 1; + } + } mle->master = assert->node_idx; atomic_set(&mle->woken, 1); wake_up(&mle->wq); @@ -1677,10 +1776,15 @@ ok: } done: + ret = 0; if (res) dlm_lockres_put(res); dlm_put(dlm); - return 0; + if (master_request) { + mlog(0, "need to tell master to reassert\n"); + ret = EAGAIN; // positive. negative would shoot down the node. + } + return ret; kill: /* kill the caller! */ @@ -1713,6 +1817,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, item->u.am.request_from = request_from; item->u.am.flags = flags; + if (ignore_higher) + mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, + res->lockname.name); + spin_lock(&dlm->work_lock); list_add_tail(&item->list, &dlm->work_list); spin_unlock(&dlm->work_lock); @@ -1775,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) mlog(0, "finished with dlm_assert_master_worker\n"); } +/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread. + * We cannot wait for node recovery to complete to begin mastering this + * lockres because this lockres is used to kick off recovery! ;-) + * So, do a pre-check on all living nodes to see if any of those nodes + * think that $RECOVERY is currently mastered by a dead node. If so, + * we wait a short time to allow that node to get notified by its own + * heartbeat stack, then check again. All $RECOVERY lock resources + * mastered by dead nodes are purged when the hearbeat callback is + * fired, so we can know for sure that it is safe to continue once + * the node returns a live node or no node. */ +static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + struct dlm_node_iter iter; + int nodenum; + int ret = 0; + u8 master = DLM_LOCK_RES_OWNER_UNKNOWN; + + spin_lock(&dlm->spinlock); + dlm_node_iter_init(dlm->domain_map, &iter); + spin_unlock(&dlm->spinlock); + + while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { + /* do not send to self */ + if (nodenum == dlm->node_num) + continue; + ret = dlm_do_master_requery(dlm, res, nodenum, &master); + if (ret < 0) { + mlog_errno(ret); + if (!dlm_is_host_down(ret)) + BUG(); + /* host is down, so answer for that node would be + * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ + } + + if (master != DLM_LOCK_RES_OWNER_UNKNOWN) { + /* check to see if this master is in the recovery map */ + spin_lock(&dlm->spinlock); + if (test_bit(master, dlm->recovery_map)) { + mlog(ML_NOTICE, "%s: node %u has not seen " + "node %u go down yet, and thinks the " + "dead node is mastering the recovery " + "lock. must wait.\n", dlm->name, + nodenum, master); + ret = -EAGAIN; + } + spin_unlock(&dlm->spinlock); + mlog(0, "%s: reco lock master is %u\n", dlm->name, + master); + break; + } + } + return ret; +} + /* * DLM_MIGRATE_LOCKRES diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 1e232000f3f..805cbabac05 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); static int dlm_recovery_thread(void *data); void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); -static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); +void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); static int dlm_do_recovery(struct dlm_ctxt *dlm); static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); @@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, u8 send_to, struct dlm_lock_resource *res, int total_locks); -static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 *real_master); static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, struct dlm_migratable_lockres *mres); -static int dlm_do_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 nodenum, u8 *real_master); static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to); @@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data) * RECOVERY THREAD */ -static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) +void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) { /* wake the recovery thread * this will wake the reco thread in one of three places @@ -750,10 +744,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) dlm->name, dlm->reco.dead_node, dlm->reco.new_master, dead_node, reco_master); mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " - "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", + "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", dlm->name, mres->lockname_len, mres->lockname, mres->master, mres->num_locks, mres->total_locks, mres->flags, - mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags, + dlm_get_lock_cookie_node(mres->ml[0].cookie), + dlm_get_lock_cookie_seq(mres->ml[0].cookie), + mres->ml[0].list, mres->ml[0].flags, mres->ml[0].type, mres->ml[0].convert_type, mres->ml[0].highest_blocked, mres->ml[0].node); BUG(); @@ -1316,9 +1312,8 @@ leave: -static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 *real_master) +int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, u8 *real_master) { struct dlm_node_iter iter; int nodenum; @@ -1360,8 +1355,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, ret = dlm_do_master_requery(dlm, res, nodenum, real_master); if (ret < 0) { mlog_errno(ret); - BUG(); - /* TODO: need to figure a way to restart this */ + if (!dlm_is_host_down(ret)) + BUG(); + /* host is down, so answer for that node would be + * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ } if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { mlog(0, "lock master is %u\n", *real_master); @@ -1372,9 +1369,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, } -static int dlm_do_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 nodenum, u8 *real_master) +int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, + u8 nodenum, u8 *real_master) { int ret = -EINVAL; struct dlm_master_requery req; @@ -1519,9 +1515,11 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* lock is always created locally first, and * destroyed locally last. it must be on the list */ if (!lock) { + u64 c = ml->cookie; mlog(ML_ERROR, "could not find local lock " - "with cookie %"MLFu64"!\n", - ml->cookie); + "with cookie %u:%llu!\n", + dlm_get_lock_cookie_node(c), + dlm_get_lock_cookie_seq(c)); BUG(); } BUG_ON(lock->ml.node != ml->node); @@ -1739,6 +1737,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, } else continue; + if (!list_empty(&res->recovering)) { + mlog(0, "%s:%.*s: lockres was " + "marked RECOVERING, owner=%u\n", + dlm->name, res->lockname.len, + res->lockname.name, res->owner); + list_del_init(&res->recovering); + } spin_lock(&res->spinlock); dlm_change_lockres_owner(dlm, res, new_master); res->state &= ~DLM_LOCK_RES_RECOVERING; @@ -2258,7 +2263,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) mlog(0, "%u not in domain/live_nodes map " "so setting it in reco map manually\n", br->dead_node); - set_bit(br->dead_node, dlm->recovery_map); + /* force the recovery cleanup in __dlm_hb_node_down + * both of these will be cleared in a moment */ + set_bit(br->dead_node, dlm->domain_map); + set_bit(br->dead_node, dlm->live_nodes_map); __dlm_hb_node_down(dlm, br->dead_node); } spin_unlock(&dlm->spinlock); diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index c95f08d2e92..7b1a2754267 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -244,8 +244,10 @@ leave: if (actions & DLM_UNLOCK_FREE_LOCK) { /* this should always be coupled with list removal */ BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); - mlog(0, "lock %"MLFu64" should be gone now! refs=%d\n", - lock->ml.cookie, atomic_read(&lock->lock_refs.refcount)-1); + mlog(0, "lock %u:%llu should be gone now! refs=%d\n", + dlm_get_lock_cookie_node(lock->ml.cookie), + dlm_get_lock_cookie_seq(lock->ml.cookie), + atomic_read(&lock->lock_refs.refcount)-1); dlm_lock_put(lock); } if (actions & DLM_UNLOCK_CALL_AST) @@ -493,8 +495,9 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) not_found: if (!found) mlog(ML_ERROR, "failed to find lock to unlock! " - "cookie=%"MLFu64"\n", - unlock->cookie); + "cookie=%u:%llu\n", + dlm_get_lock_cookie_node(unlock->cookie), + dlm_get_lock_cookie_seq(unlock->cookie)); else { /* send the lksb->status back to the other node */ status = lksb->status; |