diff options
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/dlm_internal.h | 1 | ||||
-rw-r--r-- | fs/dlm/lock.c | 211 | ||||
-rw-r--r-- | fs/dlm/lock.h | 3 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 3 | ||||
-rw-r--r-- | fs/dlm/lowcomms.c | 21 | ||||
-rw-r--r-- | fs/dlm/member.c | 45 | ||||
-rw-r--r-- | fs/dlm/memory.c | 14 | ||||
-rw-r--r-- | fs/dlm/midcomms.c | 17 | ||||
-rw-r--r-- | fs/dlm/rcom.c | 39 | ||||
-rw-r--r-- | fs/dlm/rcom.h | 5 | ||||
-rw-r--r-- | fs/dlm/recoverd.c | 11 | ||||
-rw-r--r-- | fs/dlm/requestqueue.c | 58 | ||||
-rw-r--r-- | fs/dlm/requestqueue.h | 4 |
13 files changed, 240 insertions, 192 deletions
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 74901e981e1..d2fc2384c3b 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -491,6 +491,7 @@ struct dlm_ls { uint64_t ls_recover_seq; struct dlm_recover *ls_recover_args; struct rw_semaphore ls_in_recovery; /* block local requests */ + struct rw_semaphore ls_recv_active; /* block dlm_recv */ struct list_head ls_requestqueue;/* queue remote requests */ struct mutex ls_requestqueue_mutex; char *ls_recover_buf; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index b455919c199..3915b8e1414 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1670,9 +1670,10 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, with a deadlk here, we'd have to generate something like grant_lock with the deadlk error.) */ -/* returns the highest requested mode of all blocked conversions */ +/* Returns the highest requested mode of all blocked conversions; sets + cw if there's a blocked conversion to DLM_LOCK_CW. */ -static int grant_pending_convert(struct dlm_rsb *r, int high) +static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw) { struct dlm_lkb *lkb, *s; int hi, demoted, quit, grant_restart, demote_restart; @@ -1709,6 +1710,9 @@ static int grant_pending_convert(struct dlm_rsb *r, int high) } hi = max_t(int, lkb->lkb_rqmode, hi); + + if (cw && lkb->lkb_rqmode == DLM_LOCK_CW) + *cw = 1; } if (grant_restart) @@ -1721,29 +1725,52 @@ static int grant_pending_convert(struct dlm_rsb *r, int high) return max_t(int, high, hi); } -static int grant_pending_wait(struct dlm_rsb *r, int high) +static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw) { struct dlm_lkb *lkb, *s; list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { if (can_be_granted(r, lkb, 0, NULL)) grant_lock_pending(r, lkb); - else + else { high = max_t(int, lkb->lkb_rqmode, high); + if (lkb->lkb_rqmode == DLM_LOCK_CW) + *cw = 1; + } } return high; } +/* cw of 1 means there's a lock with a rqmode of DLM_LOCK_CW that's blocked + on either the convert or waiting queue. + high is the largest rqmode of all locks blocked on the convert or + waiting queue. */ + +static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw) +{ + if (gr->lkb_grmode == DLM_LOCK_PR && cw) { + if (gr->lkb_highbast < DLM_LOCK_EX) + return 1; + return 0; + } + + if (gr->lkb_highbast < high && + !__dlm_compat_matrix[gr->lkb_grmode+1][high+1]) + return 1; + return 0; +} + static void grant_pending_locks(struct dlm_rsb *r) { struct dlm_lkb *lkb, *s; int high = DLM_LOCK_IV; + int cw = 0; DLM_ASSERT(is_master(r), dlm_dump_rsb(r);); - high = grant_pending_convert(r, high); - high = grant_pending_wait(r, high); + high = grant_pending_convert(r, high, &cw); + high = grant_pending_wait(r, high, &cw); if (high == DLM_LOCK_IV) return; @@ -1751,27 +1778,41 @@ static void grant_pending_locks(struct dlm_rsb *r) /* * If there are locks left on the wait/convert queue then send blocking * ASTs to granted locks based on the largest requested mode (high) - * found above. FIXME: highbast < high comparison not valid for PR/CW. + * found above. */ list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { - if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) && - !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) { - queue_bast(r, lkb, high); + if (lkb->lkb_bastaddr && lock_requires_bast(lkb, high, cw)) { + if (cw && high == DLM_LOCK_PR) + queue_bast(r, lkb, DLM_LOCK_CW); + else + queue_bast(r, lkb, high); lkb->lkb_highbast = high; } } } +static int modes_require_bast(struct dlm_lkb *gr, struct dlm_lkb *rq) +{ + if ((gr->lkb_grmode == DLM_LOCK_PR && rq->lkb_rqmode == DLM_LOCK_CW) || + (gr->lkb_grmode == DLM_LOCK_CW && rq->lkb_rqmode == DLM_LOCK_PR)) { + if (gr->lkb_highbast < DLM_LOCK_EX) + return 1; + return 0; + } + + if (gr->lkb_highbast < rq->lkb_rqmode && !modes_compat(gr, rq)) + return 1; + return 0; +} + static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, struct dlm_lkb *lkb) { struct dlm_lkb *gr; list_for_each_entry(gr, head, lkb_statequeue) { - if (gr->lkb_bastaddr && - gr->lkb_highbast < lkb->lkb_rqmode && - !modes_compat(gr, lkb)) { + if (gr->lkb_bastaddr && modes_require_bast(gr, lkb)) { queue_bast(r, gr, lkb->lkb_rqmode); gr->lkb_highbast = lkb->lkb_rqmode; } @@ -2235,7 +2276,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) before we try again to grant this one. */ if (is_demoted(lkb)) { - grant_pending_convert(r, DLM_LOCK_IV); + grant_pending_convert(r, DLM_LOCK_IV, NULL); if (_can_be_granted(r, lkb, 1)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); @@ -3597,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) dlm_put_lkb(lkb); } -int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) +static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) { - struct dlm_message *ms = (struct dlm_message *) hd; - struct dlm_ls *ls; - int error = 0; - - if (!recovery) - dlm_message_in(ms); - - ls = dlm_find_lockspace_global(hd->h_lockspace); - if (!ls) { - log_print("drop message %d from %d for unknown lockspace %d", - ms->m_type, nodeid, hd->h_lockspace); - return -EINVAL; - } - - /* recovery may have just ended leaving a bunch of backed-up requests - in the requestqueue; wait while dlm_recoverd clears them */ - - if (!recovery) - dlm_wait_requestqueue(ls); - - /* recovery may have just started while there were a bunch of - in-flight requests -- save them in requestqueue to be processed - after recovery. we can't let dlm_recvd block on the recovery - lock. if dlm_recoverd is calling this function to clear the - requestqueue, it needs to be interrupted (-EINTR) if another - recovery operation is starting. */ - - while (1) { - if (dlm_locking_stopped(ls)) { - if (recovery) { - error = -EINTR; - goto out; - } - error = dlm_add_requestqueue(ls, nodeid, hd); - if (error == -EAGAIN) - continue; - else { - error = -EINTR; - goto out; - } - } - - if (dlm_lock_recovery_try(ls)) - break; - schedule(); - } - switch (ms->m_type) { /* messages sent to a master node */ @@ -3720,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) log_error(ls, "unknown message type %d", ms->m_type); } - dlm_unlock_recovery(ls); - out: - dlm_put_lockspace(ls); dlm_astd_wake(); - return error; } +/* If the lockspace is in recovery mode (locking stopped), then normal + messages are saved on the requestqueue for processing after recovery is + done. When not in recovery mode, we wait for dlm_recoverd to drain saved + messages off the requestqueue before we process new ones. This occurs right + after recovery completes when we transition from saving all messages on + requestqueue, to processing all the saved messages, to processing new + messages as they arrive. */ -/* - * Recovery related - */ +static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, + int nodeid) +{ + if (dlm_locking_stopped(ls)) { + dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms); + } else { + dlm_wait_requestqueue(ls); + _receive_message(ls, ms); + } +} + +/* This is called by dlm_recoverd to process messages that were saved on + the requestqueue. */ + +void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms) +{ + _receive_message(ls, ms); +} + +/* This is called by the midcomms layer when something is received for + the lockspace. It could be either a MSG (normal message sent as part of + standard locking activity) or an RCOM (recovery message sent as part of + lockspace recovery). */ + +void dlm_receive_buffer(struct dlm_header *hd, int nodeid) +{ + struct dlm_message *ms = (struct dlm_message *) hd; + struct dlm_rcom *rc = (struct dlm_rcom *) hd; + struct dlm_ls *ls; + int type = 0; + + switch (hd->h_cmd) { + case DLM_MSG: + dlm_message_in(ms); + type = ms->m_type; + break; + case DLM_RCOM: + dlm_rcom_in(rc); + type = rc->rc_type; + break; + default: + log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid); + return; + } + + if (hd->h_nodeid != nodeid) { + log_print("invalid h_nodeid %d from %d lockspace %x", + hd->h_nodeid, nodeid, hd->h_lockspace); + return; + } + + ls = dlm_find_lockspace_global(hd->h_lockspace); + if (!ls) { + log_print("invalid h_lockspace %x from %d cmd %d type %d", + hd->h_lockspace, nodeid, hd->h_cmd, type); + + if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) + dlm_send_ls_not_ready(nodeid, rc); + return; + } + + /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to + be inactive (in this ls) before transitioning to recovery mode */ + + down_read(&ls->ls_recv_active); + if (hd->h_cmd == DLM_MSG) + dlm_receive_message(ls, ms, nodeid); + else + dlm_receive_rcom(ls, rc, nodeid); + up_read(&ls->ls_recv_active); + + dlm_put_lockspace(ls); +} static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) { @@ -4388,7 +4455,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (lvb_in && ua->lksb.sb_lvbptr) memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); - ua->castparam = ua_tmp->castparam; + if (ua_tmp->castparam) + ua->castparam = ua_tmp->castparam; ua->user_lksb = ua_tmp->user_lksb; error = set_unlock_args(flags, ua, &args); @@ -4433,7 +4501,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out; ua = (struct dlm_user_args *)lkb->lkb_astparam; - ua->castparam = ua_tmp->castparam; + if (ua_tmp->castparam) + ua->castparam = ua_tmp->castparam; ua->user_lksb = ua_tmp->user_lksb; error = set_unlock_args(flags, ua, &args); diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 1720313c22d..ada04680a1e 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -16,7 +16,8 @@ void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); -int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); +void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); +void dlm_receive_buffer(struct dlm_header *hd, int nodeid); int dlm_modes_compat(int mode1, int mode2); int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, unsigned int flags, struct dlm_rsb **r_ret); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 1dc72105ab1..6353a838452 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -167,7 +167,6 @@ static struct kobj_type dlm_ktype = { }; static struct kset dlm_kset = { - .kobj = {.name = "dlm",}, .ktype = &dlm_ktype, }; @@ -228,6 +227,7 @@ int dlm_lockspace_init(void) INIT_LIST_HEAD(&lslist); spin_lock_init(&lslist_lock); + kobject_set_name(&dlm_kset.kobj, "dlm"); kobj_set_kset_s(&dlm_kset, kernel_subsys); error = kset_register(&dlm_kset); if (error) @@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ls->ls_recover_seq = 0; ls->ls_recover_args = NULL; init_rwsem(&ls->ls_in_recovery); + init_rwsem(&ls->ls_recv_active); INIT_LIST_HEAD(&ls->ls_requestqueue); mutex_init(&ls->ls_requestqueue_mutex); mutex_init(&ls->ls_clear_proc_locks); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 0553a6158dc..58bf3f5cdbe 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -313,6 +313,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, in6_addr->sin6_port = cpu_to_be16(port); *addr_len = sizeof(struct sockaddr_in6); } + memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len); } /* Close a remote connection and tidy up */ @@ -332,6 +333,7 @@ static void close_connection(struct connection *con, bool and_other) __free_page(con->rx_page); con->rx_page = NULL; } + con->retries = 0; mutex_unlock(&con->sock_mutex); } @@ -631,7 +633,7 @@ out_resched: out_close: mutex_unlock(&con->sock_mutex); - if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { + if (ret != -EAGAIN) { close_connection(con, false); /* Reconnect when there is something to send */ } @@ -719,6 +721,8 @@ static int tcp_accept_from_sock(struct connection *con) INIT_WORK(&othercon->swork, process_send_sockets); INIT_WORK(&othercon->rwork, process_recv_sockets); set_bit(CF_IS_OTHERCON, &othercon->flags); + } + if (!othercon->sock) { newcon->othercon = othercon; othercon->sock = newsock; newsock->sk->sk_user_data = othercon; @@ -1122,8 +1126,6 @@ static int tcp_listen_for_all(void) log_print("Using TCP for communications"); - set_bit(CF_IS_OTHERCON, &con->flags); - sock = tcp_create_listen_sock(con, dlm_local_addr[0]); if (sock) { add_sock(sock, con); @@ -1262,14 +1264,15 @@ static void send_to_sock(struct connection *con) if (len) { ret = sendpage(con->sock, e->page, offset, len, msg_flags); - if (ret == -EAGAIN || ret == 0) + if (ret == -EAGAIN || ret == 0) { + cond_resched(); goto out; + } if (ret <= 0) goto send_error; - } else { + } /* Don't starve people filling buffers */ cond_resched(); - } spin_lock(&con->writequeue_lock); e->offset += ret; @@ -1407,7 +1410,7 @@ void dlm_lowcomms_stop(void) for (i = 0; i <= max_nodeid; i++) { con = __nodeid2con(i, 0); if (con) { - con->flags |= 0xFF; + con->flags |= 0x0F; if (con->sock) con->sock->sk->sk_user_data = NULL; } @@ -1423,8 +1426,6 @@ void dlm_lowcomms_stop(void) con = __nodeid2con(i, 0); if (con) { close_connection(con, true); - if (con->othercon) - kmem_cache_free(con_cache, con->othercon); kmem_cache_free(con_cache, con); } } @@ -1449,7 +1450,7 @@ int dlm_lowcomms_start(void) error = -ENOMEM; con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), __alignof__(struct connection), 0, - NULL, NULL); + NULL); if (!con_cache) goto out; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 073599dced2..e9cdcab306e 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -18,10 +18,6 @@ #include "rcom.h" #include "config.h" -/* - * Following called by dlm_recoverd thread - */ - static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { struct dlm_member *memb = NULL; @@ -56,8 +52,10 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid) return -ENOMEM; w = dlm_node_weight(ls->ls_name, nodeid); - if (w < 0) + if (w < 0) { + kfree(memb); return w; + } memb->nodeid = nodeid; memb->weight = w; @@ -248,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) return error; } -/* - * Following called from lockspace.c - */ +/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before + dlm_ls_start() is called on any of them to start the new recovery. */ int dlm_ls_stop(struct dlm_ls *ls) { int new; /* - * A stop cancels any recovery that's in progress (see RECOVERY_STOP, - * dlm_recovery_stopped()) and prevents any new locks from being - * processed (see RUNNING, dlm_locking_stopped()). + * Prevent dlm_recv from being in the middle of something when we do + * the stop. This includes ensuring dlm_recv isn't processing a + * recovery message (rcom), while dlm_recoverd is aborting and + * resetting things from an in-progress recovery. i.e. we want + * dlm_recoverd to abort its recovery without worrying about dlm_recv + * processing an rcom at the same time. Stopping dlm_recv also makes + * it easy for dlm_receive_message() to check locking stopped and add a + * message to the requestqueue without races. + */ + + down_write(&ls->ls_recv_active); + + /* + * Abort any recovery that's in progress (see RECOVERY_STOP, + * dlm_recovery_stopped()) and tell any other threads running in the + * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). */ spin_lock(&ls->ls_recover_lock); @@ -269,8 +279,14 @@ int dlm_ls_stop(struct dlm_ls *ls) spin_unlock(&ls->ls_recover_lock); /* + * Let dlm_recv run again, now any normal messages will be saved on the + * requestqueue for later. + */ + + up_write(&ls->ls_recv_active); + + /* * This in_recovery lock does two things: - * * 1) Keeps this function from returning until all threads are out * of locking routines and locking is truely stopped. * 2) Keeps any new requests from being processed until it's unlocked @@ -282,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls) /* * The recoverd suspend/resume makes sure that dlm_recoverd (if - * running) has noticed the clearing of RUNNING above and quit - * processing the previous recovery. This will be true for all nodes - * before any nodes start the new recovery. + * running) has noticed RECOVERY_STOP above and quit processing the + * previous recovery. */ dlm_recoverd_suspend(ls); diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index f858fef6e41..ecf0e5cb203 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -23,7 +23,7 @@ int dlm_memory_init(void) int ret = 0; lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), - __alignof__(struct dlm_lkb), 0, NULL, NULL); + __alignof__(struct dlm_lkb), 0, NULL); if (!lkb_cache) ret = -ENOMEM; return ret; @@ -39,9 +39,7 @@ char *allocate_lvb(struct dlm_ls *ls) { char *p; - p = kmalloc(ls->ls_lvblen, GFP_KERNEL); - if (p) - memset(p, 0, ls->ls_lvblen); + p = kzalloc(ls->ls_lvblen, GFP_KERNEL); return p; } @@ -59,9 +57,7 @@ struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); - r = kmalloc(sizeof(*r) + namelen, GFP_KERNEL); - if (r) - memset(r, 0, sizeof(*r) + namelen); + r = kzalloc(sizeof(*r) + namelen, GFP_KERNEL); return r; } @@ -101,9 +97,7 @@ struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN, printk("namelen = %d\n", namelen);); - de = kmalloc(sizeof(*de) + namelen, GFP_KERNEL); - if (de) - memset(de, 0, sizeof(*de) + namelen); + de = kzalloc(sizeof(*de) + namelen, GFP_KERNEL); return de; } diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index a5126e0c68a..f8c69dda16a 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -27,7 +27,6 @@ #include "dlm_internal.h" #include "lowcomms.h" #include "config.h" -#include "rcom.h" #include "lock.h" #include "midcomms.h" @@ -117,19 +116,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, offset &= (limit - 1); len -= msglen; - switch (msg->h_cmd) { - case DLM_MSG: - dlm_receive_message(msg, nodeid, 0); - break; - - case DLM_RCOM: - dlm_receive_rcom(msg, nodeid); - break; - - default: - log_print("unknown msg type %x from %u: %u %u %u %u", - msg->h_cmd, nodeid, msglen, len, offset, ret); - } + dlm_receive_buffer(msg, nodeid); } if (msg != (struct dlm_header *) __tmp) diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index e3a1527cbdb..ae2fd97fa4a 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -386,8 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) dlm_recover_process_copy(ls, rc_in); } -static int send_ls_not_ready(struct dlm_ls *ls, int nodeid, - struct dlm_rcom *rc_in) +/* If the lockspace doesn't exist then still send a status message + back; it's possible that it just doesn't have its global_id yet. */ + +int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct rcom_config *rf; @@ -395,7 +397,7 @@ static int send_ls_not_ready(struct dlm_ls *ls, int nodeid, char *mb; int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); - mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb); + mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb); if (!mh) return -ENOBUFS; memset(mb, 0, mb_len); @@ -447,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) return rv; } -/* Called by dlm_recvd; corresponds to dlm_receive_message() but special +/* Called by dlm_recv; corresponds to dlm_receive_message() but special recovery-only comms are sent through here. */ -void dlm_receive_rcom(struct dlm_header *hd, int nodeid) +void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { - struct dlm_rcom *rc = (struct dlm_rcom *) hd; - struct dlm_ls *ls; - - dlm_rcom_in(rc); - - /* If the lockspace doesn't exist then still send a status message - back; it's possible that it just doesn't have its global_id yet. */ - - ls = dlm_find_lockspace_global(hd->h_lockspace); - if (!ls) { - log_print("lockspace %x from %d type %x not found", - hd->h_lockspace, nodeid, rc->rc_type); - if (rc->rc_type == DLM_RCOM_STATUS) - send_ls_not_ready(ls, nodeid, rc); - return; - } - if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { log_debug(ls, "ignoring recovery message %x from %d", rc->rc_type, nodeid); @@ -478,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) if (is_old_reply(ls, rc)) goto out; - if (nodeid != rc->rc_header.h_nodeid) { - log_error(ls, "bad rcom nodeid %d from %d", - rc->rc_header.h_nodeid, nodeid); - goto out; - } - switch (rc->rc_type) { case DLM_RCOM_STATUS: receive_rcom_status(ls, rc); @@ -521,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); } out: - dlm_put_lockspace(ls); + return; } diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index d7984321ff4..b09abd29ba3 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -18,7 +18,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid); int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); -void dlm_receive_rcom(struct dlm_header *hd, int nodeid); +void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); +int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); #endif diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 66575997861..4b89e20eebe 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -24,19 +24,28 @@ /* If the start for which we're re-enabling locking (seq) has been superseded - by a newer stop (ls_recover_seq), we need to leave locking disabled. */ + by a newer stop (ls_recover_seq), we need to leave locking disabled. + + We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees + locking stopped and b) adds a message to the requestqueue, but dlm_recoverd + enables locking and clears the requestqueue between a and b. */ static int enable_locking(struct dlm_ls *ls, uint64_t seq) { int error = -EINTR; + down_write(&ls->ls_recv_active); + spin_lock(&ls->ls_recover_lock); if (ls->ls_recover_seq == seq) { set_bit(LSFL_RUNNING, &ls->ls_flags); + /* unblocks processes waiting to enter the dlm */ up_write(&ls->ls_in_recovery); error = 0; } spin_unlock(&ls->ls_recover_lock); + + up_write(&ls->ls_recv_active); return error; } diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 65008d79c96..0de04f17cce 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -20,7 +20,7 @@ struct rq_entry { struct list_head list; int nodeid; - char request[1]; + char request[0]; }; /* @@ -30,42 +30,39 @@ struct rq_entry { * lockspace is enabled on some while still suspended on others. */ -int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) { struct rq_entry *e; int length = hd->h_length; - int rv = 0; e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); if (!e) { - log_print("dlm_add_requestqueue: out of memory\n"); - return 0; + log_print("dlm_add_requestqueue: out of memory len %d", length); + return; } e->nodeid = nodeid; memcpy(e->request, hd, length); - /* We need to check dlm_locking_stopped() after taking the mutex to - avoid a race where dlm_recoverd enables locking and runs - process_requestqueue between our earlier dlm_locking_stopped check - and this addition to the requestqueue. */ - mutex_lock(&ls->ls_requestqueue_mutex); - if (dlm_locking_stopped(ls)) - list_add_tail(&e->list, &ls->ls_requestqueue); - else { - log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid); - kfree(e); - rv = -EAGAIN; - } + list_add_tail(&e->list, &ls->ls_requestqueue); mutex_unlock(&ls->ls_requestqueue_mutex); - return rv; } +/* + * Called by dlm_recoverd to process normal messages saved while recovery was + * happening. Normal locking has been enabled before this is called. dlm_recv + * upon receiving a message, will wait for all saved messages to be drained + * here before processing the message it got. If a new dlm_ls_stop() arrives + * while we're processing these saved messages, it may block trying to suspend + * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that + * case, we don't abort since locking_stopped is still 0. If dlm_recv is not + * waiting for us, then this processing may be aborted due to locking_stopped. + */ + int dlm_process_requestqueue(struct dlm_ls *ls) { struct rq_entry *e; - struct dlm_header *hd; int error = 0; mutex_lock(&ls->ls_requestqueue_mutex); @@ -79,14 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls) e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); mutex_unlock(&ls->ls_requestqueue_mutex); - hd = (struct dlm_header *) e->request; - error = dlm_receive_message(hd, e->nodeid, 1); - - if (error == -EINTR) { - /* entry is left on requestqueue */ - log_debug(ls, "process_requestqueue abort eintr"); - break; - } + dlm_receive_message_saved(ls, (struct dlm_message *)e->request); mutex_lock(&ls->ls_requestqueue_mutex); list_del(&e->list); @@ -106,10 +96,12 @@ int dlm_process_requestqueue(struct dlm_ls *ls) /* * After recovery is done, locking is resumed and dlm_recoverd takes all the - * saved requests and processes them as they would have been by dlm_recvd. At - * the same time, dlm_recvd will start receiving new requests from remote - * nodes. We want to delay dlm_recvd processing new requests until - * dlm_recoverd has finished processing the old saved requests. + * saved requests and processes them as they would have been by dlm_recv. At + * the same time, dlm_recv will start receiving new requests from remote nodes. + * We want to delay dlm_recv processing new requests until dlm_recoverd has + * finished processing the old saved requests. We don't check for locking + * stopped here because dlm_ls_stop won't stop locking until it's suspended us + * (dlm_recv). */ void dlm_wait_requestqueue(struct dlm_ls *ls) @@ -118,8 +110,6 @@ void dlm_wait_requestqueue(struct dlm_ls *ls) mutex_lock(&ls->ls_requestqueue_mutex); if (list_empty(&ls->ls_requestqueue)) break; - if (dlm_locking_stopped(ls)) - break; mutex_unlock(&ls->ls_requestqueue_mutex); schedule(); } diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 6a53ea03335..aba34fc05ee 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -13,7 +13,7 @@ #ifndef __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__ -int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); int dlm_process_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls); |