diff options
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/lockdep.c | 2 | ||||
-rw-r--r-- | kernel/locking/mcs_spinlock.c | 72 | ||||
-rw-r--r-- | kernel/locking/mcs_spinlock.h | 13 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 41 | ||||
-rw-r--r-- | kernel/locking/qrwlock.c | 9 | ||||
-rw-r--r-- | kernel/locking/rtmutex-debug.c | 5 | ||||
-rw-r--r-- | kernel/locking/rtmutex-debug.h | 7 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 562 | ||||
-rw-r--r-- | kernel/locking/rtmutex.h | 7 | ||||
-rw-r--r-- | kernel/locking/rtmutex_common.h | 22 | ||||
-rw-r--r-- | kernel/locking/rwsem-spinlock.c | 28 | ||||
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 20 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 2 |
13 files changed, 558 insertions, 232 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d24e4339b46..88d0d4420ad 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -384,7 +384,9 @@ static void print_lockdep_off(const char *bug_msg) { printk(KERN_DEBUG "%s\n", bug_msg); printk(KERN_DEBUG "turning off the locking correctness validator.\n"); +#ifdef CONFIG_LOCK_STAT printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n"); +#endif } static int save_trace(struct stack_trace *trace) diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 838dc9e0066..9887a905a76 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -1,6 +1,4 @@ - #include <linux/percpu.h> -#include <linux/mutex.h> #include <linux/sched.h> #include "mcs_spinlock.h" @@ -14,21 +12,47 @@ * called from interrupt context and we have preemption disabled while * spinning. */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); + +/* + * We use the value 0 to represent "no CPU", thus the encoded value + * will be the CPU number incremented by 1. + */ +static inline int encode_cpu(int cpu_nr) +{ + return cpu_nr + 1; +} + +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) +{ + int cpu_nr = encoded_cpu_val - 1; + + return per_cpu_ptr(&osq_node, cpu_nr); +} /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. */ -static inline struct optimistic_spin_queue * -osq_wait_next(struct optimistic_spin_queue **lock, - struct optimistic_spin_queue *node, - struct optimistic_spin_queue *prev) +static inline struct optimistic_spin_node * +osq_wait_next(struct optimistic_spin_queue *lock, + struct optimistic_spin_node *node, + struct optimistic_spin_node *prev) { - struct optimistic_spin_queue *next = NULL; + struct optimistic_spin_node *next = NULL; + int curr = encode_cpu(smp_processor_id()); + int old; + + /* + * If there is a prev node in queue, then the 'old' value will be + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if + * we're currently last in queue, then the queue will then become empty. + */ + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; for (;;) { - if (*lock == node && cmpxchg(lock, node, prev) == node) { + if (atomic_read(&lock->tail) == curr && + atomic_cmpxchg(&lock->tail, curr, old) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its @@ -53,24 +77,29 @@ osq_wait_next(struct optimistic_spin_queue **lock, break; } - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } return next; } -bool osq_lock(struct optimistic_spin_queue **lock) +bool osq_lock(struct optimistic_spin_queue *lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *prev, *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *prev, *next; + int curr = encode_cpu(smp_processor_id()); + int old; node->locked = 0; node->next = NULL; + node->cpu = curr; - node->prev = prev = xchg(lock, node); - if (likely(prev == NULL)) + old = atomic_xchg(&lock->tail, curr); + if (old == OSQ_UNLOCKED_VAL) return true; + prev = decode_cpu(old); + node->prev = prev; ACCESS_ONCE(prev->next) = node; /* @@ -89,7 +118,7 @@ bool osq_lock(struct optimistic_spin_queue **lock) if (need_resched()) goto unqueue; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } return true; @@ -115,7 +144,7 @@ unqueue: if (smp_load_acquire(&node->locked)) return true; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); /* * Or we race against a concurrent unqueue()'s step-B, in which @@ -149,20 +178,21 @@ unqueue: return false; } -void osq_unlock(struct optimistic_spin_queue **lock) +void osq_unlock(struct optimistic_spin_queue *lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *next; + struct optimistic_spin_node *node, *next; + int curr = encode_cpu(smp_processor_id()); /* * Fast path for the uncontended case. */ - if (likely(cmpxchg(lock, node, NULL) == node)) + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) return; /* * Second most likely case. */ + node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { ACCESS_ONCE(next->locked) = 1; diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index a2dbac4aca6..23e89c5930e 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -27,7 +27,7 @@ struct mcs_spinlock { #define arch_mcs_spin_lock_contended(l) \ do { \ while (!(smp_load_acquire(l))) \ - arch_mutex_cpu_relax(); \ + cpu_relax_lowlatency(); \ } while (0) #endif @@ -104,7 +104,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } /* Pass lock to next waiter. */ @@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) * mutex_lock()/rwsem_down_{read,write}() etc. */ -struct optimistic_spin_queue { - struct optimistic_spin_queue *next, *prev; +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # value */ }; -extern bool osq_lock(struct optimistic_spin_queue **lock); -extern void osq_unlock(struct optimistic_spin_queue **lock); +extern bool osq_lock(struct optimistic_spin_queue *lock); +extern void osq_unlock(struct optimistic_spin_queue *lock); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index bc73d33c676..ae712b25e49 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -46,12 +46,6 @@ # include <asm/mutex.h> #endif -/* - * A negative mutex count indicates that waiters are sleeping waiting for the - * mutex. - */ -#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0) - void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -60,7 +54,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->osq = NULL; + osq_lock_init(&lock->osq); #endif debug_mutex_init(lock, name, key); @@ -152,7 +146,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) if (need_resched()) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } rcu_read_unlock(); @@ -388,12 +382,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, /* * Optimistic spinning. * - * We try to spin for acquisition when we find that there are no - * pending waiters and the lock owner is currently running on a - * (different) CPU. - * - * The rationale is that if the lock owner is running, it is likely to - * release the lock soon. + * We try to spin for acquisition when we find that the lock owner + * is currently running on a (different) CPU and while we don't + * need to reschedule. The rationale is that if the lock owner is + * running, it is likely to release the lock soon. * * Since this needs the lock owner, and this mutex implementation * doesn't track the owner atomically in the lock field, we need to @@ -440,7 +432,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (owner && !mutex_spin_on_owner(lock, owner)) break; - if ((atomic_read(&lock->count) == 1) && + /* Try to acquire the mutex if it is unlocked. */ + if (!mutex_is_locked(lock) && (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { lock_acquired(&lock->dep_map, ip); if (use_ww_ctx) { @@ -471,7 +464,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } osq_unlock(&lock->osq); slowpath: @@ -485,8 +478,11 @@ slowpath: #endif spin_lock_mutex(&lock->wait_lock, flags); - /* once more, can we acquire the lock? */ - if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1)) + /* + * Once more, try to acquire the lock. Only try-lock the mutex if + * it is unlocked to reduce unnecessary xchg() operations. + */ + if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1)) goto skip_wait; debug_mutex_lock_common(lock, &waiter); @@ -506,9 +502,10 @@ slowpath: * it's unlocked. Later on, if we sleep, this is the * operation that gives us the lock. We xchg it to -1, so * that when we release the lock, we properly wake up the - * other waiters: + * other waiters. We only attempt the xchg if the count is + * non-negative in order to avoid unnecessary xchg operations: */ - if (MUTEX_SHOW_NO_WAITER(lock) && + if (atomic_read(&lock->count) >= 0 && (atomic_xchg(&lock->count, -1) == 1)) break; @@ -823,6 +820,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) unsigned long flags; int prev; + /* No need to trylock if the mutex is locked. */ + if (mutex_is_locked(lock)) + return 0; + spin_lock_mutex(&lock->wait_lock, flags); prev = atomic_xchg(&lock->count, -1); diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index fb5b8ac411a..f956ede7f90 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -20,7 +20,6 @@ #include <linux/cpumask.h> #include <linux/percpu.h> #include <linux/hardirq.h> -#include <linux/mutex.h> #include <asm/qrwlock.h> /** @@ -35,7 +34,7 @@ static __always_inline void rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) { while ((cnts & _QW_WMASK) == _QW_LOCKED) { - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); cnts = smp_load_acquire((u32 *)&lock->cnts); } } @@ -75,7 +74,7 @@ void queue_read_lock_slowpath(struct qrwlock *lock) * to make sure that the write lock isn't taken. */ while (atomic_read(&lock->cnts) & _QW_WMASK) - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS; rspin_until_writer_unlock(lock, cnts); @@ -114,7 +113,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock) cnts | _QW_WAITING) == cnts)) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } /* When no more readers, set the locked flag */ @@ -125,7 +124,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock) _QW_LOCKED) == _QW_WAITING)) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } unlock: arch_spin_unlock(&lock->lock); diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 49b2ed3dced..62b6cee8ea7 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -66,12 +66,13 @@ void rt_mutex_debug_task_free(struct task_struct *task) * the deadlock. We print when we return. act_waiter can be NULL in * case of a remove waiter operation. */ -void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, +void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *act_waiter, struct rt_mutex *lock) { struct task_struct *task; - if (!debug_locks || detect || !act_waiter) + if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter) return; task = rt_mutex_owner(act_waiter->lock); diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h index ab29b6a2266..d0519c3432b 100644 --- a/kernel/locking/rtmutex-debug.h +++ b/kernel/locking/rtmutex-debug.h @@ -20,14 +20,15 @@ extern void debug_rt_mutex_unlock(struct rt_mutex *lock); extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner); extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); -extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, +extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *waiter, struct rt_mutex *lock); extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); # define debug_rt_mutex_reset_waiter(w) \ do { (w)->deadlock_lock = NULL; } while (0) -static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, - int detect) +static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, + enum rtmutex_chainwalk walk) { return (waiter != NULL); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index fc605941b9b..a0ea2a141b3 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -308,6 +308,32 @@ static void rt_mutex_adjust_prio(struct task_struct *task) } /* + * Deadlock detection is conditional: + * + * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted + * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. + * + * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always + * conducted independent of the detect argument. + * + * If the waiter argument is NULL this indicates the deboost path and + * deadlock detection is disabled independent of the detect argument + * and the config settings. + */ +static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, + enum rtmutex_chainwalk chwalk) +{ + /* + * This is just a wrapper function for the following call, + * because debug_rt_mutex_detect_deadlock() smells like a magic + * debug feature and I wanted to keep the cond function in the + * main source file along with the comments instead of having + * two of the same in the headers. + */ + return debug_rt_mutex_detect_deadlock(waiter, chwalk); +} + +/* * Max number of times we'll walk the boosting chain: */ int max_lock_depth = 1024; @@ -337,21 +363,65 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) * @top_task: the current top waiter * * Returns 0 or -EDEADLK. + * + * Chain walk basics and protection scope + * + * [R] refcount on task + * [P] task->pi_lock held + * [L] rtmutex->wait_lock held + * + * Step Description Protected by + * function arguments: + * @task [R] + * @orig_lock if != NULL @top_task is blocked on it + * @next_lock Unprotected. Cannot be + * dereferenced. Only used for + * comparison. + * @orig_waiter if != NULL @top_task is blocked on it + * @top_task current, or in case of proxy + * locking protected by calling + * code + * again: + * loop_sanity_check(); + * retry: + * [1] lock(task->pi_lock); [R] acquire [P] + * [2] waiter = task->pi_blocked_on; [P] + * [3] check_exit_conditions_1(); [P] + * [4] lock = waiter->lock; [P] + * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] + * unlock(task->pi_lock); release [P] + * goto retry; + * } + * [6] check_exit_conditions_2(); [P] + [L] + * [7] requeue_lock_waiter(lock, waiter); [P] + [L] + * [8] unlock(task->pi_lock); release [P] + * put_task_struct(task); release [R] + * [9] check_exit_conditions_3(); [L] + * [10] task = owner(lock); [L] + * get_task_struct(task); [L] acquire [R] + * lock(task->pi_lock); [L] acquire [P] + * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] + * [12] check_exit_conditions_4(); [P] + [L] + * [13] unlock(task->pi_lock); release [P] + * unlock(lock->wait_lock); release [L] + * goto again; */ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - int deadlock_detect, + enum rtmutex_chainwalk chwalk, struct rt_mutex *orig_lock, struct rt_mutex *next_lock, struct rt_mutex_waiter *orig_waiter, struct task_struct *top_task) { - struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; - int detect_deadlock, ret = 0, depth = 0; + struct rt_mutex_waiter *prerequeue_top_waiter; + int ret = 0, depth = 0; + struct rt_mutex *lock; + bool detect_deadlock; unsigned long flags; + bool requeue = true; - detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, - deadlock_detect); + detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); /* * The (de)boosting is a step by step approach with a lot of @@ -360,6 +430,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * carefully whether things change under us. */ again: + /* + * We limit the lock chain length for each invocation. + */ if (++depth > max_lock_depth) { static int prev_max; @@ -377,13 +450,28 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, return -EDEADLK; } + + /* + * We are fully preemptible here and only hold the refcount on + * @task. So everything can have changed under us since the + * caller or our own code below (goto retry/again) dropped all + * locks. + */ retry: /* - * Task can not go away as we did a get_task() before ! + * [1] Task cannot go away as we did a get_task() before ! */ raw_spin_lock_irqsave(&task->pi_lock, flags); + /* + * [2] Get the waiter on which @task is blocked on. + */ waiter = task->pi_blocked_on; + + /* + * [3] check_exit_conditions_1() protected by task->pi_lock. + */ + /* * Check whether the end of the boosting chain has been * reached or the state of the chain has changed while we @@ -421,20 +509,41 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, goto out_unlock_pi; /* * If deadlock detection is off, we stop here if we - * are not the top pi waiter of the task. + * are not the top pi waiter of the task. If deadlock + * detection is enabled we continue, but stop the + * requeueing in the chain walk. */ - if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) - goto out_unlock_pi; + if (top_waiter != task_top_pi_waiter(task)) { + if (!detect_deadlock) + goto out_unlock_pi; + else + requeue = false; + } } /* - * When deadlock detection is off then we check, if further - * priority adjustment is necessary. + * If the waiter priority is the same as the task priority + * then there is no further priority adjustment necessary. If + * deadlock detection is off, we stop the chain walk. If its + * enabled we continue, but stop the requeueing in the chain + * walk. */ - if (!detect_deadlock && waiter->prio == task->prio) - goto out_unlock_pi; + if (waiter->prio == task->prio) { + if (!detect_deadlock) + goto out_unlock_pi; + else + requeue = false; + } + /* + * [4] Get the next lock + */ lock = waiter->lock; + /* + * [5] We need to trylock here as we are holding task->pi_lock, + * which is the reverse lock order versus the other rtmutex + * operations. + */ if (!raw_spin_trylock(&lock->wait_lock)) { raw_spin_unlock_irqrestore(&task->pi_lock, flags); cpu_relax(); @@ -442,79 +551,180 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } /* + * [6] check_exit_conditions_2() protected by task->pi_lock and + * lock->wait_lock. + * * Deadlock detection. If the lock is the same as the original * lock which caused us to walk the lock chain or if the * current lock is owned by the task which initiated the chain * walk, we detected a deadlock. */ if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { - debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); + debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); raw_spin_unlock(&lock->wait_lock); ret = -EDEADLK; goto out_unlock_pi; } - top_waiter = rt_mutex_top_waiter(lock); + /* + * If we just follow the lock chain for deadlock detection, no + * need to do all the requeue operations. To avoid a truckload + * of conditionals around the various places below, just do the + * minimum chain walk checks. + */ + if (!requeue) { + /* + * No requeue[7] here. Just release @task [8] + */ + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + put_task_struct(task); + + /* + * [9] check_exit_conditions_3 protected by lock->wait_lock. + * If there is no owner of the lock, end of chain. + */ + if (!rt_mutex_owner(lock)) { + raw_spin_unlock(&lock->wait_lock); + return 0; + } + + /* [10] Grab the next task, i.e. owner of @lock */ + task = rt_mutex_owner(lock); + get_task_struct(task); + raw_spin_lock_irqsave(&task->pi_lock, flags); + + /* + * No requeue [11] here. We just do deadlock detection. + * + * [12] Store whether owner is blocked + * itself. Decision is made after dropping the locks + */ + next_lock = task_blocked_on_lock(task); + /* + * Get the top waiter for the next iteration + */ + top_waiter = rt_mutex_top_waiter(lock); + + /* [13] Drop locks */ + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + raw_spin_unlock(&lock->wait_lock); + + /* If owner is not blocked, end of chain. */ + if (!next_lock) + goto out_put_task; + goto again; + } - /* Requeue the waiter */ + /* + * Store the current top waiter before doing the requeue + * operation on @lock. We need it for the boost/deboost + * decision below. + */ + prerequeue_top_waiter = rt_mutex_top_waiter(lock); + + /* [7] Requeue the waiter in the lock waiter list. */ rt_mutex_dequeue(lock, waiter); waiter->prio = task->prio; rt_mutex_enqueue(lock, waiter); - /* Release the task */ + /* [8] Release the task */ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + put_task_struct(task); + + /* + * [9] check_exit_conditions_3 protected by lock->wait_lock. + * + * We must abort the chain walk if there is no lock owner even + * in the dead lock detection case, as we have nothing to + * follow here. This is the end of the chain we are walking. + */ if (!rt_mutex_owner(lock)) { /* - * If the requeue above changed the top waiter, then we need - * to wake the new top waiter up to try to get the lock. + * If the requeue [7] above changed the top waiter, + * then we need to wake the new top waiter up to try + * to get the lock. */ - - if (top_waiter != rt_mutex_top_waiter(lock)) + if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) wake_up_process(rt_mutex_top_waiter(lock)->task); raw_spin_unlock(&lock->wait_lock); - goto out_put_task; + return 0; } - put_task_struct(task); - /* Grab the next task */ + /* [10] Grab the next task, i.e. the owner of @lock */ task = rt_mutex_owner(lock); get_task_struct(task); raw_spin_lock_irqsave(&task->pi_lock, flags); + /* [11] requeue the pi waiters if necessary */ if (waiter == rt_mutex_top_waiter(lock)) { - /* Boost the owner */ - rt_mutex_dequeue_pi(task, top_waiter); + /* + * The waiter became the new top (highest priority) + * waiter on the lock. Replace the previous top waiter + * in the owner tasks pi waiters list with this waiter + * and adjust the priority of the owner. + */ + rt_mutex_dequeue_pi(task, prerequeue_top_waiter); rt_mutex_enqueue_pi(task, waiter); __rt_mutex_adjust_prio(task); - } else if (top_waiter == waiter) { - /* Deboost the owner */ + } else if (prerequeue_top_waiter == waiter) { + /* + * The waiter was the top waiter on the lock, but is + * no longer the top prority waiter. Replace waiter in + * the owner tasks pi waiters list with the new top + * (highest priority) waiter and adjust the priority + * of the owner. + * The new top waiter is stored in @waiter so that + * @waiter == @top_waiter evaluates to true below and + * we continue to deboost the rest of the chain. + */ rt_mutex_dequeue_pi(task, waiter); waiter = rt_mutex_top_waiter(lock); rt_mutex_enqueue_pi(task, waiter); __rt_mutex_adjust_prio(task); + } else { + /* + * Nothing changed. No need to do any priority + * adjustment. + */ } /* + * [12] check_exit_conditions_4() protected by task->pi_lock + * and lock->wait_lock. The actual decisions are made after we + * dropped the locks. + * * Check whether the task which owns the current lock is pi * blocked itself. If yes we store a pointer to the lock for * the lock chain change detection above. After we dropped * task->pi_lock next_lock cannot be dereferenced anymore. */ next_lock = task_blocked_on_lock(task); + /* + * Store the top waiter of @lock for the end of chain walk + * decision below. + */ + top_waiter = rt_mutex_top_waiter(lock); + /* [13] Drop the locks */ raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - top_waiter = rt_mutex_top_waiter(lock); raw_spin_unlock(&lock->wait_lock); /* + * Make the actual exit decisions [12], based on the stored + * values. + * * We reached the end of the lock chain. Stop right here. No * point to go back just to figure that out. */ if (!next_lock) goto out_put_task; + /* + * If the current waiter is not the top waiter on the lock, + * then we can stop the chain walk here if we are not in full + * deadlock detection mode. + */ if (!detect_deadlock && waiter != top_waiter) goto out_put_task; @@ -533,76 +743,119 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * * Must be called with lock->wait_lock held. * - * @lock: the lock to be acquired. - * @task: the task which wants to acquire the lock - * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) + * @lock: The lock to be acquired. + * @task: The task which wants to acquire the lock + * @waiter: The waiter that is queued to the lock's wait list if the + * callsite called task_blocked_on_lock(), otherwise NULL */ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, - struct rt_mutex_waiter *waiter) + struct rt_mutex_waiter *waiter) { + unsigned long flags; + /* - * We have to be careful here if the atomic speedups are - * enabled, such that, when - * - no other waiter is on the lock - * - the lock has been released since we did the cmpxchg - * the lock can be released or taken while we are doing the - * checks and marking the lock with RT_MUTEX_HAS_WAITERS. + * Before testing whether we can acquire @lock, we set the + * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all + * other tasks which try to modify @lock into the slow path + * and they serialize on @lock->wait_lock. + * + * The RT_MUTEX_HAS_WAITERS bit can have a transitional state + * as explained at the top of this file if and only if: * - * The atomic acquire/release aware variant of - * mark_rt_mutex_waiters uses a cmpxchg loop. After setting - * the WAITERS bit, the atomic release / acquire can not - * happen anymore and lock->wait_lock protects us from the - * non-atomic case. + * - There is a lock owner. The caller must fixup the + * transient state if it does a trylock or leaves the lock + * function due to a signal or timeout. * - * Note, that this might set lock->owner = - * RT_MUTEX_HAS_WAITERS in the case the lock is not contended - * any more. This is fixed up when we take the ownership. - * This is the transitional state explained at the top of this file. + * - @task acquires the lock and there are no other + * waiters. This is undone in rt_mutex_set_owner(@task) at + * the end of this function. */ mark_rt_mutex_waiters(lock); + /* + * If @lock has an owner, give up. + */ if (rt_mutex_owner(lock)) return 0; /* - * It will get the lock because of one of these conditions: - * 1) there is no waiter - * 2) higher priority than waiters - * 3) it is top waiter + * If @waiter != NULL, @task has already enqueued the waiter + * into @lock waiter list. If @waiter == NULL then this is a + * trylock attempt. */ - if (rt_mutex_has_waiters(lock)) { - if (task->prio >= rt_mutex_top_waiter(lock)->prio) { - if (!waiter || waiter != rt_mutex_top_waiter(lock)) - return 0; - } - } - - if (waiter || rt_mutex_has_waiters(lock)) { - unsigned long flags; - struct rt_mutex_waiter *top; - - raw_spin_lock_irqsave(&task->pi_lock, flags); + if (waiter) { + /* + * If waiter is not the highest priority waiter of + * @lock, give up. + */ + if (waiter != rt_mutex_top_waiter(lock)) + return 0; - /* remove the queued waiter. */ - if (waiter) { - rt_mutex_dequeue(lock, waiter); - task->pi_blocked_on = NULL; - } + /* + * We can acquire the lock. Remove the waiter from the + * lock waiters list. + */ + rt_mutex_dequeue(lock, waiter); + } else { /* - * We have to enqueue the top waiter(if it exists) into - * task->pi_waiters list. + * If the lock has waiters already we check whether @task is + * eligible to take over the lock. + * + * If there are no other waiters, @task can acquire + * the lock. @task->pi_blocked_on is NULL, so it does + * not need to be dequeued. */ if (rt_mutex_has_waiters(lock)) { - top = rt_mutex_top_waiter(lock); - rt_mutex_enqueue_pi(task, top); + /* + * If @task->prio is greater than or equal to + * the top waiter priority (kernel view), + * @task lost. + */ + if (task->prio >= rt_mutex_top_waiter(lock)->prio) + return 0; + + /* + * The current top waiter stays enqueued. We + * don't have to change anything in the lock + * waiters order. + */ + } else { + /* + * No waiters. Take the lock without the + * pi_lock dance.@task->pi_blocked_on is NULL + * and we have no waiters to enqueue in @task + * pi waiters list. + */ + goto takeit; } - raw_spin_unlock_irqrestore(&task->pi_lock, flags); } + /* + * Clear @task->pi_blocked_on. Requires protection by + * @task->pi_lock. Redundant operation for the @waiter == NULL + * case, but conditionals are more expensive than a redundant + * store. + */ + raw_spin_lock_irqsave(&task->pi_lock, flags); + task->pi_blocked_on = NULL; + /* + * Finish the lock acquisition. @task is the new owner. If + * other waiters exist we have to insert the highest priority + * waiter into @task->pi_waiters list. + */ + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + +takeit: /* We got the lock. */ debug_rt_mutex_lock(lock); + /* + * This either preserves the RT_MUTEX_HAS_WAITERS bit if there + * are still waiters or clears it. + */ rt_mutex_set_owner(lock, task); rt_mutex_deadlock_account_lock(lock, task); @@ -620,7 +873,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, - int detect_deadlock) + enum rtmutex_chainwalk chwalk) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; @@ -666,7 +919,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, __rt_mutex_adjust_prio(owner); if (owner->pi_blocked_on) chain_walk = 1; - } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { + } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { chain_walk = 1; } @@ -691,7 +944,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, + res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, next_lock, waiter, task); raw_spin_lock(&lock->wait_lock); @@ -753,9 +1006,9 @@ static void wakeup_next_waiter(struct rt_mutex *lock) static void remove_waiter(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { - int first = (waiter == rt_mutex_top_waiter(lock)); + bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); - struct rt_mutex *next_lock = NULL; + struct rt_mutex *next_lock; unsigned long flags; raw_spin_lock_irqsave(¤t->pi_lock, flags); @@ -763,29 +1016,31 @@ static void remove_waiter(struct rt_mutex *lock, current->pi_blocked_on = NULL; raw_spin_unlock_irqrestore(¤t->pi_lock, flags); - if (!owner) + /* + * Only update priority if the waiter was the highest priority + * waiter of the lock and there is an owner to update. + */ + if (!owner || !is_top_waiter) return; - if (first) { - - raw_spin_lock_irqsave(&owner->pi_lock, flags); + raw_spin_lock_irqsave(&owner->pi_lock, flags); - rt_mutex_dequeue_pi(owner, waiter); + rt_mutex_dequeue_pi(owner, waiter); - if (rt_mutex_has_waiters(lock)) { - struct rt_mutex_waiter *next; + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); - next = rt_mutex_top_waiter(lock); - rt_mutex_enqueue_pi(owner, next); - } - __rt_mutex_adjust_prio(owner); + __rt_mutex_adjust_prio(owner); - /* Store the lock on which owner is blocked or NULL */ - next_lock = task_blocked_on_lock(owner); + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); - } + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); + /* + * Don't walk the chain, if the owner task is not blocked + * itself. + */ if (!next_lock) return; @@ -794,7 +1049,8 @@ static void remove_waiter(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); + rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, + next_lock, NULL, current); raw_spin_lock(&lock->wait_lock); } @@ -824,7 +1080,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) /* gets dropped in rt_mutex_adjust_prio_chain()! */ get_task_struct(task); - rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); + rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, + next_lock, NULL, task); } /** @@ -902,7 +1159,7 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock) + enum rtmutex_chainwalk chwalk) { struct rt_mutex_waiter waiter; int ret = 0; @@ -928,7 +1185,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, timeout->task = NULL; } - ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); + ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); if (likely(!ret)) ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); @@ -937,7 +1194,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, if (unlikely(ret)) { remove_waiter(lock, &waiter); - rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); + rt_mutex_handle_deadlock(ret, chwalk, &waiter); } /* @@ -960,22 +1217,31 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, /* * Slow path try-lock function: */ -static inline int -rt_mutex_slowtrylock(struct rt_mutex *lock) +static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) { - int ret = 0; + int ret; + + /* + * If the lock already has an owner we fail to get the lock. + * This can be done without taking the @lock->wait_lock as + * it is only being read, and this is a trylock anyway. + */ + if (rt_mutex_owner(lock)) + return 0; + /* + * The mutex has currently no owner. Lock the wait lock and + * try to acquire the lock. + */ raw_spin_lock(&lock->wait_lock); - if (likely(rt_mutex_owner(lock) != current)) { + ret = try_to_take_rt_mutex(lock, current, NULL); - ret = try_to_take_rt_mutex(lock, current, NULL); - /* - * try_to_take_rt_mutex() sets the lock waiters - * bit unconditionally. Clean this up. - */ - fixup_rt_mutex_waiters(lock); - } + /* + * try_to_take_rt_mutex() sets the lock waiters bit + * unconditionally. Clean this up. + */ + fixup_rt_mutex_waiters(lock); raw_spin_unlock(&lock->wait_lock); @@ -1053,30 +1319,31 @@ rt_mutex_slowunlock(struct rt_mutex *lock) */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, - int detect_deadlock, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock)) + enum rtmutex_chainwalk chwalk)) { - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, NULL, detect_deadlock); + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); } static inline int rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, int detect_deadlock, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock)) + enum rtmutex_chainwalk chwalk)) { - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + if (chwalk == RT_MUTEX_MIN_CHAINWALK && + likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, timeout, detect_deadlock); + return slowfn(lock, state, timeout, chwalk); } static inline int @@ -1109,54 +1376,61 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) { might_sleep(); - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock); /** * rt_mutex_lock_interruptible - lock a rt_mutex interruptible * - * @lock: the rt_mutex to be locked - * @detect_deadlock: deadlock detection on/off + * @lock: the rt_mutex to be locked * * Returns: - * 0 on success - * -EINTR when interrupted by a signal - * -EDEADLK when the lock would deadlock (when deadlock detection is on) + * 0 on success + * -EINTR when interrupted by a signal */ -int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, - int detect_deadlock) +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { might_sleep(); - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, - detect_deadlock, rt_mutex_slowlock); + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); +/* + * Futex variant with full deadlock detection. + */ +int rt_mutex_timed_futex_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *timeout) +{ + might_sleep(); + + return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, + RT_MUTEX_FULL_CHAINWALK, + rt_mutex_slowlock); +} + /** * rt_mutex_timed_lock - lock a rt_mutex interruptible * the timeout structure is provided * by the caller * - * @lock: the rt_mutex to be locked + * @lock: the rt_mutex to be locked * @timeout: timeout structure or NULL (no timeout) - * @detect_deadlock: deadlock detection on/off * * Returns: - * 0 on success - * -EINTR when interrupted by a signal + * 0 on success + * -EINTR when interrupted by a signal * -ETIMEDOUT when the timeout expired - * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ int -rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, - int detect_deadlock) +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) { might_sleep(); return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - detect_deadlock, rt_mutex_slowlock); + RT_MUTEX_MIN_CHAINWALK, + rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); @@ -1262,7 +1536,6 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, * @lock: the rt_mutex to take * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare - * @detect_deadlock: perform deadlock detection (1) or not (0) * * Returns: * 0 - task blocked on lock @@ -1273,7 +1546,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task, int detect_deadlock) + struct task_struct *task) { int ret; @@ -1285,7 +1558,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, } /* We enforce deadlock detection for futexes */ - ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); + ret = task_blocks_on_rt_mutex(lock, waiter, task, + RT_MUTEX_FULL_CHAINWALK); if (ret && !rt_mutex_owner(lock)) { /* @@ -1331,22 +1605,20 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) * rt_mutex_finish_proxy_lock() - Complete lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have - * been started. + * been started. * @waiter: the pre-initialized rt_mutex_waiter - * @detect_deadlock: perform deadlock detection (1) or not (0) * * Complete the lock acquisition started our behalf by another thread. * * Returns: * 0 - success - * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * <0 - error, one of -EINTR, -ETIMEDOUT * * Special API call for PI-futex requeue support */ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock) + struct rt_mutex_waiter *waiter) { int ret; diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h index f6a1f3c133b..c4060584c40 100644 --- a/kernel/locking/rtmutex.h +++ b/kernel/locking/rtmutex.h @@ -22,10 +22,15 @@ #define debug_rt_mutex_init(m, n) do { } while (0) #define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) #define debug_rt_mutex_print_deadlock(w) do { } while (0) -#define debug_rt_mutex_detect_deadlock(w,d) (d) #define debug_rt_mutex_reset_waiter(w) do { } while (0) static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) { WARN(1, "rtmutex deadlock detected\n"); } + +static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w, + enum rtmutex_chainwalk walk) +{ + return walk == RT_MUTEX_FULL_CHAINWALK; +} diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 7431a9c86f3..85521250140 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -102,6 +102,21 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) } /* + * Constants for rt mutex functions which have a selectable deadlock + * detection. + * + * RT_MUTEX_MIN_CHAINWALK: Stops the lock chain walk when there are + * no further PI adjustments to be made. + * + * RT_MUTEX_FULL_CHAINWALK: Invoke deadlock detection with a full + * walk of the lock chain. + */ +enum rtmutex_chainwalk { + RT_MUTEX_MIN_CHAINWALK, + RT_MUTEX_FULL_CHAINWALK, +}; + +/* * PI-futex support (proxy locking functions, etc.): */ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); @@ -111,12 +126,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task, - int detect_deadlock); + struct task_struct *task); extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock); + struct rt_mutex_waiter *waiter); +extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 9be8a914497..2c93571162c 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem) unsigned long flags; if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->activity != 0); + ret = (sem->count != 0); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } return ret; @@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->activity = 0; + sem->count = 0; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } @@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(next, struct rwsem_waiter, list); } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - sem->activity += woken; + sem->count += woken; out: return sem; @@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + if (sem->count >= 0 && list_empty(&sem->wait_list)) { /* granted */ - sem->activity++; + sem->count++; raw_spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + if (sem->count >= 0 && list_empty(&sem->wait_list)) { /* granted */ - sem->activity++; + sem->count++; ret = 1; } @@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) * itself into sleep and waiting for system woke it or someone * else in the head of the wait list up. */ - if (sem->activity == 0) + if (sem->count == 0) break; set_task_state(tsk, TASK_UNINTERRUPTIBLE); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); } /* got the lock */ - sem->activity = -1; + sem->count = -1; list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0) { + if (sem->count == 0) { /* got the lock */ - sem->activity = -1; + sem->count = -1; ret = 1; } @@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + if (--sem->count == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - sem->activity = 0; + sem->count = 0; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 1); @@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - sem->activity = 1; + sem->count = 1; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 0); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index dacc32142fc..d6203faf2eb 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, sem->count = RWSEM_UNLOCKED_VALUE; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER sem->owner = NULL; - sem->osq = NULL; + osq_lock_init(&sem->osq); #endif } @@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) return false; } -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * Try to acquire write lock before the writer has been put on wait queue. */ @@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; - bool on_cpu = true; + bool on_cpu = false; if (need_resched()) - return 0; + return false; rcu_read_lock(); owner = ACCESS_ONCE(sem->owner); @@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_unlock(); /* - * If sem->owner is not set, the rwsem owner may have - * just acquired it and not set the owner yet or the rwsem - * has been released. + * If sem->owner is not set, yet we have just recently entered the + * slowpath, then there is a possibility reader(s) may have the lock. + * To be safe, avoid spinning in these situations. */ return on_cpu; } @@ -329,7 +329,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) if (need_resched()) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } rcu_read_unlock(); @@ -381,7 +381,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } osq_unlock(&sem->osq); done: diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 42f806de49d..e2d3bc7f03b 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -12,7 +12,7 @@ #include <linux/atomic.h> -#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM) +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER static inline void rwsem_set_owner(struct rw_semaphore *sem) { sem->owner = current; |