summaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 16:09:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 16:09:06 -0700
commit8efb90cf1e80129fad197b916714e1d01ee183d2 (patch)
tree8eacf9e5f34cc7a9e2b553974b9f9181d61be294 /kernel/futex.c
parent5bda4f638f36ef4c4e3b1397b02affc3db94356e (diff)
parent3a6bfbc91df04b081a44d419e0260bad54abddf7 (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "The main changes in this cycle are: - big rtmutex and futex cleanup and robustification from Thomas Gleixner - mutex optimizations and refinements from Jason Low - arch_mutex_cpu_relax() removal and related cleanups - smaller lockdep tweaks" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) arch, locking: Ciao arch_mutex_cpu_relax() locking/lockdep: Only ask for /proc/lock_stat output when available locking/mutexes: Optimize mutex trylock slowpath locking/mutexes: Try to acquire mutex only if it is unlocked locking/mutexes: Delete the MUTEX_SHOW_NO_WAITER macro locking/mutexes: Correct documentation on mutex optimistic spinning rtmutex: Make the rtmutex tester depend on BROKEN futex: Simplify futex_lock_pi_atomic() and make it more robust futex: Split out the first waiter attachment from lookup_pi_state() futex: Split out the waiter check from lookup_pi_state() futex: Use futex_top_waiter() in lookup_pi_state() futex: Make unlock_pi more robust rtmutex: Avoid pointless requeueing in the deadlock detection chain walk rtmutex: Cleanup deadlock detector debug logic rtmutex: Confine deadlock logic to futex rtmutex: Simplify remove_waiter() rtmutex: Document pi chain walk rtmutex: Clarify the boost/deboost part rtmutex: No need to keep task ref for lock owner check rtmutex: Simplify and document try_to_take_rtmutex() ...
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c402
1 files changed, 183 insertions, 219 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index b632b5f3f09..d3a9d946d0b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -792,94 +792,91 @@ void exit_pi_state_list(struct task_struct *curr)
* [10] There is no transient state which leaves owner and user space
* TID out of sync.
*/
-static int
-lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
- union futex_key *key, struct futex_pi_state **ps)
+
+/*
+ * Validate that the existing waiter has a pi_state and sanity check
+ * the pi_state against the user space value. If correct, attach to
+ * it.
+ */
+static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+ struct futex_pi_state **ps)
{
- struct futex_pi_state *pi_state = NULL;
- struct futex_q *this, *next;
- struct task_struct *p;
pid_t pid = uval & FUTEX_TID_MASK;
- plist_for_each_entry_safe(this, next, &hb->chain, list) {
- if (match_futex(&this->key, key)) {
- /*
- * Sanity check the waiter before increasing
- * the refcount and attaching to it.
- */
- pi_state = this->pi_state;
- /*
- * Userspace might have messed up non-PI and
- * PI futexes [3]
- */
- if (unlikely(!pi_state))
- return -EINVAL;
+ /*
+ * Userspace might have messed up non-PI and PI futexes [3]
+ */
+ if (unlikely(!pi_state))
+ return -EINVAL;
- WARN_ON(!atomic_read(&pi_state->refcount));
+ WARN_ON(!atomic_read(&pi_state->refcount));
+ /*
+ * Handle the owner died case:
+ */
+ if (uval & FUTEX_OWNER_DIED) {
+ /*
+ * exit_pi_state_list sets owner to NULL and wakes the
+ * topmost waiter. The task which acquires the
+ * pi_state->rt_mutex will fixup owner.
+ */
+ if (!pi_state->owner) {
/*
- * Handle the owner died case:
+ * No pi state owner, but the user space TID
+ * is not 0. Inconsistent state. [5]
*/
- if (uval & FUTEX_OWNER_DIED) {
- /*
- * exit_pi_state_list sets owner to NULL and
- * wakes the topmost waiter. The task which
- * acquires the pi_state->rt_mutex will fixup
- * owner.
- */
- if (!pi_state->owner) {
- /*
- * No pi state owner, but the user
- * space TID is not 0. Inconsistent
- * state. [5]
- */
- if (pid)
- return -EINVAL;
- /*
- * Take a ref on the state and
- * return. [4]
- */
- goto out_state;
- }
-
- /*
- * If TID is 0, then either the dying owner
- * has not yet executed exit_pi_state_list()
- * or some waiter acquired the rtmutex in the
- * pi state, but did not yet fixup the TID in
- * user space.
- *
- * Take a ref on the state and return. [6]
- */
- if (!pid)
- goto out_state;
- } else {
- /*
- * If the owner died bit is not set,
- * then the pi_state must have an
- * owner. [7]
- */
- if (!pi_state->owner)
- return -EINVAL;
- }
-
+ if (pid)
+ return -EINVAL;
/*
- * Bail out if user space manipulated the
- * futex value. If pi state exists then the
- * owner TID must be the same as the user
- * space TID. [9/10]
+ * Take a ref on the state and return success. [4]
*/
- if (pid != task_pid_vnr(pi_state->owner))
- return -EINVAL;
-
- out_state:
- atomic_inc(&pi_state->refcount);
- *ps = pi_state;
- return 0;
+ goto out_state;
}
+
+ /*
+ * If TID is 0, then either the dying owner has not
+ * yet executed exit_pi_state_list() or some waiter
+ * acquired the rtmutex in the pi state, but did not
+ * yet fixup the TID in user space.
+ *
+ * Take a ref on the state and return success. [6]
+ */
+ if (!pid)
+ goto out_state;
+ } else {
+ /*
+ * If the owner died bit is not set, then the pi_state
+ * must have an owner. [7]
+ */
+ if (!pi_state->owner)
+ return -EINVAL;
}
/*
+ * Bail out if user space manipulated the futex value. If pi
+ * state exists then the owner TID must be the same as the
+ * user space TID. [9/10]
+ */
+ if (pid != task_pid_vnr(pi_state->owner))
+ return -EINVAL;
+out_state:
+ atomic_inc(&pi_state->refcount);
+ *ps = pi_state;
+ return 0;
+}
+
+/*
+ * Lookup the task for the TID provided from user space and attach to
+ * it after doing proper sanity checks.
+ */
+static int attach_to_pi_owner(u32 uval, union futex_key *key,
+ struct futex_pi_state **ps)
+{
+ pid_t pid = uval & FUTEX_TID_MASK;
+ struct futex_pi_state *pi_state;
+ struct task_struct *p;
+
+ /*
* We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 [1]
*/
@@ -920,7 +917,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
pi_state = alloc_pi_state();
/*
- * Initialize the pi_mutex in locked state and make 'p'
+ * Initialize the pi_mutex in locked state and make @p
* the owner of it:
*/
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
@@ -940,6 +937,36 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return 0;
}
+static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+ union futex_key *key, struct futex_pi_state **ps)
+{
+ struct futex_q *match = futex_top_waiter(hb, key);
+
+ /*
+ * If there is a waiter on that futex, validate it and
+ * attach to the pi_state when the validation succeeds.
+ */
+ if (match)
+ return attach_to_pi_state(uval, match->pi_state, ps);
+
+ /*
+ * We are the first waiter - try to look up the owner based on
+ * @uval and attach to it.
+ */
+ return attach_to_pi_owner(uval, key, ps);
+}
+
+static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+{
+ u32 uninitialized_var(curval);
+
+ if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
+ return -EFAULT;
+
+ /*If user space value changed, let the caller retry */
+ return curval != uval ? -EAGAIN : 0;
+}
+
/**
* futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
* @uaddr: the pi futex user address
@@ -963,113 +990,69 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
struct futex_pi_state **ps,
struct task_struct *task, int set_waiters)
{
- int lock_taken, ret, force_take = 0;
- u32 uval, newval, curval, vpid = task_pid_vnr(task);
-
-retry:
- ret = lock_taken = 0;
+ u32 uval, newval, vpid = task_pid_vnr(task);
+ struct futex_q *match;
+ int ret;
/*
- * To avoid races, we attempt to take the lock here again
- * (by doing a 0 -> TID atomic cmpxchg), while holding all
- * the locks. It will most likely not succeed.
+ * Read the user space value first so we can validate a few
+ * things before proceeding further.
*/
- newval = vpid;
- if (set_waiters)
- newval |= FUTEX_WAITERS;
-
- if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
+ if (get_futex_value_locked(&uval, uaddr))
return -EFAULT;
/*
* Detect deadlocks.
*/
- if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
+ if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
return -EDEADLK;
/*
- * Surprise - we got the lock, but we do not trust user space at all.
- */
- if (unlikely(!curval)) {
- /*
- * We verify whether there is kernel state for this
- * futex. If not, we can safely assume, that the 0 ->
- * TID transition is correct. If state exists, we do
- * not bother to fixup the user space state as it was
- * corrupted already.
- */
- return futex_top_waiter(hb, key) ? -EINVAL : 1;
- }
-
- uval = curval;
-
- /*
- * Set the FUTEX_WAITERS flag, so the owner will know it has someone
- * to wake at the next unlock.
+ * Lookup existing state first. If it exists, try to attach to
+ * its pi_state.
*/
- newval = curval | FUTEX_WAITERS;
+ match = futex_top_waiter(hb, key);
+ if (match)
+ return attach_to_pi_state(uval, match->pi_state, ps);
/*
- * Should we force take the futex? See below.
+ * No waiter and user TID is 0. We are here because the
+ * waiters or the owner died bit is set or called from
+ * requeue_cmp_pi or for whatever reason something took the
+ * syscall.
*/
- if (unlikely(force_take)) {
+ if (!(uval & FUTEX_TID_MASK)) {
/*
- * Keep the OWNER_DIED and the WAITERS bit and set the
- * new TID value.
+ * We take over the futex. No other waiters and the user space
+ * TID is 0. We preserve the owner died bit.
*/
- newval = (curval & ~FUTEX_TID_MASK) | vpid;
- force_take = 0;
- lock_taken = 1;
- }
+ newval = uval & FUTEX_OWNER_DIED;
+ newval |= vpid;
- if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
- return -EFAULT;
- if (unlikely(curval != uval))
- goto retry;
+ /* The futex requeue_pi code can enforce the waiters bit */
+ if (set_waiters)
+ newval |= FUTEX_WAITERS;
+
+ ret = lock_pi_update_atomic(uaddr, uval, newval);
+ /* If the take over worked, return 1 */
+ return ret < 0 ? ret : 1;
+ }
/*
- * We took the lock due to forced take over.
+ * First waiter. Set the waiters bit before attaching ourself to
+ * the owner. If owner tries to unlock, it will be forced into
+ * the kernel and blocked on hb->lock.
*/
- if (unlikely(lock_taken))
- return 1;
-
+ newval = uval | FUTEX_WAITERS;
+ ret = lock_pi_update_atomic(uaddr, uval, newval);
+ if (ret)
+ return ret;
/*
- * We dont have the lock. Look up the PI state (or create it if
- * we are the first waiter):
+ * If the update of the user space value succeeded, we try to
+ * attach to the owner. If that fails, no harm done, we only
+ * set the FUTEX_WAITERS bit in the user space variable.
*/
- ret = lookup_pi_state(uval, hb, key, ps);
-
- if (unlikely(ret)) {
- switch (ret) {
- case -ESRCH:
- /*
- * We failed to find an owner for this
- * futex. So we have no pi_state to block
- * on. This can happen in two cases:
- *
- * 1) The owner died
- * 2) A stale FUTEX_WAITERS bit
- *
- * Re-read the futex value.
- */
- if (get_futex_value_locked(&curval, uaddr))
- return -EFAULT;
-
- /*
- * If the owner died or we have a stale
- * WAITERS bit the owner TID in the user space
- * futex is 0.
- */
- if (!(curval & FUTEX_TID_MASK)) {
- force_take = 1;
- goto retry;
- }
- default:
- break;
- }
- }
-
- return ret;
+ return attach_to_pi_owner(uval, key, ps);
}
/**
@@ -1186,22 +1169,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
return 0;
}
-static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
-{
- u32 uninitialized_var(oldval);
-
- /*
- * There is no waiter, so we unlock the futex. The owner died
- * bit has not to be preserved here. We are the owner:
- */
- if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
- return -EFAULT;
- if (oldval != uval)
- return -EAGAIN;
-
- return 0;
-}
-
/*
* Express the locking dependencies for lockdep:
*/
@@ -1659,7 +1626,12 @@ retry_private:
goto retry;
goto out;
case -EAGAIN:
- /* The owner was exiting, try again. */
+ /*
+ * Two reasons for this:
+ * - Owner is exiting and we just wait for the
+ * exit to complete.
+ * - The user space value changed.
+ */
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1718,7 +1690,7 @@ retry_private:
this->pi_state = pi_state;
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
- this->task, 1);
+ this->task);
if (ret == 1) {
/* We got the lock. */
requeue_pi_wake_futex(this, &key2, hb2);
@@ -2316,8 +2288,10 @@ retry_private:
goto uaddr_faulted;
case -EAGAIN:
/*
- * Task is exiting and we just wait for the
- * exit to complete.
+ * Two reasons for this:
+ * - Task is exiting and we just wait for the
+ * exit to complete.
+ * - The user space value changed.
*/
queue_unlock(hb);
put_futex_key(&q.key);
@@ -2337,9 +2311,9 @@ retry_private:
/*
* Block on the PI mutex:
*/
- if (!trylock)
- ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
- else {
+ if (!trylock) {
+ ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
+ } else {
ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
/* Fixup the trylock return value: */
ret = ret ? 0 : -EWOULDBLOCK;
@@ -2401,10 +2375,10 @@ uaddr_faulted:
*/
static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
- struct futex_hash_bucket *hb;
- struct futex_q *this, *next;
+ u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
union futex_key key = FUTEX_KEY_INIT;
- u32 uval, vpid = task_pid_vnr(current);
+ struct futex_hash_bucket *hb;
+ struct futex_q *match;
int ret;
retry:
@@ -2417,57 +2391,47 @@ retry:
return -EPERM;
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
- if (unlikely(ret != 0))
- goto out;
+ if (ret)
+ return ret;
hb = hash_futex(&key);
spin_lock(&hb->lock);
/*
- * To avoid races, try to do the TID -> 0 atomic transition
- * again. If it succeeds then we can return without waking
- * anyone else up. We only try this if neither the waiters nor
- * the owner died bit are set.
- */
- if (!(uval & ~FUTEX_TID_MASK) &&
- cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
- goto pi_faulted;
- /*
- * Rare case: we managed to release the lock atomically,
- * no need to wake anyone else up:
- */
- if (unlikely(uval == vpid))
- goto out_unlock;
-
- /*
- * Ok, other tasks may need to be woken up - check waiters
- * and do the wakeup if necessary:
+ * Check waiters first. We do not trust user space values at
+ * all and we at least want to know if user space fiddled
+ * with the futex value instead of blindly unlocking.
*/
- plist_for_each_entry_safe(this, next, &hb->chain, list) {
- if (!match_futex (&this->key, &key))
- continue;
- ret = wake_futex_pi(uaddr, uval, this);
+ match = futex_top_waiter(hb, &key);
+ if (match) {
+ ret = wake_futex_pi(uaddr, uval, match);
/*
- * The atomic access to the futex value
- * generated a pagefault, so retry the
- * user-access and the wakeup:
+ * The atomic access to the futex value generated a
+ * pagefault, so retry the user-access and the wakeup:
*/
if (ret == -EFAULT)
goto pi_faulted;
goto out_unlock;
}
+
/*
- * No waiters - kernel unlocks the futex:
+ * We have no kernel internal state, i.e. no waiters in the
+ * kernel. Waiters which are about to queue themselves are stuck
+ * on hb->lock. So we can safely ignore them. We do neither
+ * preserve the WAITERS bit not the OWNER_DIED one. We are the
+ * owner.
*/
- ret = unlock_futex_pi(uaddr, uval);
- if (ret == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
goto pi_faulted;
+ /*
+ * If uval has changed, let user space handle it.
+ */
+ ret = (curval == uval) ? 0 : -EAGAIN;
+
out_unlock:
spin_unlock(&hb->lock);
put_futex_key(&key);
-
-out:
return ret;
pi_faulted:
@@ -2669,7 +2633,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
WARN_ON(!q.pi_state);
pi_mutex = &q.pi_state->pi_mutex;
- ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+ ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
debug_rt_mutex_free_waiter(&rt_waiter);
spin_lock(q.lock_ptr);