summaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <m.chehab@samsung.com>2014-04-14 12:00:36 -0300
committerMauro Carvalho Chehab <m.chehab@samsung.com>2014-04-14 12:00:36 -0300
commit277a163c83d7ba93fba1e8980d29a9f8bfcfba6c (patch)
treeccfd357d152292958957b6b8a993892e7a8cc95f /kernel/futex.c
parenta83b93a7480441a47856dc9104bea970e84cda87 (diff)
parentc9eaa447e77efe77b7fa4c953bd62de8297fd6c5 (diff)
Merge tag 'v3.15-rc1' into patchwork
Linux 3.15-rc1 * tag 'v3.15-rc1': (12180 commits) Linux 3.15-rc1 mm: Initialize error in shmem_file_aio_read() cifs: Use min_t() when comparing "size_t" and "unsigned long" sym53c8xx_2: Set DID_REQUEUE return code when aborting squeue powerpc: Don't try to set LPCR unless we're in hypervisor mode futex: update documentation for ordering guarantees ceph: fix pr_fmt() redefinition vti: don't allow to add the same tunnel twice gre: don't allow to add the same tunnel twice drivers: net: xen-netfront: fix array initialization bug missing bits of "splice: fix racy pipe->buffers uses" cifs: fix the race in cifs_writev() ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure pktgen: be friendly to LLTX devices r8152: check RTL8152_UNPLUG net: sun4i-emac: add promiscuous support net/apne: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO blackfin: cleanup board files bf609: clock: drop unused clock bit set/clear functions Blackfin: bf537: rename "CONFIG_ADT75" ...
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c127
1 files changed, 95 insertions, 32 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 44a1261cb9f..5f589279e46 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -70,7 +70,10 @@
#include "locking/rtmutex_common.h"
/*
- * Basic futex operation and ordering guarantees:
+ * READ this before attempting to hack on futexes!
+ *
+ * Basic futex operation and ordering guarantees
+ * =============================================
*
* The waiter reads the futex value in user space and calls
* futex_wait(). This function computes the hash bucket and acquires
@@ -119,7 +122,7 @@
* sys_futex(WAIT, futex, val);
* futex_wait(futex, val);
*
- * waiters++;
+ * waiters++; (a)
* mb(); (A) <-- paired with -.
* |
* lock(hash_bucket(futex)); |
@@ -135,14 +138,14 @@
* unlock(hash_bucket(futex));
* schedule(); if (waiters)
* lock(hash_bucket(futex));
- * wake_waiters(futex);
- * unlock(hash_bucket(futex));
+ * else wake_waiters(futex);
+ * waiters--; (b) unlock(hash_bucket(futex));
*
- * Where (A) orders the waiters increment and the futex value read -- this
- * is guaranteed by the head counter in the hb spinlock; and where (B)
- * orders the write to futex and the waiters read -- this is done by the
- * barriers in get_futex_key_refs(), through either ihold or atomic_inc,
- * depending on the futex type.
+ * Where (A) orders the waiters increment and the futex value read through
+ * atomic operations (see hb_waiters_inc) and where (B) orders the write
+ * to futex and the waiters read -- this is done by the barriers in
+ * get_futex_key_refs(), through either ihold or atomic_inc, depending on the
+ * futex type.
*
* This yields the following case (where X:=waiters, Y:=futex):
*
@@ -155,9 +158,22 @@
* Which guarantees that x==0 && y==0 is impossible; which translates back into
* the guarantee that we cannot both miss the futex variable change and the
* enqueue.
+ *
+ * Note that a new waiter is accounted for in (a) even when it is possible that
+ * the wait call can return error, in which case we backtrack from it in (b).
+ * Refer to the comment in queue_lock().
+ *
+ * Similarly, in order to account for waiters being requeued on another
+ * address we always increment the waiters for the destination bucket before
+ * acquiring the lock. It then decrements them again after releasing it -
+ * the code that actually moves the futex(es) between hash buckets (requeue_futex)
+ * will do the additional required waiter count housekeeping. This is done for
+ * double_lock_hb() and double_unlock_hb(), respectively.
*/
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
int __read_mostly futex_cmpxchg_enabled;
+#endif
/*
* Futex flags used to encode options to functions and preserve them across
@@ -234,6 +250,7 @@ static const struct futex_q futex_q_init = {
* waiting on a futex.
*/
struct futex_hash_bucket {
+ atomic_t waiters;
spinlock_t lock;
struct plist_head chain;
} ____cacheline_aligned_in_smp;
@@ -253,22 +270,37 @@ static inline void futex_get_mm(union futex_key *key)
smp_mb__after_atomic_inc();
}
-static inline bool hb_waiters_pending(struct futex_hash_bucket *hb)
+/*
+ * Reflects a new waiter being added to the waitqueue.
+ */
+static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
+ atomic_inc(&hb->waiters);
/*
- * Tasks trying to enter the critical region are most likely
- * potential waiters that will be added to the plist. Ensure
- * that wakers won't miss to-be-slept tasks in the window between
- * the wait call and the actual plist_add.
+ * Full barrier (A), see the ordering comment above.
*/
- if (spin_is_locked(&hb->lock))
- return true;
- smp_rmb(); /* Make sure we check the lock state first */
+ smp_mb__after_atomic_inc();
+#endif
+}
- return !plist_head_empty(&hb->chain);
+/*
+ * Reflects a waiter being removed from the waitqueue by wakeup
+ * paths.
+ */
+static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+ atomic_dec(&hb->waiters);
+#endif
+}
+
+static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+ return atomic_read(&hb->waiters);
#else
- return true;
+ return 1;
#endif
}
@@ -954,6 +986,7 @@ static void __unqueue_futex(struct futex_q *q)
hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
plist_del(&q->list, &hb->chain);
+ hb_waiters_dec(hb);
}
/*
@@ -1257,7 +1290,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
*/
if (likely(&hb1->chain != &hb2->chain)) {
plist_del(&q->list, &hb1->chain);
+ hb_waiters_dec(hb1);
plist_add(&q->list, &hb2->chain);
+ hb_waiters_inc(hb2);
q->lock_ptr = &hb2->lock;
}
get_futex_key_refs(key2);
@@ -1431,6 +1466,7 @@ retry:
hb2 = hash_futex(&key2);
retry_private:
+ hb_waiters_inc(hb2);
double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
@@ -1440,6 +1476,7 @@ retry_private:
if (unlikely(ret)) {
double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
ret = get_user(curval, uaddr1);
if (ret)
@@ -1489,6 +1526,7 @@ retry_private:
break;
case -EFAULT:
double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
put_futex_key(&key2);
put_futex_key(&key1);
ret = fault_in_user_writeable(uaddr2);
@@ -1498,6 +1536,7 @@ retry_private:
case -EAGAIN:
/* The owner was exiting, try again. */
double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
put_futex_key(&key2);
put_futex_key(&key1);
cond_resched();
@@ -1573,6 +1612,7 @@ retry_private:
out_unlock:
double_unlock_hb(hb1, hb2);
+ hb_waiters_dec(hb2);
/*
* drop_futex_key_refs() must be called outside the spinlocks. During
@@ -1600,6 +1640,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
struct futex_hash_bucket *hb;
hb = hash_futex(&q->key);
+
+ /*
+ * Increment the counter before taking the lock so that
+ * a potential waker won't miss a to-be-slept task that is
+ * waiting for the spinlock. This is safe as all queue_lock()
+ * users end up calling queue_me(). Similarly, for housekeeping,
+ * decrement the counter at queue_unlock() when some error has
+ * occurred and we don't end up adding the task to the list.
+ */
+ hb_waiters_inc(hb);
+
q->lock_ptr = &hb->lock;
spin_lock(&hb->lock); /* implies MB (A) */
@@ -1611,6 +1662,7 @@ queue_unlock(struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
spin_unlock(&hb->lock);
+ hb_waiters_dec(hb);
}
/**
@@ -2342,6 +2394,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* Unqueue the futex_q and determine which it was.
*/
plist_del(&q->list, &hb->chain);
+ hb_waiters_dec(hb);
/* Handle spurious wakeups gracefully */
ret = -EWOULDBLOCK;
@@ -2843,9 +2896,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
-static int __init futex_init(void)
+static void __init futex_detect_cmpxchg(void)
{
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
u32 curval;
+
+ /*
+ * This will fail and we want it. Some arch implementations do
+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
+ * functionality. We want to know that before we call in any
+ * of the complex code paths. Also we want to prevent
+ * registration of robust lists in that case. NULL is
+ * guaranteed to fault and we get -EFAULT on functional
+ * implementation, the non-functional ones will return
+ * -ENOSYS.
+ */
+ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
+ futex_cmpxchg_enabled = 1;
+#endif
+}
+
+static int __init futex_init(void)
+{
unsigned int futex_shift;
unsigned long i;
@@ -2861,20 +2933,11 @@ static int __init futex_init(void)
&futex_shift, NULL,
futex_hashsize, futex_hashsize);
futex_hashsize = 1UL << futex_shift;
- /*
- * This will fail and we want it. Some arch implementations do
- * runtime detection of the futex_atomic_cmpxchg_inatomic()
- * functionality. We want to know that before we call in any
- * of the complex code paths. Also we want to prevent
- * registration of robust lists in that case. NULL is
- * guaranteed to fault and we get -EFAULT on functional
- * implementation, the non-functional ones will return
- * -ENOSYS.
- */
- if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
- futex_cmpxchg_enabled = 1;
+
+ futex_detect_cmpxchg();
for (i = 0; i < futex_hashsize; i++) {
+ atomic_set(&futex_queues[i].waiters, 0);
plist_head_init(&futex_queues[i].chain);
spin_lock_init(&futex_queues[i].lock);
}