From 046a619d8e9746fa4c0e29e8c6b78e16efc008a8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:48 -0700 Subject: locking/spinlocks/mcs: Rename optimistic_spin_queue() to optimistic_spin_node() Currently, the per-cpu nodes structure for the cancellable MCS spinlock is named "optimistic_spin_queue". However, in a follow up patch in the series we will be introducing a new structure that serves as the new "handle" for the lock. It would make more sense if that structure is named "optimistic_spin_queue". Additionally, since the current use of the "optimistic_spin_queue" structure are "nodes", it might be better if we rename them to "node" anyway. This preparatory patch renames all current "optimistic_spin_queue" to "optimistic_spin_node". Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Heiko Carstens Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-2-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mcs_spinlock.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/locking/mcs_spinlock.c') diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 838dc9e0066..e9866f70e82 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -14,18 +14,18 @@ * called from interrupt context and we have preemption disabled while * spinning. */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. */ -static inline struct optimistic_spin_queue * -osq_wait_next(struct optimistic_spin_queue **lock, - struct optimistic_spin_queue *node, - struct optimistic_spin_queue *prev) +static inline struct optimistic_spin_node * +osq_wait_next(struct optimistic_spin_node **lock, + struct optimistic_spin_node *node, + struct optimistic_spin_node *prev) { - struct optimistic_spin_queue *next = NULL; + struct optimistic_spin_node *next = NULL; for (;;) { if (*lock == node && cmpxchg(lock, node, prev) == node) { @@ -59,10 +59,10 @@ osq_wait_next(struct optimistic_spin_queue **lock, return next; } -bool osq_lock(struct optimistic_spin_queue **lock) +bool osq_lock(struct optimistic_spin_node **lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *prev, *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *prev, *next; node->locked = 0; node->next = NULL; @@ -149,10 +149,10 @@ unqueue: return false; } -void osq_unlock(struct optimistic_spin_queue **lock) +void osq_unlock(struct optimistic_spin_node **lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *next; /* * Fast path for the uncontended case. -- cgit v1.2.3-70-g09d2 From 90631822c5d307b5410500806e8ac3e63928aa3e Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:49 -0700 Subject: locking/spinlocks/mcs: Convert osq lock to atomic_t to reduce overhead The cancellable MCS spinlock is currently used to queue threads that are doing optimistic spinning. It uses per-cpu nodes, where a thread obtaining the lock would access and queue the local node corresponding to the CPU that it's running on. Currently, the cancellable MCS lock is implemented by using pointers to these nodes. In this patch, instead of operating on pointers to the per-cpu nodes, we store the CPU numbers in which the per-cpu nodes correspond to in atomic_t. A similar concept is used with the qspinlock. By operating on the CPU # of the nodes using atomic_t instead of pointers to those nodes, this can reduce the overhead of the cancellable MCS spinlock by 32 bits (on 64 bit systems). Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Heiko Carstens Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 ++-- include/linux/osq_lock.h | 19 ++++++++++++++++++ include/linux/rwsem.h | 7 +++---- kernel/locking/mcs_spinlock.c | 46 ++++++++++++++++++++++++++++++++++++------- kernel/locking/mcs_spinlock.h | 5 +++-- kernel/locking/mutex.c | 2 +- kernel/locking/rwsem-xadd.c | 2 +- 7 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 include/linux/osq_lock.h (limited to 'kernel/locking/mcs_spinlock.c') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 885f3f56a77..42aa9b9ecd5 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -46,7 +47,6 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct optimistic_spin_node; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_node *osq; /* Spinner MCS lock */ + struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h new file mode 100644 index 00000000000..b001682bf7c --- /dev/null +++ b/include/linux/osq_lock.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_OSQ_LOCK_H +#define __LINUX_OSQ_LOCK_H + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + */ + +#define OSQ_UNLOCKED_VAL (0) + +struct optimistic_spin_queue { + /* + * Stores an encoded value of the CPU # of the tail node in the queue. + * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL. + */ + atomic_t tail; +}; + +#endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index ba3f108ddea..9fdcdd03507 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -13,10 +13,9 @@ #include #include #include - #include +#include -struct optimistic_spin_node; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -33,7 +32,7 @@ struct rw_semaphore { * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_node *osq; /* spinner MCS lock */ + struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -70,7 +69,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ LIST_HEAD_INIT((name).wait_list), \ NULL, /* owner */ \ - NULL /* mcs lock */ \ + { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } /* osq */ \ __RWSEM_DEP_MAP_INIT(name) } #else #define __RWSEM_INITIALIZER(name) \ diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index e9866f70e82..32fc16c0a54 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -16,19 +16,45 @@ */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); +/* + * We use the value 0 to represent "no CPU", thus the encoded value + * will be the CPU number incremented by 1. + */ +static inline int encode_cpu(int cpu_nr) +{ + return cpu_nr + 1; +} + +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) +{ + int cpu_nr = encoded_cpu_val - 1; + + return per_cpu_ptr(&osq_node, cpu_nr); +} + /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. */ static inline struct optimistic_spin_node * -osq_wait_next(struct optimistic_spin_node **lock, +osq_wait_next(struct optimistic_spin_queue *lock, struct optimistic_spin_node *node, struct optimistic_spin_node *prev) { struct optimistic_spin_node *next = NULL; + int curr = encode_cpu(smp_processor_id()); + int old; + + /* + * If there is a prev node in queue, then the 'old' value will be + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if + * we're currently last in queue, then the queue will then become empty. + */ + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; for (;;) { - if (*lock == node && cmpxchg(lock, node, prev) == node) { + if (atomic_read(&lock->tail) == curr && + atomic_cmpxchg(&lock->tail, curr, old) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its @@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_node **lock, return next; } -bool osq_lock(struct optimistic_spin_node **lock) +bool osq_lock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *prev, *next; + int curr = encode_cpu(smp_processor_id()); + int old; node->locked = 0; node->next = NULL; + node->cpu = curr; - node->prev = prev = xchg(lock, node); - if (likely(prev == NULL)) + old = atomic_xchg(&lock->tail, curr); + if (old == OSQ_UNLOCKED_VAL) return true; + prev = decode_cpu(old); + node->prev = prev; ACCESS_ONCE(prev->next) = node; /* @@ -149,15 +180,16 @@ unqueue: return false; } -void osq_unlock(struct optimistic_spin_node **lock) +void osq_unlock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *next; + int curr = encode_cpu(smp_processor_id()); /* * Fast path for the uncontended case. */ - if (likely(cmpxchg(lock, node, NULL) == node)) + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) return; /* diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index c99dc0052f4..74356dc0ce2 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -121,9 +121,10 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) struct optimistic_spin_node { struct optimistic_spin_node *next, *prev; int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # value */ }; -extern bool osq_lock(struct optimistic_spin_node **lock); -extern void osq_unlock(struct optimistic_spin_node **lock); +extern bool osq_lock(struct optimistic_spin_queue *lock); +extern void osq_unlock(struct optimistic_spin_queue *lock); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index bc73d33c676..d9b313906ca 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->osq = NULL; + atomic_set(&lock->osq.tail, OSQ_UNLOCKED_VAL); #endif debug_mutex_init(lock, name, key); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index c40c7d28661..b77a6230bbf 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -84,7 +84,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_SMP sem->owner = NULL; - sem->osq = NULL; + atomic_set(&sem->osq.tail, OSQ_UNLOCKED_VAL); #endif } -- cgit v1.2.3-70-g09d2 From 33ecd2083a9560fbc1ef1b1279ef3ecb4c012a4f Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:51 -0700 Subject: locking/spinlocks/mcs: Micro-optimize osq_unlock() In the unlock function of the cancellable MCS spinlock, the first thing we do is to retrive the current CPU's osq node. However, due to the changes made in the previous patch, in the common case where the lock is not contended, we wouldn't need to access the current CPU's osq node anymore. This patch optimizes this by only retriving this CPU's osq node after we attempt the initial cmpxchg to unlock the osq and found that its contended. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1405358872-3732-5-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mcs_spinlock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/locking/mcs_spinlock.c') diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 32fc16c0a54..be9ee1559fc 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -182,8 +182,7 @@ unqueue: void osq_unlock(struct optimistic_spin_queue *lock) { - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_node *next; + struct optimistic_spin_node *node, *next; int curr = encode_cpu(smp_processor_id()); /* @@ -195,6 +194,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Second most likely case. */ + node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { ACCESS_ONCE(next->locked) = 1; -- cgit v1.2.3-70-g09d2