From 2f4f12e571c4e2f50f3818a3c2544929145f75dd Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 2 Sep 2013 11:58:20 -0700
Subject: lockref: uninline lockref helper functions

They aren't very good to inline, since they already call external
functions (the spinlock code), and we're going to create rather more
complicated versions of them that can do the reference count updates
locklessly.

Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 lib/lockref.c

diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 00000000000..a9a4f4e1eff
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,69 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockcnt: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+	spin_lock(&lockref->lock);
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+	int retval = 0;
+
+	spin_lock(&lockref->lock);
+	if (lockref->count) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+	spin_lock(&lockref->lock);
+	if (!lockref->count)
+		return 0;
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+	spin_lock(&lockref->lock);
+	if (lockref->count <= 1)
+		return 0;
+	lockref->count--;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);

From bc08b449ee14ace4d869adaa1bb35a44ce68d775 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 2 Sep 2013 12:12:15 -0700
Subject: lockref: implement lockless reference count updates using cmpxchg()

Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop.

This is semantically identical to doing the reference count update
protected by the lock, but avoids the "wait for lock" contention that
you get when accesses to the reference count are contended.

Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock
means that the lockless updates can never happen while somebody else
holds the spinlock.
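As a concrete illustration of that last point (a sketch for this log, not
part of the patch; the "cached_obj" type is hypothetical), a holder of the
spinlock can treat the count as plain data, because the cmpxchg fast path
backs off whenever the lock is held:

	struct cached_obj {
		struct lockref ref;		/* embeds the lock and the count */
		/* ... payload ... */
	};

	static void cached_obj_bump(struct cached_obj *obj)
	{
		spin_lock(&obj->ref.lock);	/* fences off the lockless fast path */
		obj->ref.count++;		/* plain, non-atomic update is safe here */
		spin_unlock(&obj->ref.lock);
	}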
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.

The lockref structure, in contrast, really is a *locked* reference count.
If you hold the spinlock, the reference count will be stable and you can
modify the reference count without using atomics, because even the
lockless updates will see and respect the state of the lock.

In order to enable the cmpxchg lockless code, the architecture needs to
do three things:

 (1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
     in an aligned u64, and have a "cmpxchg()" implementation that works
     on such a u64 data type.

 (2) define a helper function to test for a spinlock being unlocked
     ("arch_spin_value_unlocked()")

 (3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
     Kconfig file.

This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).

Signed-off-by: Linus Torvalds
---
 arch/x86/Kconfig                |  1 +
 arch/x86/include/asm/spinlock.h |  5 ++++
 include/linux/lockref.h         | 11 ++++++--
 lib/Kconfig                     | 10 +++++++
 lib/lockref.c                   | 60 ++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0c..67e00740531 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7db723..e0e668422c7 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.tickets.head == lock.tickets.tail;
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 4c0af31c8d4..ca07b5028b0 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,8 +17,15 @@
 #include <linux/spinlock.h>
 
 struct lockref {
-	spinlock_t lock;
-	unsigned int count;
+	union {
+#ifdef CONFIG_CMPXCHG_LOCKREF
+		aligned_u64 lock_count;
+#endif
+		struct {
+			spinlock_t lock;
+			unsigned int count;
+		};
+	};
 };
 
 extern void lockref_get(struct lockref *);
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81f6ee..65561716c16 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	boolean
 
+config ARCH_USE_CMPXCHG_LOCKREF
+	bool
+
+config CMPXCHG_LOCKREF
+	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+	depends on SMP
+	depends on !GENERIC_LOCKBREAK
+	depends on !DEBUG_SPINLOCK
+	depends on !DEBUG_LOCK_ALLOC
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/lockref.c b/lib/lockref.c
index a9a4f4e1eff..7819c2d1d31 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,6 +1,33 @@
 #include <linux/export.h>
 #include <linux/lockref.h>
 
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
+	struct lockref old;							\
+	BUILD_BUG_ON(sizeof(old) != 8);						\
+	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
+		struct lockref new = old, prev = old;				\
+		CODE								\
+		old.lock_count = cmpxchg(&lockref->lock_count,			\
+					 old.lock_count, new.lock_count);	\
+		if (likely(old.lock_count == prev.lock_count)) {		\
+			SUCCESS;						\
+		}								\
+	}									\
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
+
 /**
  * lockref_get - Increments reference count unconditionally
  * @lockcnt: pointer to lockref structure
@@ -10,6 +37,12 @@
  */
 void lockref_get(struct lockref *lockref)
 {
+	CMPXCHG_LOOP(
+		new.count++;
+	,
+		return;
+	);
+
 	spin_lock(&lockref->lock);
 	lockref->count++;
 	spin_unlock(&lockref->lock);
@@ -23,9 +56,18 @@ EXPORT_SYMBOL(lockref_get);
  */
 int lockref_get_not_zero(struct lockref *lockref)
 {
-	int retval = 0;
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			return 0;
+	,
+		return 1;
+	);
 
 	spin_lock(&lockref->lock);
+	retval = 0;
 	if (lockref->count) {
 		lockref->count++;
 		retval = 1;
@@ -43,6 +85,14 @@ EXPORT_SYMBOL(lockref_get_not_zero);
  */
 int lockref_get_or_lock(struct lockref *lockref)
 {
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			break;
+	,
+		return 1;
+	);
+
 	spin_lock(&lockref->lock);
 	if (!lockref->count)
 		return 0;
@@ -59,6 +109,14 @@ EXPORT_SYMBOL(lockref_get_or_lock);
  */
 int lockref_put_or_lock(struct lockref *lockref)
 {
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			break;
+	,
+		return 1;
+	);
+
 	spin_lock(&lockref->lock);
 	if (lockref->count <= 1)
 		return 0;

From d472d9d98b463dd7a04f2bcdeafe4261686ce6ab Mon Sep 17 00:00:00 2001
From: "Luck, Tony"
Date: Tue, 3 Sep 2013 14:49:49 -0700
Subject: lockref: Relax in cmpxchg loop

While we are likely to succeed and break out of this loop, it isn't
guaranteed.  We should be power and thread friendly if we do have to
go around for a second (or third, or more) attempt.

Signed-off-by: Tony Luck
Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/lockref.c b/lib/lockref.c
index 7819c2d1d31..9d76f404ce9 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -19,6 +19,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
+		cpu_relax();							\
 	}									\
 } while (0)
 

From 44a0cf92926c343366a4986808d12ab068504eed Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 7 Sep 2013 15:30:29 -0700
Subject: lockref: fix docbook argument names

The code got rewritten, but the comments got copied as-is from older
versions, and as a result the argument name in the comment didn't
actually match the code any more.

Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index 9d76f404ce9..7aae8df37f6 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -31,7 +31,7 @@
 
 /**
  * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  *
  * This operation is only valid if you already hold a reference
  * to the object, so you know the count cannot be zero.
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(lockref_get);
 
 /**
  * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count was zero
  */
 int lockref_get_not_zero(struct lockref *lockref)
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(lockref_get_not_zero);
 
 /**
  * lockref_get_or_lock - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count was zero
  * and we got the lock instead.
  */
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(lockref_get_or_lock);
 
 /**
  * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
  */
 int lockref_put_or_lock(struct lockref *lockref)

From e7d33bb5ea82922e6ddcfc6b28a630b1a4ced071 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 7 Sep 2013 15:49:18 -0700
Subject: lockref: add ability to mark lockrefs "dead"

The only actual current lockref user (dcache) uses zero reference counts
even for perfectly live dentries, because it's a cache: there may not be
any users, but that doesn't mean that we want to throw away the dentry.

At the same time, the dentry cache does have a notion of a truly "dead"
dentry that we must not even increment the reference count of, because
we have pruned it and it is not valid.

Currently that distinction is not visible in the lockref itself, and the
dentry cache validation uses "lockref_get_or_lock()" to either get a new
reference to a dentry that already had existing references (and thus
cannot be dead), or get the dentry lock so that we can then verify the
dentry and increment the reference count under the lock if that
verification was successful.

That's all somewhat complicated.

This adds the concept of being "dead" to the lockref itself, by simply
using a count that is negative.  This allows a usage scenario where we
can increment the refcount of a dentry without having to validate it,
and pushing the special "we killed it" case into the lockref code.

The dentry code itself doesn't actually use this yet, and it's probably
too late in the merge window to do that code (the dentry_kill() code
with its "should I decrement the count" logic really is pretty complex
code), but let's introduce the concept at the lockref level now.
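To illustrate the intended pattern (a sketch only; as noted above the
dentry code does not do this yet, and "obj" here is a hypothetical lockref
user rather than anything added by this patch), a lookup fast path could
then take a reference without validating the object first, relying on the
negative count to reject killed objects:

	/* Fast path: no validation needed, dead objects are rejected by the count. */
	if (lockref_get_not_dead(&obj->ref))
		return obj;

	/* The object was marked dead (negative count) by whoever pruned it. */
	return NULL;

The killing side holds the spinlock and calls lockref_mark_dead(), as the
new helper below requires.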
Signed-off-by: Linus Torvalds
---
 include/linux/lockref.h |  3 +++
 lib/lockref.c           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index ca07b5028b0..f279ed9a916 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -33,4 +33,7 @@ extern int lockref_get_not_zero(struct lockref *);
 extern int lockref_get_or_lock(struct lockref *);
 extern int lockref_put_or_lock(struct lockref *);
 
+extern void lockref_mark_dead(struct lockref *);
+extern int lockref_get_not_dead(struct lockref *);
+
 #endif /* __LINUX_LOCKREF_H */
diff --git a/lib/lockref.c b/lib/lockref.c
index 7aae8df37f6..e2cd2c0a882 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -126,3 +126,41 @@ int lockref_put_or_lock(struct lockref *lockref)
 	return 1;
 }
 EXPORT_SYMBOL(lockref_put_or_lock);
+
+/**
+ * lockref_mark_dead - mark lockref dead
+ * @lockref: pointer to lockref structure
+ */
+void lockref_mark_dead(struct lockref *lockref)
+{
+	assert_spin_locked(&lockref->lock);
+	lockref->count = -128;
+}
+
+/**
+ * lockref_get_not_dead - Increments count unless the ref is dead
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if lockref was dead
+ */
+int lockref_get_not_dead(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if ((int)old.count < 0)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if ((int) lockref->count >= 0) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_dead);

From 8f4c344696b9f9f8471d7f342076ef10ed7f66a5 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 19 Sep 2013 19:06:46 +0100
Subject: lockref: use cmpxchg64 explicitly for lockless updates

The cmpxchg() function tends not to support 64-bit arguments on 32-bit
architectures.  This could be either due to use of unsigned long
arguments (like on ARM) or lack of instruction support (cmpxchgq on
x86).  However, these architectures may implement a specific cmpxchg64()
function to provide 64-bit cmpxchg support instead.

Since the lockref code requires a 64-bit cmpxchg and relies on the
architecture selecting ARCH_USE_CMPXCHG_LOCKREF, move to using cmpxchg64
instead of cmpxchg and allow 32-bit architectures to make use of the
lockless lockref implementation.

Cc: Waiman Long
Signed-off-by: Will Deacon
Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index e2cd2c0a882..677d036cf3c 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -14,8 +14,8 @@
 	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
 		struct lockref new = old, prev = old;				\
 		CODE								\
-		old.lock_count = cmpxchg(&lockref->lock_count,			\
-					 old.lock_count, new.lock_count);	\
+		old.lock_count = cmpxchg64(&lockref->lock_count,		\
+					   old.lock_count, new.lock_count);	\
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\

From d2212b4dce596fee83e5c523400bf084f4cc816c Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 26 Sep 2013 17:27:00 +0100
Subject: lockref: allow relaxed cmpxchg64 variant for lockless updates

The 64-bit cmpxchg operation on the lockref is ordered by virtue of
hazarding between the cmpxchg operation and the reference count
manipulation.
On weakly ordered memory architectures (such as ARM), it can be of great
benefit to omit the barrier instructions where they are not needed.

This patch moves the lockless lockref code over to a cmpxchg64_relaxed
operation, which doesn't provide barrier semantics.  If the operation
isn't defined, we simply #define it as the usual 64-bit cmpxchg macro.

Cc: Waiman Long
Signed-off-by: Will Deacon
Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index 677d036cf3c..e294ae445c9 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -3,6 +3,14 @@
 
 #ifdef CONFIG_CMPXCHG_LOCKREF
 
+/*
+ * Allow weakly-ordered memory architectures to provide barrier-less
+ * cmpxchg semantics for lockref updates.
+ */
+#ifndef cmpxchg64_relaxed
+# define cmpxchg64_relaxed cmpxchg64
+#endif
+
 /*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
@@ -14,8 +22,9 @@
 	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
 		struct lockref new = old, prev = old;				\
 		CODE								\
-		old.lock_count = cmpxchg64(&lockref->lock_count,		\
-					   old.lock_count, new.lock_count);	\
+		old.lock_count = cmpxchg64_relaxed(&lockref->lock_count,	\
+						   old.lock_count,		\
+						   new.lock_count);		\
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\

From 491f6f8e5fd9a57aaf03b6d6e3e153f1c27d8a46 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 23 Sep 2013 12:59:56 +0200
Subject: lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP()

Make use of arch_mutex_cpu_relax() so architectures can override the
default cpu_relax() semantics.  This is especially useful for s390,
where cpu_relax() means that we yield() the current (virtual) cpu and
therefore is very expensive, and would contradict the whole purpose of
the lockless cmpxchg loop.

Signed-off-by: Heiko Carstens
---
 lib/lockref.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index e294ae445c9..6f9d434c152 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -11,6 +11,14 @@
 # define cmpxchg64_relaxed cmpxchg64
 #endif
 
+/*
+ * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP.
+ * This is useful for architectures with an expensive cpu_relax().
+ */
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
 /*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
@@ -28,7 +36,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
-		cpu_relax();							\
+		arch_mutex_cpu_relax();						\
 	}									\
 } while (0)
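Taken together, the series leaves lib/lockref.c with a lockless cmpxchg
fast path and a locked slow path behind it.  As a closing illustration
(a rough sketch, not taken from any of the patches above; "my_obj" and
my_obj_free() are hypothetical), a last-reference drop built on this API
could look roughly like this:

	static void my_obj_put(struct my_obj *obj)
	{
		if (lockref_put_or_lock(&obj->ref))
			return;			/* count was > 1: lockless decrement already done */

		/* Slow path: the spinlock is now held and the count is <= 1. */
		lockref_mark_dead(&obj->ref);	/* keeps lockref_get_not_dead() callers away */
		spin_unlock(&obj->ref.lock);
		my_obj_free(obj);		/* assumes the object is already unfindable */
	}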