From 0f8f2aaaab0b0f9c13635cb02e7d19bdaa9aa1bb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:13:26 -0700 Subject: Add new lockref infrastructure reference implementation This introduces a new "lockref" structure that supports the concept of lockless updates of reference counts that still honor an attached spinlock. NOTE! This reference implementation is not the optimized lockless version, rather it is the fallback implementation using standard spinlocks. The actual optimized versions will be merged into 3.12, but I wanted to get the infrastructure in place and document the new interfaces. [ Also note that this particular commit is drastically cut-down minimal version of the original patch by Waiman. In order to properly credit the original author I'm marking Waiman as the author here, but in the end this patch bears little resemblance to the patch by Waiman. So blame any errors on me editing things down to the point where I can introduce the infrastructure before the merge window for 3.12 actually opens. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/lockref.h (limited to 'include/linux/lockref.h') diff --git a/include/linux/lockref.h b/include/linux/lockref.h new file mode 100644 index 00000000000..01233e01627 --- /dev/null +++ b/include/linux/lockref.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_LOCKREF_H +#define __LINUX_LOCKREF_H + +/* + * Locked reference counts. + * + * These are different from just plain atomic refcounts in that they + * are atomic with respect to the spinlock that goes with them. In + * particular, there can be implementations that don't actually get + * the spinlock for the common decrement/increment operations, but they + * still have to check that the operation is done semantically as if + * the spinlock had been taken (using a cmpxchg operation that covers + * both the lock and the count word, or using memory transactions, for + * example). + */ + +#include + +struct lockref { + spinlock_t lock; + unsigned int count; +}; + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +static inline void lockref_get(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count is 0 + */ +static inline int lockref_get_not_zero(struct lockref *lockref) +{ + int retval = 0; + + spin_lock(&lockref->lock); + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +static inline int lockref_put_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} + +#endif /* __LINUX_LOCKREF_H */ -- cgit v1.2.3-70-g09d2 From b3abd80250c13414bc258b53e57242feb159af91 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 11:14:19 -0700 Subject: lockref: add 'lockref_get_or_lock() helper This behaves like "lockref_get_not_zero()", but instead of doing nothing if the count was zero, it returns with the lock held. This allows callers to revalidate the lockref-protected data structure if required even if the count was zero to begin with, and possibly increment the count if it passes muster. In particular, the dentry code wants this when it wants to turn an RCU-protected dentry into a stable refcounted one: if the dentry count it zero, but the sequence number still validates the dentry, we can take a reference to it. Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux/lockref.h') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 01233e01627..0ea026092d1 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -53,6 +53,22 @@ static inline int lockref_get_not_zero(struct lockref *lockref) return retval; } +/** + * lockref_get_or_lock - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + * and we got the lock instead. + */ +static inline int lockref_get_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (!lockref->count) + return 0; + lockref->count++; + spin_unlock(&lockref->lock); + return 1; +} + /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockcnt: pointer to lockref structure -- cgit v1.2.3-70-g09d2 From 2f4f12e571c4e2f50f3818a3c2544929145f75dd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 11:58:20 -0700 Subject: lockref: uninline lockref helper functions They aren't very good to inline, since they already call external functions (the spinlock code), and we're going to create rather more complicated versions of them that can do the reference count updates locklessly. Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 66 +++------------------------------------------- lib/Makefile | 1 + lib/lockref.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 62 deletions(-) create mode 100644 lib/lockref.c (limited to 'include/linux/lockref.h') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 0ea026092d1..4c0af31c8d4 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -21,67 +21,9 @@ struct lockref { unsigned int count; }; -/** - * lockref_get - Increments reference count unconditionally - * @lockcnt: pointer to lockref structure - * - * This operation is only valid if you already hold a reference - * to the object, so you know the count cannot be zero. - */ -static inline void lockref_get(struct lockref *lockref) -{ - spin_lock(&lockref->lock); - lockref->count++; - spin_unlock(&lockref->lock); -} - -/** - * lockref_get_not_zero - Increments count unless the count is 0 - * @lockcnt: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count is 0 - */ -static inline int lockref_get_not_zero(struct lockref *lockref) -{ - int retval = 0; - - spin_lock(&lockref->lock); - if (lockref->count) { - lockref->count++; - retval = 1; - } - spin_unlock(&lockref->lock); - return retval; -} - -/** - * lockref_get_or_lock - Increments count unless the count is 0 - * @lockcnt: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count was zero - * and we got the lock instead. - */ -static inline int lockref_get_or_lock(struct lockref *lockref) -{ - spin_lock(&lockref->lock); - if (!lockref->count) - return 0; - lockref->count++; - spin_unlock(&lockref->lock); - return 1; -} - -/** - * lockref_put_or_lock - decrements count unless count <= 1 before decrement - * @lockcnt: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken - */ -static inline int lockref_put_or_lock(struct lockref *lockref) -{ - spin_lock(&lockref->lock); - if (lockref->count <= 1) - return 0; - lockref->count--; - spin_unlock(&lockref->lock); - return 1; -} +extern void lockref_get(struct lockref *); +extern int lockref_get_not_zero(struct lockref *); +extern int lockref_get_or_lock(struct lockref *); +extern int lockref_put_or_lock(struct lockref *); #endif /* __LINUX_LOCKREF_H */ diff --git a/lib/Makefile b/lib/Makefile index 7baccfd8a4e..f2cb3082697 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o +obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ diff --git a/lib/lockref.c b/lib/lockref.c new file mode 100644 index 00000000000..a9a4f4e1eff --- /dev/null +++ b/lib/lockref.c @@ -0,0 +1,69 @@ +#include +#include + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +void lockref_get(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} +EXPORT_SYMBOL(lockref_get); + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + */ +int lockref_get_not_zero(struct lockref *lockref) +{ + int retval = 0; + + spin_lock(&lockref->lock); + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_get_not_zero); + +/** + * lockref_get_or_lock - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + * and we got the lock instead. + */ +int lockref_get_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (!lockref->count) + return 0; + lockref->count++; + spin_unlock(&lockref->lock); + return 1; +} +EXPORT_SYMBOL(lockref_get_or_lock); + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +int lockref_put_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} +EXPORT_SYMBOL(lockref_put_or_lock); -- cgit v1.2.3-70-g09d2 From bc08b449ee14ace4d869adaa1bb35a44ce68d775 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 12:12:15 -0700 Subject: lockref: implement lockless reference count updates using cmpxchg() Instead of taking the spinlock, the lockless versions atomically check that the lock is not taken, and do the reference count update using a cmpxchg() loop. This is semantically identical to doing the reference count update protected by the lock, but avoids the "wait for lock" contention that you get when accesses to the reference count are contended. Note that a "lockref" is absolutely _not_ equivalent to an atomic_t. Even when the lockref reference counts are updated atomically with cmpxchg, the fact that they also verify the state of the spinlock means that the lockless updates can never happen while somebody else holds the spinlock. So while "lockref_put_or_lock()" looks a lot like just another name for "atomic_dec_and_lock()", and both optimize to lockless updates, they are fundamentally different: the decrement done by atomic_dec_and_lock() is truly independent of any lock (as long as it doesn't decrement to zero), so a locked region can still see the count change. The lockref structure, in contrast, really is a *locked* reference count. If you hold the spinlock, the reference count will be stable and you can modify the reference count without using atomics, because even the lockless updates will see and respect the state of the lock. In order to enable the cmpxchg lockless code, the architecture needs to do three things: (1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit in an aligned u64, and have a "cmpxchg()" implementation that works on such a u64 data type. (2) define a helper function to test for a spinlock being unlocked ("arch_spin_value_unlocked()") (3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its Kconfig file. This enables it for x86-64 (but not 32-bit, we'd need to make sure cmpxchg() turns into the proper cmpxchg8b in order to enable it for 32-bit mode). Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/spinlock.h | 5 ++++ include/linux/lockref.h | 11 ++++++-- lib/Kconfig | 10 +++++++ lib/lockref.c | 60 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 84 insertions(+), 3 deletions(-) (limited to 'include/linux/lockref.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf92b0c..67e00740531 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -16,6 +16,7 @@ config X86_64 def_bool y depends on 64BIT select X86_DEV_DMA_OPS + select ARCH_USE_CMPXCHG_LOCKREF ### Arch settings config X86 diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index e3ddd7db723..e0e668422c7 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -34,6 +34,11 @@ # define UNLOCK_LOCK_PREFIX #endif +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.tickets.head == lock.tickets.tail; +} + /* * Ticket locks are conceptually two parts, one indicating the current head of * the queue, and the other indicating the current tail. The lock is acquired diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 4c0af31c8d4..ca07b5028b0 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -17,8 +17,15 @@ #include struct lockref { - spinlock_t lock; - unsigned int count; + union { +#ifdef CONFIG_CMPXCHG_LOCKREF + aligned_u64 lock_count; +#endif + struct { + spinlock_t lock; + unsigned int count; + }; + }; }; extern void lockref_get(struct lockref *); diff --git a/lib/Kconfig b/lib/Kconfig index 71d9f81f6ee..65561716c16 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -48,6 +48,16 @@ config STMP_DEVICE config PERCPU_RWSEM boolean +config ARCH_USE_CMPXCHG_LOCKREF + bool + +config CMPXCHG_LOCKREF + def_bool y if ARCH_USE_CMPXCHG_LOCKREF + depends on SMP + depends on !GENERIC_LOCKBREAK + depends on !DEBUG_SPINLOCK + depends on !DEBUG_LOCK_ALLOC + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/lockref.c b/lib/lockref.c index a9a4f4e1eff..7819c2d1d31 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -1,6 +1,33 @@ #include #include +#ifdef CONFIG_CMPXCHG_LOCKREF + +/* + * Note that the "cmpxchg()" reloads the "old" value for the + * failure case. + */ +#define CMPXCHG_LOOP(CODE, SUCCESS) do { \ + struct lockref old; \ + BUILD_BUG_ON(sizeof(old) != 8); \ + old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ + struct lockref new = old, prev = old; \ + CODE \ + old.lock_count = cmpxchg(&lockref->lock_count, \ + old.lock_count, new.lock_count); \ + if (likely(old.lock_count == prev.lock_count)) { \ + SUCCESS; \ + } \ + } \ +} while (0) + +#else + +#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0) + +#endif + /** * lockref_get - Increments reference count unconditionally * @lockcnt: pointer to lockref structure @@ -10,6 +37,12 @@ */ void lockref_get(struct lockref *lockref) { + CMPXCHG_LOOP( + new.count++; + , + return; + ); + spin_lock(&lockref->lock); lockref->count++; spin_unlock(&lockref->lock); @@ -23,9 +56,18 @@ EXPORT_SYMBOL(lockref_get); */ int lockref_get_not_zero(struct lockref *lockref) { - int retval = 0; + int retval; + + CMPXCHG_LOOP( + new.count++; + if (!old.count) + return 0; + , + return 1; + ); spin_lock(&lockref->lock); + retval = 0; if (lockref->count) { lockref->count++; retval = 1; @@ -43,6 +85,14 @@ EXPORT_SYMBOL(lockref_get_not_zero); */ int lockref_get_or_lock(struct lockref *lockref) { + CMPXCHG_LOOP( + new.count++; + if (!old.count) + break; + , + return 1; + ); + spin_lock(&lockref->lock); if (!lockref->count) return 0; @@ -59,6 +109,14 @@ EXPORT_SYMBOL(lockref_get_or_lock); */ int lockref_put_or_lock(struct lockref *lockref) { + CMPXCHG_LOOP( + new.count--; + if (old.count <= 1) + break; + , + return 1; + ); + spin_lock(&lockref->lock); if (lockref->count <= 1) return 0; -- cgit v1.2.3-70-g09d2 From e7d33bb5ea82922e6ddcfc6b28a630b1a4ced071 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 7 Sep 2013 15:49:18 -0700 Subject: lockref: add ability to mark lockrefs "dead" The only actual current lockref user (dcache) uses zero reference counts even for perfectly live dentries, because it's a cache: there may not be any users, but that doesn't mean that we want to throw away the dentry. At the same time, the dentry cache does have a notion of a truly "dead" dentry that we must not even increment the reference count of, because we have pruned it and it is not valid. Currently that distinction is not visible in the lockref itself, and the dentry cache validation uses "lockref_get_or_lock()" to either get a new reference to a dentry that already had existing references (and thus cannot be dead), or get the dentry lock so that we can then verify the dentry and increment the reference count under the lock if that verification was successful. That's all somewhat complicated. This adds the concept of being "dead" to the lockref itself, by simply using a count that is negative. This allows a usage scenario where we can increment the refcount of a dentry without having to validate it, and pushing the special "we killed it" case into the lockref code. The dentry code itself doesn't actually use this yet, and it's probably too late in the merge window to do that code (the dentry_kill() code with its "should I decrement the count" logic really is pretty complex code), but let's introduce the concept at the lockref level now. Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 3 +++ lib/lockref.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux/lockref.h') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index ca07b5028b0..f279ed9a916 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -33,4 +33,7 @@ extern int lockref_get_not_zero(struct lockref *); extern int lockref_get_or_lock(struct lockref *); extern int lockref_put_or_lock(struct lockref *); +extern void lockref_mark_dead(struct lockref *); +extern int lockref_get_not_dead(struct lockref *); + #endif /* __LINUX_LOCKREF_H */ diff --git a/lib/lockref.c b/lib/lockref.c index 7aae8df37f6..e2cd2c0a882 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -126,3 +126,41 @@ int lockref_put_or_lock(struct lockref *lockref) return 1; } EXPORT_SYMBOL(lockref_put_or_lock); + +/** + * lockref_mark_dead - mark lockref dead + * @lockref: pointer to lockref structure + */ +void lockref_mark_dead(struct lockref *lockref) +{ + assert_spin_locked(&lockref->lock); + lockref->count = -128; +} + +/** + * lockref_get_not_dead - Increments count unless the ref is dead + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if lockref was dead + */ +int lockref_get_not_dead(struct lockref *lockref) +{ + int retval; + + CMPXCHG_LOOP( + new.count++; + if ((int)old.count < 0) + return 0; + , + return 1; + ); + + spin_lock(&lockref->lock); + retval = 0; + if ((int) lockref->count >= 0) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_get_not_dead); -- cgit v1.2.3-70-g09d2