From 2f4f12e571c4e2f50f3818a3c2544929145f75dd Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 2 Sep 2013 11:58:20 -0700
Subject: lockref: uninline lockref helper functions

They aren't very good to inline, since they already call external
functions (the spinlock code), and we're going to create rather more
complicated versions of them that can do the reference count updates
locklessly.

Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 lib/lockref.c

diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 00000000000..a9a4f4e1eff
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,69 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockcnt: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+	spin_lock(&lockref->lock);
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+	int retval = 0;
+
+	spin_lock(&lockref->lock);
+	if (lockref->count) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+	spin_lock(&lockref->lock);
+	if (!lockref->count)
+		return 0;
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockcnt: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+	spin_lock(&lockref->lock);
+	if (lockref->count <= 1)
+		return 0;
+	lockref->count--;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);

From bc08b449ee14ace4d869adaa1bb35a44ce68d775 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 2 Sep 2013 12:12:15 -0700
Subject: lockref: implement lockless reference count updates using cmpxchg()

Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop.

This is semantically identical to doing the reference count update
protected by the lock, but avoids the "wait for lock" contention that
you get when accesses to the reference count are contended.

Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock
means that the lockless updates can never happen while somebody else
holds the spinlock.
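As a concrete illustration of that last point (a sketch for this log, not
part of the patch; the "cached_obj" type is hypothetical), a holder of the
spinlock can treat the count as plain data, because the cmpxchg fast path
backs off whenever the lock is held:

	struct cached_obj {
		struct lockref ref;		/* embeds the lock and the count */
		/* ... payload ... */
	};

	static void cached_obj_bump(struct cached_obj *obj)
	{
		spin_lock(&obj->ref.lock);	/* fences off the lockless fast path */
		obj->ref.count++;		/* plain, non-atomic update is safe here */
		spin_unlock(&obj->ref.lock);
	}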
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.

The lockref structure, in contrast, really is a *locked* reference count.
If you hold the spinlock, the reference count will be stable and you can
modify the reference count without using atomics, because even the
lockless updates will see and respect the state of the lock.

In order to enable the cmpxchg lockless code, the architecture needs to
do three things:

 (1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
     in an aligned u64, and have a "cmpxchg()" implementation that works
     on such a u64 data type.

 (2) define a helper function to test for a spinlock being unlocked
     ("arch_spin_value_unlocked()")

 (3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
     Kconfig file.

This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).

Signed-off-by: Linus Torvalds
---
 arch/x86/Kconfig                |  1 +
 arch/x86/include/asm/spinlock.h |  5 ++++
 include/linux/lockref.h         | 11 ++++++--
 lib/Kconfig                     | 10 +++++++
 lib/lockref.c                   | 60 ++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0c..67e00740531 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7db723..e0e668422c7 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.tickets.head == lock.tickets.tail;
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 4c0af31c8d4..ca07b5028b0 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,8 +17,15 @@
 #include <linux/spinlock.h>
 
 struct lockref {
-	spinlock_t lock;
-	unsigned int count;
+	union {
+#ifdef CONFIG_CMPXCHG_LOCKREF
+		aligned_u64 lock_count;
+#endif
+		struct {
+			spinlock_t lock;
+			unsigned int count;
+		};
+	};
 };
 
 extern void lockref_get(struct lockref *);
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81f6ee..65561716c16 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	boolean
 
+config ARCH_USE_CMPXCHG_LOCKREF
+	bool
+
+config CMPXCHG_LOCKREF
+	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+	depends on SMP
+	depends on !GENERIC_LOCKBREAK
+	depends on !DEBUG_SPINLOCK
+	depends on !DEBUG_LOCK_ALLOC
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/lockref.c b/lib/lockref.c
index a9a4f4e1eff..7819c2d1d31 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,6 +1,33 @@
 #include <linux/export.h>
 #include <linux/lockref.h>
 
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
+	struct lockref old;							\
+	BUILD_BUG_ON(sizeof(old) != 8);						\
+	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
+		struct lockref new = old, prev = old;				\
+		CODE								\
+		old.lock_count = cmpxchg(&lockref->lock_count,			\
+					 old.lock_count, new.lock_count);	\
+		if (likely(old.lock_count == prev.lock_count)) {		\
+			SUCCESS;						\
+		}								\
+	}									\
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
+
 /**
  * lockref_get - Increments reference count unconditionally
  * @lockcnt: pointer to lockref structure
@@ -10,6 +37,12 @@
  */
 void lockref_get(struct lockref *lockref)
 {
+	CMPXCHG_LOOP(
+		new.count++;
+	,
+		return;
+	);
+
 	spin_lock(&lockref->lock);
 	lockref->count++;
 	spin_unlock(&lockref->lock);
@@ -23,9 +56,18 @@ EXPORT_SYMBOL(lockref_get);
  */
 int lockref_get_not_zero(struct lockref *lockref)
 {
-	int retval = 0;
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			return 0;
+	,
+		return 1;
+	);
 
 	spin_lock(&lockref->lock);
+	retval = 0;
 	if (lockref->count) {
 		lockref->count++;
 		retval = 1;
@@ -43,6 +85,14 @@ EXPORT_SYMBOL(lockref_get_not_zero);
  */
 int lockref_get_or_lock(struct lockref *lockref)
 {
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			break;
+	,
+		return 1;
+	);
+
 	spin_lock(&lockref->lock);
 	if (!lockref->count)
 		return 0;
@@ -59,6 +109,14 @@ EXPORT_SYMBOL(lockref_get_or_lock);
  */
 int lockref_put_or_lock(struct lockref *lockref)
 {
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			break;
+	,
+		return 1;
+	);
+
 	spin_lock(&lockref->lock);
 	if (lockref->count <= 1)
 		return 0;

From d472d9d98b463dd7a04f2bcdeafe4261686ce6ab Mon Sep 17 00:00:00 2001
From: "Luck, Tony"
Date: Tue, 3 Sep 2013 14:49:49 -0700
Subject: lockref: Relax in cmpxchg loop

While we are likely to succeed and break out of this loop, it isn't
guaranteed.  We should be power and thread friendly if we do have to
go around for a second (or third, or more) attempt.

Signed-off-by: Tony Luck
Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/lockref.c b/lib/lockref.c
index 7819c2d1d31..9d76f404ce9 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -19,6 +19,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
+		cpu_relax();							\
 	}									\
 } while (0)
 

From 44a0cf92926c343366a4986808d12ab068504eed Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 7 Sep 2013 15:30:29 -0700
Subject: lockref: fix docbook argument names

The code got rewritten, but the comments got copied as-is from older
versions, and as a result the argument name in the comment didn't
actually match the code any more.

Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index 9d76f404ce9..7aae8df37f6 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -31,7 +31,7 @@
 
 /**
  * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  *
  * This operation is only valid if you already hold a reference
  * to the object, so you know the count cannot be zero.
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(lockref_get);
 
 /**
  * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count was zero
  */
 int lockref_get_not_zero(struct lockref *lockref)
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(lockref_get_not_zero);
 
 /**
  * lockref_get_or_lock - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count was zero
  * and we got the lock instead.
  */
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(lockref_get_or_lock);
 
 /**
  * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
+ * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
  */
 int lockref_put_or_lock(struct lockref *lockref)

From e7d33bb5ea82922e6ddcfc6b28a630b1a4ced071 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 7 Sep 2013 15:49:18 -0700
Subject: lockref: add ability to mark lockrefs "dead"

The only actual current lockref user (dcache) uses zero reference counts
even for perfectly live dentries, because it's a cache: there may not be
any users, but that doesn't mean that we want to throw away the dentry.

At the same time, the dentry cache does have a notion of a truly "dead"
dentry that we must not even increment the reference count of, because
we have pruned it and it is not valid.

Currently that distinction is not visible in the lockref itself, and the
dentry cache validation uses "lockref_get_or_lock()" to either get a new
reference to a dentry that already had existing references (and thus
cannot be dead), or get the dentry lock so that we can then verify the
dentry and increment the reference count under the lock if that
verification was successful.

That's all somewhat complicated.

This adds the concept of being "dead" to the lockref itself, by simply
using a count that is negative.  This allows a usage scenario where we
can increment the refcount of a dentry without having to validate it,
and pushing the special "we killed it" case into the lockref code.

The dentry code itself doesn't actually use this yet, and it's probably
too late in the merge window to do that code (the dentry_kill() code
with its "should I decrement the count" logic really is pretty complex
code), but let's introduce the concept at the lockref level now.
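To illustrate the intended pattern (a sketch only; as noted above the
dentry code does not do this yet, and "obj" here is a hypothetical lockref
user rather than anything added by this patch), a lookup fast path could
then take a reference without validating the object first, relying on the
negative count to reject killed objects:

	/* Fast path: no validation needed, dead objects are rejected by the count. */
	if (lockref_get_not_dead(&obj->ref))
		return obj;

	/* The object was marked dead (negative count) by whoever pruned it. */
	return NULL;

The killing side holds the spinlock and calls lockref_mark_dead(), as the
new helper below requires.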
Signed-off-by: Linus Torvalds
---
 include/linux/lockref.h |  3 +++
 lib/lockref.c           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index ca07b5028b0..f279ed9a916 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -33,4 +33,7 @@ extern int lockref_get_not_zero(struct lockref *);
 extern int lockref_get_or_lock(struct lockref *);
 extern int lockref_put_or_lock(struct lockref *);
 
+extern void lockref_mark_dead(struct lockref *);
+extern int lockref_get_not_dead(struct lockref *);
+
 #endif /* __LINUX_LOCKREF_H */
diff --git a/lib/lockref.c b/lib/lockref.c
index 7aae8df37f6..e2cd2c0a882 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -126,3 +126,41 @@ int lockref_put_or_lock(struct lockref *lockref)
 	return 1;
 }
 EXPORT_SYMBOL(lockref_put_or_lock);
+
+/**
+ * lockref_mark_dead - mark lockref dead
+ * @lockref: pointer to lockref structure
+ */
+void lockref_mark_dead(struct lockref *lockref)
+{
+	assert_spin_locked(&lockref->lock);
+	lockref->count = -128;
+}
+
+/**
+ * lockref_get_not_dead - Increments count unless the ref is dead
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if lockref was dead
+ */
+int lockref_get_not_dead(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if ((int)old.count < 0)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if ((int) lockref->count >= 0) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_dead);

From 8f4c344696b9f9f8471d7f342076ef10ed7f66a5 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 19 Sep 2013 19:06:46 +0100
Subject: lockref: use cmpxchg64 explicitly for lockless updates

The cmpxchg() function tends not to support 64-bit arguments on 32-bit
architectures.  This could be either due to use of unsigned long
arguments (like on ARM) or lack of instruction support (cmpxchgq on
x86).  However, these architectures may implement a specific cmpxchg64()
function to provide 64-bit cmpxchg support instead.

Since the lockref code requires a 64-bit cmpxchg and relies on the
architecture selecting ARCH_USE_CMPXCHG_LOCKREF, move to using cmpxchg64
instead of cmpxchg and allow 32-bit architectures to make use of the
lockless lockref implementation.

Cc: Waiman Long
Signed-off-by: Will Deacon
Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index e2cd2c0a882..677d036cf3c 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -14,8 +14,8 @@
 	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
 		struct lockref new = old, prev = old;				\
 		CODE								\
-		old.lock_count = cmpxchg(&lockref->lock_count,			\
-					 old.lock_count, new.lock_count);	\
+		old.lock_count = cmpxchg64(&lockref->lock_count,		\
+					   old.lock_count, new.lock_count);	\
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\

From d2212b4dce596fee83e5c523400bf084f4cc816c Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 26 Sep 2013 17:27:00 +0100
Subject: lockref: allow relaxed cmpxchg64 variant for lockless updates

The 64-bit cmpxchg operation on the lockref is ordered by virtue of
hazarding between the cmpxchg operation and the reference count
manipulation.
On weakly ordered memory architectures (such as ARM), it can be of great
benefit to omit the barrier instructions where they are not needed.

This patch moves the lockless lockref code over to a cmpxchg64_relaxed
operation, which doesn't provide barrier semantics.  If the operation
isn't defined, we simply #define it as the usual 64-bit cmpxchg macro.

Cc: Waiman Long
Signed-off-by: Will Deacon
Signed-off-by: Linus Torvalds
---
 lib/lockref.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index 677d036cf3c..e294ae445c9 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -3,6 +3,14 @@
 
 #ifdef CONFIG_CMPXCHG_LOCKREF
 
+/*
+ * Allow weakly-ordered memory architectures to provide barrier-less
+ * cmpxchg semantics for lockref updates.
+ */
+#ifndef cmpxchg64_relaxed
+# define cmpxchg64_relaxed cmpxchg64
+#endif
+
 /*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
@@ -14,8 +22,9 @@
 	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
 		struct lockref new = old, prev = old;				\
 		CODE								\
-		old.lock_count = cmpxchg64(&lockref->lock_count,		\
-					   old.lock_count, new.lock_count);	\
+		old.lock_count = cmpxchg64_relaxed(&lockref->lock_count,	\
+						   old.lock_count,		\
+						   new.lock_count);		\
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\

From 491f6f8e5fd9a57aaf03b6d6e3e153f1c27d8a46 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 23 Sep 2013 12:59:56 +0200
Subject: lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP()

Make use of arch_mutex_cpu_relax() so architectures can override the
default cpu_relax() semantics.  This is especially useful for s390,
where cpu_relax() means that we yield() the current (virtual) cpu and
therefore is very expensive, and would contradict the whole purpose of
the lockless cmpxchg loop.

Signed-off-by: Heiko Carstens
---
 lib/lockref.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/lockref.c b/lib/lockref.c
index e294ae445c9..6f9d434c152 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -11,6 +11,14 @@
 # define cmpxchg64_relaxed cmpxchg64
 #endif
 
+/*
+ * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP.
+ * This is useful for architectures with an expensive cpu_relax().
+ */
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
 /*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
@@ -28,7 +36,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
-		cpu_relax();							\
+		arch_mutex_cpu_relax();						\
 	}									\
 } while (0)
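Taken together, the series leaves lib/lockref.c with a lockless cmpxchg
fast path and a locked slow path behind it.  As a closing illustration
(a rough sketch, not taken from any of the patches above; "my_obj" and
my_obj_free() are hypothetical), a last-reference drop built on this API
could look roughly like this:

	static void my_obj_put(struct my_obj *obj)
	{
		if (lockref_put_or_lock(&obj->ref))
			return;			/* count was > 1: lockless decrement already done */

		/* Slow path: the spinlock is now held and the count is <= 1. */
		lockref_mark_dead(&obj->ref);	/* keeps lockref_get_not_dead() callers away */
		spin_unlock(&obj->ref.lock);
		my_obj_free(obj);		/* assumes the object is already unfindable */
	}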