[RFC][PATCH 3/3] locking,arm64: Introduce cmpwait()

From: Peter Zijlstra
Date: Mon Apr 04 2016 - 08:37:42 EST


Provide the cmpwait() primitive, which 'spin' waits for a variable to
change from a given value. Use it to implement smp_cond_load_acquire(),
and provide an ARM64 implementation of cmpwait() itself.
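
For illustration only, usage looks roughly like the snippet below;
'lock' and its 'locked' member are hypothetical names, and VAL is the
pre-named variable holding the current value of *ptr inside the
condition expression:

	/* Spin/idle until 'locked' reads as 0, with ACQUIRE ordering
	 * against the store that cleared it. */
	smp_cond_load_acquire(&lock->locked, VAL == 0);

	/* Just wait for the value to change from 1; no ordering implied. */
	cmpwait(&lock->locked, 1);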

The ARM64 implementation uses LDXR+WFE to avoid most of the spinning:
the CPU can go idle in WFE while waiting for the exclusive monitor
armed by the load to be cleared, which anybody changing the value must
do.
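
As a rough sketch of what the macro below generates for a 32-bit
variable, __CMPWAIT_GEN(w, , 4) expands to approximately:

	/* Load-exclusive the word; if it still equals 'val', WFE until
	 * the exclusive monitor is cleared (or another event arrives). */
	static inline void __cmpwait_case_4(volatile void *ptr, unsigned long val)
	{
		unsigned long tmp;

		asm volatile(
		"	ldxr	%w[tmp], %[v]\n"
		"	eor	%w[tmp], %w[tmp], %w[val]\n"
		"	cbnz	%w[tmp], 1f\n"
		"	wfe\n"
		"1:"
		: [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr)
		: [val] "r" (val));
	}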

I've misplaced my arm64 compiler, so this is not even compile tested.

Suggested-by: Will Deacon <will.deacon@xxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
 arch/arm64/include/asm/cmpxchg.h |   36 +++++++++++++++++++++++++++++
 include/linux/atomic.h           |   47 +++++++++++++++++++++++++++++++++++++++
 include/linux/compiler.h         |   30 ------------------------
 3 files changed, 83 insertions(+), 30 deletions(-)

--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -224,4 +224,40 @@ __CMPXCHG_GEN(_mb)
__ret; \
})

+#define __CMPWAIT_GEN(w, sz, name)					\
+static inline void __cmpwait_case_##name(volatile void *ptr, unsigned long val) \
+{									\
+	unsigned long tmp;						\
+									\
+	asm volatile(							\
+	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"			\
+	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
+	"	cbnz	%" #w "[tmp], 1f\n"				\
+	"	wfe\n"							\
+	"1:"								\
+	: [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr)		\
+	: [val] "r" (val));						\
+}
+
+__CMPWAIT_GEN(w, b, 1);
+__CMPWAIT_GEN(w, h, 2);
+__CMPWAIT_GEN(w,  , 4);
+__CMPWAIT_GEN( ,  , 8);
+
+static inline void __cmpwait(volatile void *ptr, unsigned long val, int size)
+{
+	switch (size) {
+	case 1: return __cmpwait_case_1(ptr, val);
+	case 2: return __cmpwait_case_2(ptr, val);
+	case 4: return __cmpwait_case_4(ptr, val);
+	case 8: return __cmpwait_case_8(ptr, val);
+	default: BUILD_BUG();
+	}
+
+	unreachable();
+}
+
+#define cmpwait(ptr, val)					\
+	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
#endif /* __ASM_CMPXCHG_H */
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -30,6 +30,53 @@
#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i))
#endif

+/**
+ * cmpwait - compare and wait for a variable to change
+ * @ptr: pointer to the variable to wait on
+ * @val: the value it should change from
+ *
+ * A simple construct that waits for a variable to change from a known
+ * value; some architectures can do this in hardware.
+ */
+#ifndef cmpwait
+#define cmpwait(ptr, val) do {					\
+	typeof(ptr) __ptr = (ptr);				\
+	typeof(val) __val = (val);				\
+	while (READ_ONCE(*__ptr) == __val)			\
+		cpu_relax();					\
+} while (0)
+#endif
+
+/**
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ *
+ * Due to C lacking lambda expressions we load the value of *ptr into a
+ * pre-named variable @VAL to be used in @cond.
+ *
+ * The control dependency provides a LOAD->STORE order, the additional RMB
+ * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
+ * aka. ACQUIRE.
+ */
+#ifndef smp_cond_load_acquire
+#define smp_cond_load_acquire(ptr, cond_expr) ({		\
+	typeof(ptr) __PTR = (ptr);				\
+	typeof(*ptr) VAL;					\
+	for (;;) {						\
+		VAL = READ_ONCE(*__PTR);			\
+		if (cond_expr)					\
+			break;					\
+		cmpwait(__PTR, VAL);				\
+	}							\
+	smp_rmb(); /* ctrl + rmb := acquire */			\
+	VAL;							\
+})
+#endif
+
/*
* The idea here is to build acquire/release variants by adding explicit
* barriers on top of the relaxed variant. In the case where the relaxed
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -304,36 +304,6 @@ static __always_inline void __write_once
__u.__val; \
})

-/**
- * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
- * @ptr: pointer to the variable to wait on
- * @cond: boolean expression to wait for
- *
- * Equivalent to using smp_load_acquire() on the condition variable but employs
- * the control dependency of the wait to reduce the barrier on many platforms.
- *
- * Due to C lacking lambda expressions we load the value of *ptr into a
- * pre-named variable @VAL to be used in @cond.
- *
- * The control dependency provides a LOAD->STORE order, the additional RMB
- * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
- * aka. ACQUIRE.
- */
-#ifndef smp_cond_load_acquire
-#define smp_cond_load_acquire(ptr, cond_expr) ({		\
-	typeof(ptr) __PTR = (ptr);				\
-	typeof(*ptr) VAL;					\
-	for (;;) {						\
-		VAL = READ_ONCE(*__PTR);			\
-		if (cond_expr)					\
-			break;					\
-		cpu_relax();					\
-	}							\
-	smp_rmb(); /* ctrl + rmb := acquire */			\
-	VAL;							\
-})
-#endif
-
#endif /* __KERNEL__ */

#endif /* __ASSEMBLY__ */