[PATCH] riscv: Implement LR+WRS-based smp_cond_load_{relaxed,acquire}()

From: Andrea Parri
Date: Tue Oct 17 2023 - 20:22:28 EST


The Zawrs extension defines instructions allowing a core to enter
a low-power state and wait on a store to a memory location.

Introduce the Zawrs-instruction WRS.STO and use it in the polling
loops of the macros smp_cond_load_{relaxed,acquire}().

Signed-off-by: Andrea Parri <parri.andrea@xxxxxxxxx>
[based on the arm64 implementation and work from Heiko and Christoph]
---
arch/riscv/Kconfig | 14 +++++++++
arch/riscv/include/asm/barrier.h | 26 ++++++++++++++++
arch/riscv/include/asm/cmpxchg.h | 51 +++++++++++++++++++++++++++++++
arch/riscv/include/asm/hwcap.h | 3 +-
arch/riscv/include/asm/insn-def.h | 2 ++
arch/riscv/kernel/cpufeature.c | 1 +
6 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d607ab0f7c6da..ff49f90d175ba 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -489,6 +489,20 @@ config RISCV_ISA_SVPBMT

If you don't know what to do here, say Y.

+config RISCV_ISA_ZAWRS
+ bool "Zawrs extension support for wait-on-reservation-set instructions"
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the Zawrs
+ extension and enable its usage.
+
+ The Zawrs extension defines a pair of instructions to be used
+ in polling loops that allows a core to enter a low-power state
+ and wait on a store to a memory location.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_V
bool
default y
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 110752594228e..7ab7a28e72210 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -71,6 +71,32 @@ do { \
*/
#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw)

+#define smp_cond_load_relaxed(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = READ_ONCE(*__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
#include <asm-generic/barrier.h>

#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2f4726d3cfcc2..65bc21379f40e 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -8,8 +8,11 @@

#include <linux/bug.h>

+#include <asm/alternative-macros.h>
#include <asm/barrier.h>
#include <asm/fence.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>

#define __xchg_relaxed(ptr, new, size) \
({ \
@@ -360,4 +363,52 @@
arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

+#define __CMPWAIT_CASE(w, sz) \
+static inline void __cmpwait_case_##sz(volatile void *ptr, \
+ unsigned long val) \
+{ \
+ unsigned long tmp; \
+ \
+ asm volatile(ALTERNATIVE( \
+ __nops(4), \
+ "lr." #w "\t" "%[tmp], %[v]\n\t" \
+ "xor %[tmp], %[tmp], %[val]\n\t" \
+ "bnez %[tmp], 1f\n\t" \
+ WRS_sto "\n\t" \
+ "1:\n", \
+ 0, RISCV_ISA_EXT_ZAWRS, CONFIG_RISCV_ISA_ZAWRS) \
+ : [tmp] "=&r" (tmp), [v] "+A" (*(u##sz *)ptr) \
+ : [val] "rJ" (val) \
+ : "memory"); \
+}
+
+__CMPWAIT_CASE(w, 32);
+__CMPWAIT_CASE(d, 64);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN() \
+static __always_inline void __cmpwait(volatile void *ptr, \
+ unsigned long val, \
+ int size) \
+{ \
+ switch (size) { \
+ case 4: \
+ return __cmpwait_case_32(ptr, val); \
+ case 8: \
+ return __cmpwait_case_64(ptr, val); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__CMPWAIT_GEN()
+
+#undef __CMPWAIT_GEN
+
+#define __cmpwait_relaxed(ptr, val) \
+ __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
#endif /* _ASM_RISCV_CMPXCHG_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index b7b58258f6c7c..afabcbf849526 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -58,6 +58,7 @@
#define RISCV_ISA_EXT_ZICSR 40
#define RISCV_ISA_EXT_ZIFENCEI 41
#define RISCV_ISA_EXT_ZIHPM 42
+#define RISCV_ISA_EXT_ZAWRS 43

#define RISCV_ISA_EXT_MAX 64

@@ -69,8 +70,6 @@

#ifndef __ASSEMBLY__

-#include <linux/jump_label.h>
-
unsigned long riscv_get_elf_hwcap(void);

struct riscv_isa_ext_data {
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index 6960beb75f329..fecb744ed7b29 100644
--- a/arch/riscv/include/asm/insn-def.h
+++ b/arch/riscv/include/asm/insn-def.h
@@ -196,4 +196,6 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(4))

+#define WRS_sto ".long 0x01d00073"
+
#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 1cfbba65d11ae..044682f54f78f 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -181,6 +181,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
};

const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
--
2.34.1