[PATCH] x86/asm: Implement local_xchg using CMPXCHG without lock prefix

From: Uros Bizjak
Date: Wed Jan 24 2024 - 05:59:03 EST


Implement local_xchg() using the CMPXCHG instruction without the LOCK prefix.
XCHG is expensive due to its implied LOCK prefix. The processor
cannot prefetch cachelines if XCHG is used.

Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
---
arch/x86/include/asm/local.h | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 73dba8b94443..f9af6908aa2f 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -131,8 +131,20 @@ static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
(typeof(l->a.counter) *) old, new);
}

-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+/*
+ * Implement local_xchg() using CMPXCHG without the LOCK prefix. XCHG is
+ * expensive due to its implied LOCK prefix, and the processor cannot prefetch
+ * cachelines with it. Loop until CMPXCHG succeeds and return the old value.
+ */
+static __always_inline long
+local_xchg(local_t *l, long n)
+{
+ long c = local_read(l);
+
+ do { } while (!local_try_cmpxchg(l, &c, n));
+
+ return c;
+}

/**
* local_add_unless - add unless the number is already a given value
--
2.31.1