Re: [cpuops cmpxchg V1 2/4] x86: this_cpu_cmpxchg and this_cpu_xchg operations

From: Christoph Lameter
Date: Wed Dec 08 2010 - 13:08:24 EST


Alternate approach: we could also use cmpxchg for xchg.


Subject: cpuops: Use cmpxchg for xchg to avoid lock semantics

Cmpxchg (without a lock prefix) has a lower cycle count than xchg, because xchg always carries implied lock semantics.

Simulate xchg through cmpxchg for the cpu ops.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>

---
arch/x86/include/asm/percpu.h | 68 +++++++-----------------------------------
1 file changed, 12 insertions(+), 56 deletions(-)

Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h 2010-12-08 11:43:50.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h 2010-12-08 12:00:21.000000000 -0600
@@ -212,48 +212,6 @@ do { \
ret__; \
})

-/*
- * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
- * full lock semantics even though they are not needed.
- */
-#define percpu_xchg_op(var, nval) \
-({ \
- typeof(var) __ret; \
- typeof(var) __new = (nval); \
- switch (sizeof(var)) { \
- case 1: \
- asm("xchgb %2, "__percpu_arg(1) \
- : "=a" (__ret), "+m" (var) \
- : "q" (__new) \
- : "memory"); \
- break; \
- case 2: \
- asm("xchgw %2, "__percpu_arg(1) \
- : "=a" (__ret), "+m" (var) \
- : "r" (__new) \
- : "memory"); \
- break; \
- case 4: \
- asm("xchgl %2, "__percpu_arg(1) \
- : "=a" (__ret), "+m" (var) \
- : "r" (__new) \
- : "memory"); \
- break; \
- case 8: \
- asm("xchgq %2, "__percpu_arg(1) \
- : "=a" (__ret), "+m" (var) \
- : "r" (__new) \
- : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- __ret; \
-})
-
-/*
- * cmpxchg has no such implied lock semantics as a result it is much
- * more efficient for cpu local operations.
- */
#define percpu_cmpxchg_op(var, oval, nval) \
({ \
typeof(var) __ret; \
@@ -412,16 +370,6 @@ do { \
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)

-#define __this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-
#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
@@ -489,16 +437,24 @@ do { \
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)

-#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-
#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

#endif

+#define this_cpu_xchg(pcp, val) \
+({ \
+ typeof(val) __o; \
+ do { \
+ __o = __this_cpu_read(pcp); \
+ } while (this_cpu_cmpxchg(pcp, __o, val) != __o); \
+ __o; \
+})
+
+#define __this_cpu_xchg this_cpu_xchg
+#define irqsafe_cpu_xchg this_cpu_xchg
+
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
#define x86_test_and_clear_bit_percpu(bit, var) \
({ \

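For reference, the same xchg-via-cmpxchg retry loop as a self-contained user-space sketch. It uses GCC's __sync_val_compare_and_swap builtin as a stand-in for this_cpu_cmpxchg(), and the function name and plain pointer argument are made up for the example; it is not the per-cpu API from the patch above:

#include <stdio.h>

/*
 * Emulate xchg with a compare-and-swap loop: read the current value,
 * then retry the cmpxchg until nothing modified the location in
 * between.  The old value is returned, as xchg would return it.
 */
static unsigned long emulated_xchg(unsigned long *ptr, unsigned long newval)
{
	unsigned long old;

	do {
		old = *ptr;			/* snapshot the current value */
	} while (__sync_val_compare_and_swap(ptr, old, newval) != old);

	return old;				/* previous value */
}

int main(void)
{
	unsigned long v = 42;
	unsigned long prev = emulated_xchg(&v, 7);

	printf("prev=%lu now=%lu\n", prev, v);	/* prints: prev=42 now=7 */
	return 0;
}

In the per-cpu case the loop normally succeeds on the first pass; the retry only covers the window where an interrupt (or, for the preempt-safe variant, a migration to another cpu) changes the value between the read and the cmpxchg.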