[PATCH v2 5/9] atomic,x86: Implement atomic_dec_and_test_overflow()

From: Peter Zijlstra
Date: Fri Dec 10 2021 - 11:27:39 EST


Provide a better implementation of atomic_{dec,dec_and_test}_overflow() by
making use of the atomic-op condition codes.

This further improves the fast path code:

a980: b8 ff ff ff ff mov $0xffffffff,%eax
a985: f0 0f c1 07 lock xadd %eax,(%rdi)
a989: 83 e8 01 sub $0x1,%eax
a98c: 78 20 js a9ae <ring_buffer_put+0x2e>
a98e: 74 01 je a991 <ring_buffer_put+0x11>
a990: c3 ret

to:

a950: f0 ff 0f lock decl (%rdi)
a953: 7c 20 jl a975 <ring_buffer_put+0x25>
a955: 74 01 je a958 <ring_buffer_put+0x8>
a957: c3 ret

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/atomic.h | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -263,6 +263,29 @@ static __always_inline int arch_atomic_f
}
#define arch_atomic_fetch_xor arch_atomic_fetch_xor

+#define arch_atomic_dec_overflow(_v, _label) /* decrement (_v)->counter; goto _label if the result is <= 0 */ \
+ asm_volatile_goto(LOCK_PREFIX "decl %[var]\n\t" /* decl sets the condition codes tested by the jump below */ \
+ "jle %l1" /* signed <= 0; %l1 is _label: operand 0 is [var], goto labels are numbered after it */ \
+ : : [var] "m" ((_v)->counter) /* no outputs: counter is listed as an input although decl writes it */ \
+ : "memory" /* tells the compiler memory is modified, which covers the write to counter */ \
+ : _label)
+
+#define arch_atomic_dec_and_test_overflow(_v, _label) /* decrement (_v)->counter; goto _label if result < 0, else yield true iff result == 0 */ \
+({ \
+ __label__ __zero; /* local labels, so each expansion of the macro gets its own pair */ \
+ __label__ __out; \
+ bool __ret = false; /* value yielded when neither jump is taken (result > 0) */ \
+ asm_volatile_goto(LOCK_PREFIX "decl %[var]\n\t" \
+ "jl %l2\n\t" /* signed result < 0; %l2 is _label: operand 0 is [var], 1 is __zero, 2 is _label */ \
+ "je %l[__zero]" /* result == 0; jl does not alter the flags, so this still tests the decl result */ \
+ : : [var] "m" ((_v)->counter) /* no outputs: counter is listed as an input although decl writes it */ \
+ : "memory" /* tells the compiler memory is modified, which covers the write to counter */ \
+ : __zero, _label); \
+ goto __out; /* fall-through from the asm: keep __ret == false */ \
+__zero: __ret = true; \
+__out: __ret; /* value of the statement expression */ \
+})
+
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
#else