Re: [RFC][PATCH 05/12] arch: Introduce arch_{,try_}_cmpxchg128{,_local}()

From: Heiko Carstens
Date: Tue Jan 03 2023 - 12:15:46 EST


On Mon, Dec 19, 2022 at 04:35:30PM +0100, Peter Zijlstra wrote:
> For all architectures that currently support cmpxchg_double()
> implement the cmpxchg128() family of functions that is basically the
> same but with a saner interface.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
>  arch/arm64/include/asm/atomic_ll_sc.h | 38 +++++++++++++++++++++++
>  arch/arm64/include/asm/atomic_lse.h   | 33 +++++++++++++++++++-
>  arch/arm64/include/asm/cmpxchg.h      | 26 ++++++++++++++++
>  arch/s390/include/asm/cmpxchg.h       | 33 ++++++++++++++++++++
>  arch/x86/include/asm/cmpxchg_32.h     |  3 +
>  arch/x86/include/asm/cmpxchg_64.h     | 55 +++++++++++++++++++++++++++++++++-
>  6 files changed, 185 insertions(+), 3 deletions(-)
...
> --- a/arch/s390/include/asm/cmpxchg.h
> +++ b/arch/s390/include/asm/cmpxchg.h
> @@ -201,4 +201,37 @@ static __always_inline int __cmpxchg_dou
>  			 (unsigned long)(n1), (unsigned long)(n2));	\
>  })
>  
> +#define system_has_cmpxchg128()		1
> +
> +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
> +{
> +	asm volatile(
> +		"	cdsg	%[old],%[new],%[ptr]\n"
> +		: [old] "+&d" (old)
> +		: [new] "d" (new),
> +		  [ptr] "QS" (*(u128 *)ptr)
> +		: "memory", "cc");
> +	return old;
> +}
> +
> +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
> +{
> +	u128 old = *oldp;
> +	int cc;
> +
> +	asm volatile(
> +		"	cdsg	%[old],%[new],%[ptr]\n"
> +		"	ipm	%[cc]\n"
> +		"	srl	%[cc],28\n"
> +		: [cc] "=&d" (cc), [old] "+&d" (old)
> +		: [new] "d" (new),
> +		  [ptr] "QS" (*(u128 *)ptr)
> +		: "memory", "cc");
> +
> +	if (unlikely(!cc))
> +		*oldp = old;
> +
> +	return likely(cc);
> +}
> +

I was wondering why the above arch_try_cmpxchg128() is never used by
later code. It turned out this is because of a missing

#define arch_try_cmpxchg128 arch_try_cmpxchg128

which in turn means that the generic fallback variant is used.
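
For reference, the generic fallback keys off exactly this define: if
an architecture doesn't advertise its own arch_try_cmpxchg128(), the
generic code wraps arch_cmpxchg128() roughly like below (a sketch of
the pattern in include/linux/atomic/atomic-arch-fallback.h, not a
verbatim copy):

#ifndef arch_try_cmpxchg128
#define arch_try_cmpxchg128(_ptr, _oldp, _new)				\
({									\
	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r;		\
	___r = arch_cmpxchg128((_ptr), ___o, (_new));			\
	if (unlikely(___r != ___o))					\
		*___op = ___r;						\
	likely(___r == ___o);						\
})
#endif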

The above arch_try_cmpxchg128() implementation is broken, since it has
the condition code handling inverted: cc == 0 means the compare and
swap succeeded, cc == 1 means it failed, yet the code returns cc as
the success indicator and writes *oldp back on success instead of on
failure.
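
Just to illustrate the inversion (the function is removed below
anyway), a fixed variant would have to do it the other way around,
along these lines:

	/* cc == 0: compare and swap succeeded, cc == 1: it failed */
	if (unlikely(cc))
		*oldp = old;

	return likely(!cc);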

However, I would prefer to use the generic fallback variant anyway.
Could you please merge the below into your current patch?

It also addresses the oddity that *ptr within arch_cmpxchg128() is
specified only as an input operand, while it should be input/output.
This doesn't matter in practice because of the memory clobber, but
let's have it correct anyway.

diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 527c968945e8..0b98f57bbe9e 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -173,31 +173,12 @@ static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 n
 {
 	asm volatile(
 		"	cdsg	%[old],%[new],%[ptr]\n"
-		: [old] "+&d" (old)
-		: [new] "d" (new),
-		  [ptr] "QS" (*(u128 *)ptr)
+		: [old] "+d" (old), [ptr] "+QS" (*ptr)
+		: [new] "d" (new)
 		: "memory", "cc");
 	return old;
 }
 
-static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
-{
-	u128 old = *oldp;
-	int cc;
-
-	asm volatile(
-		"	cdsg	%[old],%[new],%[ptr]\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=&d" (cc), [old] "+&d" (old)
-		: [new] "d" (new),
-		  [ptr] "QS" (*(u128 *)ptr)
-		: "memory", "cc");
-
-	if (unlikely(!cc))
-		*oldp = old;
-
-	return likely(cc);
-}
+#define arch_cmpxchg128		arch_cmpxchg128
 
 #endif /* __ASM_CMPXCHG_H */
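
For completeness: with the generic fallback variant, callers follow
the usual try_cmpxchg() retry pattern. A hypothetical 128-bit add,
just for illustration (not part of the patch):

static __always_inline void add_u128(u128 *v, u128 i)
{
	u128 old = *v;

	/* on failure, old is updated with the current value; retry */
	while (!arch_try_cmpxchg128(v, &old, old + i))
		;
}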