Re: [RFC][PATCH 05/12] arch: Introduce arch_{,try_}_cmpxchg128{,_local}()

From: Boqun Feng
Date: Mon Dec 19 2022 - 15:08:29 EST


On Mon, Dec 19, 2022 at 04:35:30PM +0100, Peter Zijlstra wrote:
> For all architectures that currently support cmpxchg_double()
> implement the cmpxchg128() family of functions that is basically the
> same but with a saner interface.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
> arch/arm64/include/asm/atomic_ll_sc.h | 38 +++++++++++++++++++++++
> arch/arm64/include/asm/atomic_lse.h | 33 +++++++++++++++++++-
> arch/arm64/include/asm/cmpxchg.h | 26 ++++++++++++++++
> arch/s390/include/asm/cmpxchg.h | 33 ++++++++++++++++++++
> arch/x86/include/asm/cmpxchg_32.h | 3 +
> arch/x86/include/asm/cmpxchg_64.h | 55 +++++++++++++++++++++++++++++++++-
> 6 files changed, 185 insertions(+), 3 deletions(-)
>
> --- a/arch/arm64/include/asm/atomic_ll_sc.h
> +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> @@ -326,6 +326,44 @@ __CMPXCHG_DBL( , , , )
> __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
>
> #undef __CMPXCHG_DBL
> +
> +union __u128_halves {
> + u128 full;
> + struct {
> + u64 low, high;
> + };
> +};
> +
> +#define __CMPXCHG128(name, mb, rel, cl) \
> +static __always_inline u128 \
> +__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
> +{ \
> + union __u128_halves r, o = { .full = (old) }, \
> + n = { .full = (new) }; \
> + \
> + asm volatile("// __cmpxchg128" #name "\n" \
> + " prfm pstl1strm, %2\n" \
> + "1: ldxp %0, %1, %2\n" \
> + " eor %3, %0, %3\n" \
> + " eor %4, %1, %4\n" \
> + " orr %3, %4, %3\n" \
> + " cbnz %3, 2f\n" \
> + " st" #rel "xp %w3, %5, %6, %2\n" \
> + " cbnz %w3, 1b\n" \
> + " " #mb "\n" \
> + "2:" \
> + : "=&r" (r.low), "=&r" (r.high), "+Q" (*(unsigned long *)ptr) \

I wonder whether we should use "(*(u128 *)ptr)" instead of "(*(unsigned
long *)ptr)"? With the latter, compilers may assume that only a 64-bit
value pointed to by "ptr" gets modified, which leaves them free to do
"useful" optimizations.

Same for the lse and s390 variants.

Regards,
Boqun

> + : "r" (o.low), "r" (o.high), "r" (n.low), "r" (n.high) \
> + : cl); \
> + \
> + return r.full; \
> +}
> +
> +__CMPXCHG128( , , , )
> +__CMPXCHG128(_mb, dmb ish, l, "memory")
> +
> +#undef __CMPXCHG128
> +
> #undef K
>
> #endif /* __ASM_ATOMIC_LL_SC_H */
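
For context, the "saner interface" mentioned in the changelog can be
sketched from the caller side roughly like this (hypothetical wrappers,
not code from this patch; cmpxchg_double() additionally requires the
two words to be adjacent in memory and suitably aligned):

	/* old interface: two pointers, six u64 arguments, bool result */
	static inline bool update_pair(u64 *lo, u64 *hi,
				       u64 old_lo, u64 old_hi,
				       u64 new_lo, u64 new_hi)
	{
		return cmpxchg_double(lo, hi, old_lo, old_hi,
				      new_lo, new_hi);
	}

	/* new interface: one u128 pointer, returns the previous value */
	static inline bool update_u128(u128 *p, u128 old, u128 new)
	{
		return cmpxchg128(p, old, new) == old;
	}

The u128 form needs no separate success flag: comparing the returned
previous value against "old" serves the same purpose.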