Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC

From: Boqun Feng
Date: Fri Nov 13 2020 - 10:30:50 EST


Hi Nicholas,

On Wed, Nov 11, 2020 at 09:07:23PM +1000, Nicholas Piggin wrote:
> All the cool kids are doing it.
>
> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
> ---
> arch/powerpc/include/asm/atomic.h | 681 ++++++++++-------------------
> arch/powerpc/include/asm/cmpxchg.h | 62 +--
> 2 files changed, 248 insertions(+), 495 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> index 8a55eb8cc97b..899aa2403ba7 100644
> --- a/arch/powerpc/include/asm/atomic.h
> +++ b/arch/powerpc/include/asm/atomic.h
> @@ -11,185 +11,285 @@
> #include <asm/cmpxchg.h>
> #include <asm/barrier.h>
>
> +#define ARCH_ATOMIC
> +
> +#ifndef CONFIG_64BIT
> +#include <asm-generic/atomic64.h>
> +#endif
> +
> /*
> * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
> * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
> * on the platform without lwsync.
> */
> #define __atomic_acquire_fence() \
> - __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
> + asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
>
> #define __atomic_release_fence() \
> - __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
> + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
>
> -static __inline__ int atomic_read(const atomic_t *v)
> -{
> - int t;
> +#define __atomic_pre_full_fence smp_mb
>
> - __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
> +#define __atomic_post_full_fence smp_mb
>

Do you need to define __atomic_{pre,post}_full_fence for PPC? IIRC, they
are default smp_mb__{before,atomic}_atomic(), so are smp_mb() defautly
on PPC.

> - return t;
> +#define arch_atomic_read(v) __READ_ONCE((v)->counter)
> +#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i))
> +#ifdef CONFIG_64BIT
> +#define ATOMIC64_INIT(i) { (i) }
> +#define arch_atomic64_read(v) __READ_ONCE((v)->counter)
> +#define arch_atomic64_set(v, i) __WRITE_ONCE(((v)->counter), (i))
> +#endif
> +
[...]
>
> +#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
> +static inline int arch_##name##_relaxed(type *v, dtype a, dtype u) \

I don't think we have atomic_fetch_*_unless_relaxed() at atomic APIs,
ditto for:

atomic_fetch_add_unless_relaxed()
atomic_inc_not_zero_relaxed()
atomic_dec_if_positive_relaxed()

, and we don't have the _acquire() and _release() variants for them
either, and if you don't define their fully-ordered version (e.g.
atomic_inc_not_zero()), atomic-arch-fallback.h will use read and cmpxchg
to implement them, and I think not what we want.

[...]
>
> #endif /* __KERNEL__ */
> #endif /* _ASM_POWERPC_ATOMIC_H_ */
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..181f7e8b3281 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
> (unsigned long)_x_, sizeof(*(ptr))); \
> })
>
> -#define xchg_relaxed(ptr, x) \
> +#define arch_xchg_relaxed(ptr, x) \
> ({ \
> __typeof__(*(ptr)) _x_ = (x); \
> (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
> @@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
> return old;
> }
>
> -static __always_inline unsigned long
> -__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> - unsigned int size)
> -{
> - switch (size) {
> - case 1:
> - return __cmpxchg_u8_acquire(ptr, old, new);
> - case 2:
> - return __cmpxchg_u16_acquire(ptr, old, new);
> - case 4:
> - return __cmpxchg_u32_acquire(ptr, old, new);
> -#ifdef CONFIG_PPC64
> - case 8:
> - return __cmpxchg_u64_acquire(ptr, old, new);
> -#endif
> - }
> - BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
> - return old;
> -}
> -#define cmpxchg(ptr, o, n) \
> - ({ \
> - __typeof__(*(ptr)) _o_ = (o); \
> - __typeof__(*(ptr)) _n_ = (n); \
> - (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
> - (unsigned long)_n_, sizeof(*(ptr))); \
> - })
> -
> -

If you remove {atomic_}_cmpxchg_{,_acquire}() and use the version
provided by atomic-arch-fallback.h, then a fail cmpxchg or
cmpxchg_acquire() will still result into a full barrier or a acquire
barrier after the RMW operation, the barrier is not necessary and
probably this is not what we want?

Regards,
Boqun

> -#define cmpxchg_local(ptr, o, n) \
> +#define arch_cmpxchg_local(ptr, o, n) \
> ({ \
> __typeof__(*(ptr)) _o_ = (o); \
> __typeof__(*(ptr)) _n_ = (n); \
> @@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> (unsigned long)_n_, sizeof(*(ptr))); \
> })
>
> -#define cmpxchg_relaxed(ptr, o, n) \
> +#define arch_cmpxchg_relaxed(ptr, o, n) \
> ({ \
> __typeof__(*(ptr)) _o_ = (o); \
> __typeof__(*(ptr)) _n_ = (n); \
> @@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> sizeof(*(ptr))); \
> })
>
> -#define cmpxchg_acquire(ptr, o, n) \
> -({ \
> - __typeof__(*(ptr)) _o_ = (o); \
> - __typeof__(*(ptr)) _n_ = (n); \
> - (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
> - (unsigned long)_o_, (unsigned long)_n_, \
> - sizeof(*(ptr))); \
> -})
> #ifdef CONFIG_PPC64
> -#define cmpxchg64(ptr, o, n) \
> - ({ \
> - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg((ptr), (o), (n)); \
> - })
> -#define cmpxchg64_local(ptr, o, n) \
> +#define arch_cmpxchg64_local(ptr, o, n) \
> ({ \
> BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg_local((ptr), (o), (n)); \
> + arch_cmpxchg_local((ptr), (o), (n)); \
> })
> -#define cmpxchg64_relaxed(ptr, o, n) \
> -({ \
> - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg_relaxed((ptr), (o), (n)); \
> -})
> -#define cmpxchg64_acquire(ptr, o, n) \
> +#define arch_cmpxchg64_relaxed(ptr, o, n) \
> ({ \
> BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> - cmpxchg_acquire((ptr), (o), (n)); \
> + arch_cmpxchg_relaxed((ptr), (o), (n)); \
> })
> #else
> #include <asm-generic/cmpxchg-local.h>
> -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
> +#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
> #endif
>
> #endif /* __KERNEL__ */
> --
> 2.23.0
>