Re: [PATCH V4] powerpc: Implement {cmp}xchg for u8 and u16

From: Boqun Feng
Date: Wed Apr 27 2016 - 09:55:19 EST


On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote:
> From: Pan Xinhui <xinhui.pan@xxxxxxxxxxxxxxxxxx>
>
> Implement xchg{u8,u16}{local,relaxed}, and
> cmpxchg{u8,u16}{,local,acquire,relaxed}.
>
> It works on all ppc.
>
> remove volatile of first parameter in __cmpxchg_local and __cmpxchg
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Pan Xinhui <xinhui.pan@xxxxxxxxxxxxxxxxxx>
> ---
> change from v3:
> rewrite in asm for the LL/SC.
> remove volatile in __cmpxchg_local and __cmpxchg.
> change from v2:
> in the do{}while(), we save one load and use corresponding cmpxchg suffix.
> Also add corresponding __cmpxchg_u32 function declaration in the __XCHG_GEN
> change from V1:
> rework totally.
> ---
> arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 106 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index 44efe73..8a3735f 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -7,6 +7,71 @@
> #include <asm/asm-compat.h>
> #include <linux/bug.h>
>
> +#ifdef __BIG_ENDIAN
> +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE)
> +#else
> +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE)
> +#endif
> +
> +#define XCHG_GEN(type, sfx, cl) \
> +static inline u32 __xchg_##type##sfx(void *p, u32 val) \
> +{ \
> + unsigned int prev, prev_mask, tmp, bitoff, off; \
> + \
> + off = (unsigned long)p % sizeof(u32); \
> + bitoff = BITOFF_CAL(sizeof(type), off); \
> + p -= off; \
> + val <<= bitoff; \
> + prev_mask = (u32)(type)-1 << bitoff; \
> + \
> + __asm__ __volatile__( \
> +"1: lwarx %0,0,%3\n" \
> +" andc %1,%0,%5\n" \
> +" or %1,%1,%4\n" \
> + PPC405_ERR77(0,%3) \
> +" stwcx. %1,0,%3\n" \
> +" bne- 1b\n" \
> + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \

I think we can save the "tmp" here by:

__asm__ volatile__(
"1: lwarx %0,0,%2\n"
" andc %0,%0,%4\n"
" or %0,%0,%3\n"
PPC405_ERR77(0,%2)
" stwcx. %0,0,%2\n"
" bne- 1b\n"
: "=&r" (prev), "+m" (*(u32*)p)
: "r" (p), "r" (val), "r" (prev_mask)
: "cc", cl);

right?

> + : "r" (p), "r" (val), "r" (prev_mask) \
> + : "cc", cl); \
> + \
> + return prev >> bitoff; \
> +}
> +
> +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \
> +static inline \
> +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \
> +{ \
> + unsigned int prev, prev_mask, tmp, bitoff, off; \
> + \
> + off = (unsigned long)p % sizeof(u32); \
> + bitoff = BITOFF_CAL(sizeof(type), off); \
> + p -= off; \
> + old <<= bitoff; \
> + new <<= bitoff; \
> + prev_mask = (u32)(type)-1 << bitoff; \
> + \
> + __asm__ __volatile__( \
> + br \
> +"1: lwarx %0,0,%3\n" \
> +" and %1,%0,%6\n" \
> +" cmpw 0,%1,%4\n" \
> +" bne- 2f\n" \
> +" andc %1,%0,%6\n" \
> +" or %1,%1,%5\n" \
> + PPC405_ERR77(0,%3) \
> +" stwcx. %1,0,%3\n" \
> +" bne- 1b\n" \
> + br2 \
> + "\n" \
> +"2:" \
> + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \

And "tmp" here could also be saved by:

"1: lwarx %0,0,%2\n" \
" xor %3,%0,%3\n" \
" and. %3,%3,%5\n" \
" bne- 2f\n" \
" andc %0,%0,%5\n" \
" or %0,%0,%4\n" \
PPC405_ERR77(0,%2) \
" stwcx. %0,0,%2\n" \
" bne- 1b\n" \
br2 \
"\n" \
"2:" \
: "=&r" (prev), "+m" (*(u32*)p) \
: "r" (p), "r" (old), "r" (new), "r" (prev_mask) \
: "cc", cl); \

right?

IIUC, saving the local variable "tmp" will result in saving a general
register for the compilers to use for other variables.

So thoughts?

Regards,
Boqun

> + : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \
> + : "cc", cl); \
> + \
> + return prev >> bitoff; \
> +}
> +
> /*
> * Atomic exchange
> *
> @@ -14,6 +79,11 @@
> * the previous value stored there.
> */
>
> +XCHG_GEN(u8, _local, "memory");
> +XCHG_GEN(u8, _relaxed, "cc");
> +XCHG_GEN(u16, _local, "memory");
> +XCHG_GEN(u16, _relaxed, "cc");
> +
> static __always_inline unsigned long
> __xchg_u32_local(volatile void *p, unsigned long val)
> {
> @@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val)
> #endif
>
> static __always_inline unsigned long
> -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
> +__xchg_local(void *ptr, unsigned long x, unsigned int size)
> {
> switch (size) {
> + case 1:
> + return __xchg_u8_local(ptr, x);
> + case 2:
> + return __xchg_u16_local(ptr, x);
> case 4:
> return __xchg_u32_local(ptr, x);
> #ifdef CONFIG_PPC64
> @@ -103,6 +177,10 @@ static __always_inline unsigned long
> __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
> {
> switch (size) {
> + case 1:
> + return __xchg_u8_relaxed(ptr, x);
> + case 2:
> + return __xchg_u16_relaxed(ptr, x);
> case 4:
> return __xchg_u32_relaxed(ptr, x);
> #ifdef CONFIG_PPC64
> @@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
> * and return the old value of *p.
> */
>
> +CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
> +CMPXCHG_GEN(u8, _local, , , "memory");
> +CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> +CMPXCHG_GEN(u8, _relaxed, , , "cc");
> +CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
> +CMPXCHG_GEN(u16, _local, , , "memory");
> +CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> +CMPXCHG_GEN(u16, _relaxed, , , "cc");
> +
> static __always_inline unsigned long
> __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
> {
> @@ -312,10 +399,14 @@ __cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new)
> #endif
>
> static __always_inline unsigned long
> -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
> +__cmpxchg(void *ptr, unsigned long old, unsigned long new,
> unsigned int size)
> {
> switch (size) {
> + case 1:
> + return __cmpxchg_u8(ptr, old, new);
> + case 2:
> + return __cmpxchg_u16(ptr, old, new);
> case 4:
> return __cmpxchg_u32(ptr, old, new);
> #ifdef CONFIG_PPC64
> @@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
> }
>
> static __always_inline unsigned long
> -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
> +__cmpxchg_local(void *ptr, unsigned long old, unsigned long new,
> unsigned int size)
> {
> switch (size) {
> + case 1:
> + return __cmpxchg_u8_local(ptr, old, new);
> + case 2:
> + return __cmpxchg_u16_local(ptr, old, new);
> case 4:
> return __cmpxchg_u32_local(ptr, old, new);
> #ifdef CONFIG_PPC64
> @@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
> unsigned int size)
> {
> switch (size) {
> + case 1:
> + return __cmpxchg_u8_relaxed(ptr, old, new);
> + case 2:
> + return __cmpxchg_u16_relaxed(ptr, old, new);
> case 4:
> return __cmpxchg_u32_relaxed(ptr, old, new);
> #ifdef CONFIG_PPC64
> @@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> unsigned int size)
> {
> switch (size) {
> + case 1:
> + return __cmpxchg_u8_acquire(ptr, old, new);
> + case 2:
> + return __cmpxchg_u16_acquire(ptr, old, new);
> case 4:
> return __cmpxchg_u32_acquire(ptr, old, new);
> #ifdef CONFIG_PPC64
> --
> 2.4.3
>

Attachment: signature.asc
Description: PGP signature