Re: [PATCH] Documentation: atomic_t.txt: Explain ordering provided by smp_mb__{before,after}_atomic()

From: Peter Zijlstra
Date: Tue Apr 23 2019 - 08:17:39 EST


On Sat, Apr 20, 2019 at 01:54:40AM -0700, Paul E. McKenney wrote:
> 3. Make non-value-returning atomics provide full ordering.
> This would of course need some benchmarking, but would be a
> simple change to make and would eliminate a large class of
> potential bugs. My guess is that the loss in performance
> would be non-negligible, but who knows?

Well, only for the architectures that have
smp_mb__{before,after}_atomic() as barrier(), which are: ia64, mips,
s390, sparc, x86 and xtensa.
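
(As a reminder of what that means in practice -- a simplified sketch along
the lines of the x86 headers, not the exact definitions: the LOCK-prefixed
RmW already gives the CPU-side ordering, so all that is left to do is stop
the compiler from reordering around it.)

	#define barrier()			asm volatile("" ::: "memory")

	#define __smp_mb__before_atomic()	barrier()
	#define __smp_mb__after_atomic()	barrier()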

$ ./compare.sh defconfig-build defconfig-build1 vmlinux
do_profile_hits                    275      278    +3,+0
freezer_apply_state                 86       98   +12,+0
perf_event_alloc                  2232     2261   +29,+0
_free_event                        631      660   +29,+0
shmem_add_to_page_cache            712      722   +10,+0
_enable_swap_info                  333      337    +4,+0
do_mmu_notifier_register           303      311    +8,+0
__nfs_commit_inode                 356      359    +3,+0
tcp_try_coalesce                   246      250    +4,+0
i915_gem_free_object                90       97    +7,+0
mce_intel_hcpu_update               39       47    +8,+0
__ia32_sys_swapoff                1177     1181    +4,+0
pci_enable_ats                     124      131    +7,+0
__x64_sys_swapoff                 1178     1182    +4,+0
i915_gem_madvise_ioctl             447      443    -4,+0
calc_global_load_tick               75       82    +7,+0
i915_gem_object_set_tiling         712      708    -4,+0
total                         11374236 11374367  +131,+0

Which doesn't look too bad.
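
To make the motivation explicit before the patch: on these architectures the
LOCK-prefixed instruction orders memory at the CPU level, but without a
"memory" clobber the asm is not a compiler barrier. Roughly (litmus-style
sketch, variable names purely illustrative):

	*x = 1;
	atomic_inc(u);
	smp_mb__after_atomic();	/* just barrier() here */
	r0 = *y;

the compiler is free to move the plain store down across the atomic:

	atomic_inc(u);
	*x = 1;
	smp_mb__after_atomic();
	r0 = *y;

after which TSO allows the CPU to satisfy the load before the store to *x
becomes visible. Adding "memory" to the clobber list of the
non-value-returning ops, as below, closes that hole by making them compiler
barriers too.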

---
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ea3d95275b43..115127c7ad28 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "addl %1,%0"
 		     : "+m" (v->counter)
-		     : "ir" (i));
+		     : "ir" (i) : "memory");
 }
 
 /**
@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "subl %1,%0"
 		     : "+m" (v->counter)
-		     : "ir" (i));
+		     : "ir" (i) : "memory");
 }
 
 /**
@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 static __always_inline void arch_atomic_inc(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "incl %0"
-		     : "+m" (v->counter));
+		     : "+m" (v->counter) :: "memory");
 }
 #define arch_atomic_inc arch_atomic_inc
 
@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
 static __always_inline void arch_atomic_dec(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "decl %0"
-		     : "+m" (v->counter));
+		     : "+m" (v->counter) :: "memory");
 }
 #define arch_atomic_dec arch_atomic_dec
 
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index dadc20adba21..5e86c0d68ac1 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -45,7 +45,7 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "addq %1,%0"
 		     : "=m" (v->counter)
-		     : "er" (i), "m" (v->counter));
+		     : "er" (i), "m" (v->counter) : "memory");
 }
 
 /**
@@ -59,7 +59,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "subq %1,%0"
 		     : "=m" (v->counter)
-		     : "er" (i), "m" (v->counter));
+		     : "er" (i), "m" (v->counter) : "memory");
 }
 
 /**
@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "incq %0"
 		     : "=m" (v->counter)
-		     : "m" (v->counter));
+		     : "m" (v->counter) : "memory");
 }
 #define arch_atomic64_inc arch_atomic64_inc
 
@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "decq %0"
 		     : "=m" (v->counter)
-		     : "m" (v->counter));
+		     : "m" (v->counter) : "memory");
 }
 #define arch_atomic64_dec arch_atomic64_dec
 
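
As a standalone illustration of what the new clobber buys (not kernel code;
the names below are made up for the example): with "memory" in the clobber
list the compiler must assume the asm can read or write any memory, so it
can neither move plain loads/stores across the statement nor keep values
cached in registers over it.

	static int flag;

	static inline void locked_inc(int *counter)
	{
		asm volatile("lock incl %0"
			     : "+m" (*counter)
			     :: "memory");	/* same clobber as the patch adds */
	}

	int example(int *counter)
	{
		int before = flag;

		locked_inc(counter);

		/* with the clobber, 'flag' is genuinely reloaded here */
		return before + flag;
	}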