[PATCH] x86/smp: Use atomic_try_cmpxchg() to micro-optimize native_stop_other_cpus()

From: Uros Bizjak
Date: Tue Nov 14 2023 - 11:44:33 EST


Use atomic_try_cmpxchg() instead of atomic_cmpxchg(*ptr, old, new) == old
in native_stop_other_cpus(). On x86 the CMPXCHG instruction returns success
in the ZF flag, so this change saves a compare after CMPXCHG. Together
with a small code reorder, the generated asm code improves from:

74: 8b 05 00 00 00 00 mov 0x0(%rip),%eax
7a: 41 54 push %r12
7c: 55 push %rbp
7d: 65 8b 2d 00 00 00 00 mov %gs:0x0(%rip),%ebp
84: 53 push %rbx
85: 85 c0 test %eax,%eax
87: 75 71 jne fa <native_stop_other_cpus+0x8a>
89: b8 ff ff ff ff mov $0xffffffff,%eax
8e: f0 0f b1 2d 00 00 00 lock cmpxchg %ebp,0x0(%rip)
95: 00
96: 83 f8 ff cmp $0xffffffff,%eax
99: 75 5f jne fa <native_stop_other_cpus+0x8a>

to:

74: 8b 05 00 00 00 00 mov 0x0(%rip),%eax
7a: 85 c0 test %eax,%eax
7c: 0f 85 84 00 00 00 jne 106 <native_stop_other_cpus+0x96>
82: 41 54 push %r12
84: b8 ff ff ff ff mov $0xffffffff,%eax
89: 55 push %rbp
8a: 53 push %rbx
8b: 65 8b 1d 00 00 00 00 mov %gs:0x0(%rip),%ebx
92: f0 0f b1 1d 00 00 00 lock cmpxchg %ebx,0x0(%rip)
99: 00
9a: 75 5e jne fa <native_stop_other_cpus+0x8a>

Please note early exit and lack of CMP after CMPXCHG.

No functional change intended.

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: "Peter Zijlstra (Intel)" <peterz@xxxxxxxxxxxxx>
Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
---
arch/x86/kernel/smp.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 96a771f9f930..2908e063d7d8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -148,14 +148,16 @@ static int register_stop_handler(void)

static void native_stop_other_cpus(int wait)
{
- unsigned int cpu = smp_processor_id();
+ int old_cpu, this_cpu;
unsigned long flags, timeout;

if (reboot_force)
return;

/* Only proceed if this is the first CPU to reach this code */
- if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1)
+ old_cpu = -1;
+ this_cpu = smp_processor_id();
+ if (!atomic_try_cmpxchg(&stopping_cpu, &old_cpu, this_cpu))
return;

/* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
@@ -186,7 +188,7 @@ static void native_stop_other_cpus(int wait)
* NMIs.
*/
cpumask_copy(&cpus_stop_mask, cpu_online_mask);
- cpumask_clear_cpu(cpu, &cpus_stop_mask);
+ cpumask_clear_cpu(this_cpu, &cpus_stop_mask);

if (!cpumask_empty(&cpus_stop_mask)) {
apic_send_IPI_allbutself(REBOOT_VECTOR);
@@ -210,6 +212,8 @@ static void native_stop_other_cpus(int wait)
* CPUs to stop.
*/
if (!smp_no_nmi_ipi && !register_stop_handler()) {
+ unsigned int cpu;
+
pr_emerg("Shutting down cpus with NMI\n");

for_each_cpu(cpu, &cpus_stop_mask)
--
2.41.0