Re: [patch 2/6] x86/idle: Sanitize X86_BUG_AMD_E400 handling

From: Thomas Gleixner
Date: Fri Mar 01 2024 - 07:33:42 EST


On Thu, Feb 29 2024 at 15:23, Thomas Gleixner wrote:
> @@ -985,7 +958,9 @@ void __init arch_post_acpi_subsys_init(v
>
> if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
> mark_tsc_unstable("TSC halt in AMD C1E");
> - pr_info("System has AMD C1E enabled\n");
> +
> + static_branch_enable(&arch_needs_tick_broadcast);
> + pr_info("System has AMD C1E erratum E400. Workaround enabled.\n");
> }

Breaks the 32-bit build with APIC=n :(

---
Subject: x86/idle: Sanitize X86_BUG_AMD_E400 handling
From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Date: Wed, 28 Feb 2024 23:13:00 +0100

amd_e400_idle(), the idle routine for AMD CPUs which are affected by
erratum 400 violates the RCU constraints by invoking tick_broadcast_enter()
and tick_broadcast_exit() after the core code has marked RCU non-idle. The
functions can end up in lockdep or tracing, which rightfully triggers a
RCU warning.

The core code provides now a static branch conditional invocation of the
broadcast functions.

Remove amd_e400_idle(), enforce default_idle() and enable the static branch
on affected CPUs to cure this.

Reported-by: Borislav Petkov <bp@xxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
V1b: Fix 32bit built with APIC=n (0-day)
---
arch/x86/Kconfig | 1 +
arch/x86/kernel/process.c | 42 +++++++++---------------------------------
2 files changed, 10 insertions(+), 33 deletions(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -147,6 +147,7 @@ config X86
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
+ select GENERIC_CLOCKEVENTS_BROADCAST_IDLE if GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -846,31 +846,6 @@ void __noreturn stop_this_cpu(void *dumm
}

/*
- * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
- * states (local apic timer and TSC stop).
- *
- * XXX this function is completely buggered vs RCU and tracing.
- */
-static void amd_e400_idle(void)
-{
- /*
- * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
- * gets set after static_cpu_has() places have been converted via
- * alternatives.
- */
- if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
- default_idle();
- return;
- }
-
- tick_broadcast_enter();
-
- default_idle();
-
- tick_broadcast_exit();
-}
-
-/*
* Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf
* exists and whenever MONITOR/MWAIT extensions are present there is at
* least one C1 substate.
@@ -890,8 +865,8 @@ static int prefer_mwait_c1_over_halt(con
if (!cpu_has(c, X86_FEATURE_MWAIT))
return 0;

- /* Monitor has a bug. Fallback to HALT */
- if (boot_cpu_has_bug(X86_BUG_MONITOR))
+ /* Monitor has a bug or APIC stops in C1E. Fallback to HALT */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E))
return 0;

cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
@@ -942,17 +917,15 @@ void select_idle_routine(const struct cp
if (x86_idle_set() || boot_option_idle_override == IDLE_POLL)
return;

- if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
- pr_info("using AMD E400 aware idle routine\n");
- static_call_update(x86_idle, amd_e400_idle);
- } else if (prefer_mwait_c1_over_halt(c)) {
+ if (prefer_mwait_c1_over_halt(c)) {
pr_info("using mwait in idle threads\n");
static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
static_call_update(x86_idle, tdx_safe_halt);
- } else
+ } else {
static_call_update(x86_idle, default_idle);
+ }
}

void amd_e400_c1e_apic_setup(void)
@@ -985,7 +958,10 @@ void __init arch_post_acpi_subsys_init(v

if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
- pr_info("System has AMD C1E enabled\n");
+
+ if (IS_ENABLED(GENERIC_CLOCKEVENTS_BROADCAST_IDLE))
+ static_branch_enable(&arch_needs_tick_broadcast);
+ pr_info("System has AMD C1E erratum E400. Workaround enabled.\n");
}

static int __init idle_setup(char *str)