Re: [PATCH] x86 idle: repair large-server 50-watt idle-power regression

From: H. Peter Anvin
Date: Thu Dec 19 2013 - 11:14:05 EST


How does this look? Completely untested, of course.

I do wonder if we need more memory barriers, though.

An alternative would be to move everything into mwait_idle_with_hints().

-hpa

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..6dce588f94b4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -723,6 +723,23 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}

+/*
+ * Arm the monitor on @addr with the given ecx/edx, issuing a clflush first
+ * if the CPU needs it. We force the address into the ax register to get a
+ * fixed length for the instruction; however, this is what the monitor
+ * instruction is going to need anyway, so it shouldn't add any extra code.
+ */
+static inline void clflush_monitor(const void *addr, unsigned long ecx,
+ unsigned long edx)
+{
+ alternative_input(ASM_NOP3,
+ "clflush (%0)",
+ X86_FEATURE_CLFLUSH_MONITOR,
+ "a" (addr));
+ __monitor(addr, ecx, edx);
+ smp_mb();
+}
+
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void init_amd_e400_c1e_mask(void);

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index d2b7f27781bc..b14d02354134 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -163,11 +163,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
- if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
- clflush((void *)&current_thread_info()->flags);
-
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
+ clflush_monitor(&current_thread_info()->flags, 0, 0);
if (!need_resched())
__mwait(ax, cx);
}