[PATCH v3 2/2] x86, mwaitt: introduce mwaitx delay with a configurable timer

From: Huang Rui
Date: Thu Jun 11 2015 - 10:15:58 EST


MWAITX can enable a timer, with the corresponding timer value specified in
SW P0 clocks. The SW P0 frequency is the same as the TSC frequency. The timer
provides an upper bound on how long the instruction waits before exiting.

The implementation of the delay function in the kernel can leverage the
timer of MWAITX. This patch provides a new method (delay_mwaitx) to measure
delay time.

Suggested-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Suggested-by: Borislav Petkov <bp@xxxxxxx>
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Huang Rui <ray.huang@xxxxxxx>
---
arch/x86/include/asm/mwait.h | 11 +++++++++++
arch/x86/lib/delay.c | 41 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index ece8048..9b41580 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -14,6 +14,8 @@
#define CPUID5_ECX_INTERRUPT_BREAK 0x2

#define MWAIT_ECX_INTERRUPT_BREAK 0x1
+#define MWAITX_ECX_TIMER_ENABLE BIT(1)
+#define MWAITX_MAX_LOOPS ((u32)-1)

static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
@@ -80,4 +82,13 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
current_clr_polling();
}

+static inline void mwaitx(unsigned long eax, unsigned long loops,
+ bool enable)
+{
+ if (enable)
+ __mwaitx(eax, loops, MWAITX_ECX_TIMER_ENABLE);
+ else
+ __mwaitx(eax, 0, 0);
+}
+
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 39d6a3d..9daf94a 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -20,6 +20,7 @@
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
+#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
@@ -87,6 +88,41 @@ static void delay_tsc(unsigned long __loops)
}

/*
+ * On AMD platforms, MWAITX has a configurable 32-bit timer that counts at
+ * the TSC frequency. The input value is the loop count for that timer; the
+ * instruction exits once the timer expires.
+ */
+static void delay_mwaitx(unsigned long __loops)
+{
+ u32 end, start, delay, addr, loops = __loops;
+
+ rdtsc_barrier();
+ rdtscl(start);
+
+ for (;;) {
+ delay = min(MWAITX_MAX_LOOPS, loops);
+
+ __monitorx(&addr, 0, 0);
+ /*
+ * AMD, like Intel, supports the EAX hint; EAX=0xf means "do not
+ * enter any deep C-state". We use it here in delay() to
+ * minimize wakeup latency.
+ */
+ mwaitx(0xf, delay, true);
+
+ rdtsc_barrier();
+ rdtscl(end);
+
+ if (loops <= end - start)
+ break;
+
+ loops -= end - start;
+
+ start = end;
+ }
+}
+
+/*
* Since we calibrate only once at boot, this
* function should be set once at boot and not changed
*/
@@ -108,7 +144,10 @@ int read_current_timer(unsigned long *timer_val)

void __delay(unsigned long loops)
{
- delay_fn(loops);
+ if (!static_cpu_has_safe(X86_FEATURE_MWAITT))
+ delay_fn(loops);
+ else
+ delay_mwaitx(loops);
}
EXPORT_SYMBOL(__delay);

--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/