[PATCH 3/3] x86/tsc: Introduce early tsc clocksource

From: Peter Zijlstra
Date: Fri Dec 22 2017 - 04:24:29 EST


Without TSC_KNOWN_FREQ we register the TSC clocksource so late we
first switch to the HPET. Using HPET on large CPU count machines is
undesirable.

Therefore register a tsc-early clocksource using the preliminary
tsc_khz from quick calibration. Then when the final TSC calibration
comes in, we can switch to that.

The only notably problem is that we must mark the real tsc clocksource
with CLOCK_SOURCE_VALID_FOR_HRES, otherwise we will not select it when
unregistering tsc-early. And we cannot leave tsc-early registered,
because then we'd fall back to it when we mark the tsc clocksource as
unstable.

Cc: Len Brown <lenb@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/kernel/tsc.c | 43 +++++++++++++++++++++++++++++++++----------
1 file changed, 33 insertions(+), 10 deletions(-)

--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1005,8 +1005,6 @@ static void __init detect_art(void)

/* clocksource code */

-static struct clocksource clocksource_tsc;
-
static void tsc_resume(struct clocksource *cs)
{
tsc_verify_tsc_adjust(true);
@@ -1057,12 +1055,31 @@ static void tsc_cs_tick_stable(struct cl
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
+static struct clocksource clocksource_tsc_early = {
+ .name = "tsc-early",
+ .rating = 299,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_MUST_VERIFY,
+ .archdata = { .vclock_mode = VCLOCK_TSC },
+ .resume = tsc_resume,
+ .mark_unstable = tsc_cs_mark_unstable,
+ .tick_stable = tsc_cs_tick_stable,
+};
+
+/*
+ * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
+ * this one will immediately take over. We will only register if TSC has
+ * been found good.
+ */
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
@@ -1186,8 +1203,8 @@ static void tsc_refine_calibration_work(
int cpu;

/* Don't bother refining TSC on unstable systems */
- if (check_tsc_unstable())
- goto out;
+ if (tsc_unstable)
+ return;

/*
* Since the work is started early in boot, we may be
@@ -1239,9 +1256,13 @@ static void tsc_refine_calibration_work(
set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);

out:
+ if (tsc_unstable)
+ return;
+
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ clocksource_unregister(&clocksource_tsc_early);
}


@@ -1250,13 +1271,11 @@ static int __init init_tsc_clocksource(v
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;

+ if (check_tsc_unstable())
+ return 0;
+
if (tsc_clocksource_reliable)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
- /* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
- clocksource_tsc.rating = 0;
- clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- }

if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@@ -1269,6 +1288,7 @@ static int __init init_tsc_clocksource(v
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ clocksource_unregister(&clocksource_tsc_early);
return 0;
}

@@ -1367,9 +1387,12 @@ void __init tsc_init(void)

check_system_tsc_reliable();

- if (unsynchronized_tsc())
+ if (unsynchronized_tsc()) {
mark_tsc_unstable("TSCs unsynchronized");
+ return;
+ }

+ clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art();
}