[RFC PATCH] Make quick_pit_calibrate more robust

From: George Spelvin
Date: Tue Jun 09 2015 - 02:55:00 EST


It's fundamentally the same, but more robust to occasional long delays
when accessing the PIT.

In particular, the old code was susceptible to failing if the initial
PIT read was slow. This revised code will move the timing start if
it's a sufficient improvement.

Another small change that simplified the code was to give up after the
55 ms PIT timer wraparound, rather than 50 ms.

I have a test machine where the old code fails reliably and this
code works.

I've gone wild with the comments, but I don't think the code is much
more complex.

Comments solicited.

Signed-off-by: George Spelvin <linux@xxxxxxxxxxx>
---
arch/x86/kernel/tsc.c | 201 ++++++++++++++++++++++++++++++++------------------
1 file changed, 130 insertions(+), 71 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a00f35be..fa8e73b5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -524,48 +524,38 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* use the TSC value at the transitions to calculate a pretty
* good value for the TSC frequencty.
*/
-static inline int pit_verify_msb(unsigned char val)
-{
- /* Ignore LSB */
- inb(0x42);
- return inb(0x42) == val;
-}
-
-static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
-{
- int count;
- u64 tsc = 0, prev_tsc = 0;
-
- for (count = 0; count < 50000; count++) {
- if (!pit_verify_msb(val))
- break;
- prev_tsc = tsc;
- tsc = get_cycles();
- }
- *deltap = get_cycles() - prev_tsc;
- *tscp = tsc;
-
- /*
- * We require _some_ success, but the quality control
- * will be based on the error terms on the TSC values.
- */
- return count > 5;
-}

/*
- * How many MSB values do we want to see? We aim for
- * a maximum error rate of 500ppm (in practice the
- * real error is much smaller), but refuse to spend
- * more than 50ms on it.
+ * Return the TSC tick rate in kHz, or 0.
+ *
+ * This tries to get within 500 ppm of reality as fast as possible,
+ * by polling the PIT and looking for clock edges.
+ *
+ * We abort and go to a slower technique if it isn't working before
+ * the PIT wraps, which happens every 55 ms. (88 * 3 * 65536 / 315e6 =
+ * 0.0549254095238 s, if you want to be specific.)
+ *
+ * Each PIT access takes approximately 1 us, so we should not ever do
+ * more than 64K. Include an anti-livelock limit of 2x128K.
+ *
+ * The big annoyance is that sometimes there are large delays accessing
+ * the PIT (hardware, SMM, emulator, whatever), which add a lot of
+ * jitter to the collected timestamps. So this is all about measuring
+ * the time it takes to read the PIT and bounding the uncertainty.
*/
-#define MAX_QUICK_PIT_MS 50
-#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
+#define ONE_BYTE_PIT 0
+
+#define NREPS 3 /* Number of repeat reads required to believe it */
+#define BUFMASK 7 /* Must be power of 2 minus 1, and >= NREPS+3 */

static unsigned long quick_pit_calibrate(void)
{
- int i;
- u64 tsc, delta;
- unsigned long d1, d2;
+ unsigned char msb1 = 0, msb2 = 0xff; /* PIT msbytes */
+ u64 tsc1 = tsc1, tsc2; /* When PIT rolled over to msb1/2 */
+ unsigned d1 = 1, d2; /* How long it took to read the PIT */
+ int reps = 0; /* Number of times we've read msb2 */
+ int limit = 1<<17; /* Maximum number of PIT reads */
+ u64 tsc[BUFMASK+1]; /* Circular buffer of captured TSC values */

/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
@@ -584,60 +574,129 @@ static unsigned long quick_pit_calibrate(void)
/* Start at 0xffff */
outb(0xff, 0x42);
outb(0xff, 0x42);
-
/*
* The PIT starts counting at the next edge, so we
* need to delay for a microsecond. The easiest way
* to do that is to just read back the 16-bit counter
* once from the PIT.
*/
- pit_verify_msb(0);
+ (void)inb(0x42);
+ (void)inb(0x42);

- if (pit_expect_msb(0xff, &tsc, &d1)) {
- for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
- if (!pit_expect_msb(0xff-i, &delta, &d2))
- break;

- /*
- * Iterate until the error is less than 500 ppm
- */
- delta -= tsc;
- if (d1+d2 >= delta >> 11)
- continue;
+#if ONE_BYTE_PIT
+ outb(0xa0, 0x43); /* Set to MSB-only mode */
+#endif
+ tsc[(limit + 1) & BUFMASK] = get_cycles();
+ do {
+ unsigned char msb;
+
+#if !ONE_BYTE_PIT
+ inb(0x42);
+#endif
+ msb = inb(0x42);
+ tsc[limit & BUFMASK] = get_cycles();

+ /* Now, figure out what do do with what we just collected */
+ if (msb != msb2) {
/*
- * Check the PIT one more time to verify that
- * all TSC reads were stable wrt the PIT.
- *
- * This also guarantees serialization of the
- * last cycle read ('d2') in pit_expect_msb.
+ * This is the boring case. We've seen the PIT
+ * tick over, but we don't trust it until seeing
+ * the value NREPS additional times. Also deal
+ * with the PIT wrapping (by failing), and with it
+ * decrementing more than once (by ignoring this
+ * edge).
*/
- if (!pit_verify_msb(0xfe - i))
- break;
+ if (msb > msb2)
+ break; /* PIT wrapped, fail */
+ reps = (msb == msb2 - 1) ? 0 : NREPS;
+ msb2 = msb;
+ continue;
+ }
+ if (++reps != NREPS)
+ continue;
+
+ /*
+ * Okay, the interesting case. This is the slow path,
+ * which is only executed just after the PIT rolls over,
+ * so a few extra cycles don't matter.
+ */
+
+ /* The PIT rolled over sometime in this window */
+ tsc2 = tsc[(limit + NREPS) & BUFMASK] -
+ tsc[(limit + NREPS + 2) & BUFMASK];
+ /*
+ * At 1 us per PIT access, and two accesses per read, delta
+ * should be below 5 us. Assuming the TSC frequency is
+ * less than 10 GHz, that means that the delta should be
+ * less than 50,000. If we read it more than a million,
+ * just ignore it.
+ *
+ * One million is magic because if d1 and d2 are both
+ * less than 2^20, then (d1 + d2) << 11 is guatanteed to fit
+ * into 32 bits.
+ */
+ if (tsc2 >= 0x100000)
+ continue;
+ d2 = (unsigned)tsc2;
+ tsc2 = tsc[(limit + NREPS + 1) & BUFMASK];
+
+ /*
+ * Should we use this as a new starting point? There are
+ * msb2 more opportunities to read the PIT, so if
+ * d2/msb2 is less than the current d1/msb1, it's
+ * bettter.
+ *
+ * By initializeing d1 (to non-zero) and mab1 (to zero)
+ * correctly, this also makes the initial d1/msb1
+ * "infinitely bad" and will be replaced ASAP.
+ */
+ if (d2 * msb1 <= d1 * msb2) {
+ d1 = d2;
+ msb1 = msb2;
+ tsc1 = tsc2;
+ continue;
+ }
+ /*
+ * Now, is the solution found so far good enough?
+ * The uncertainty is d1+d2, and the time difference
+ * is the difference between the timetamps.
+ *
+ * Note that the shift up by 11 just barely fits into
+ * 32 bits; if you want to increase the precision,
+ * widen the sum or shift delta down instead.
+ *
+ * Also note that the use of <= in the above condition
+ * makes it impossible to reach this point without d1,
+ * msb1 and tsc1 set up, although GCC doesn't know it.
+ */
+ tsc2 -= tsc1;
+ if ((d1+d2) << 11 < tsc2)
goto success;
- }
- }
- pr_info("Fast TSC calibration failed\n");
+ /*
+ * Finally, we impose an upper limit on the number of
+ * times through the loop to ensure the loop eventually
+ * terminates.
+ */
+
+ } while (--limit);
+ pr_info("Fast TSC calibration failed (limit %u msb1 %d msb2 %d)\n",
+ limit, msb1, msb2);
return 0;

success:
/*
- * Ok, if we get here, then we've seen the
- * MSB of the PIT decrement 'i' times, and the
- * error has shrunk to less than 500 ppm.
- *
- * As a result, we can depend on there not being
- * any odd delays anywhere, and the TSC reads are
- * reliable (within the error).
+ * The PIT has ticked 256*(msb1 - msb2) times, at a rate of
+ * PIT_TICK_RATE, in tsc2 TSC ticks.
*
* kHz = ticks / time-in-seconds / 1000;
- * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
- * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
+ * kHz = tsc2 / ((msb1 - msb2) * 256 / PIT_TICK_RATE) / 1000
+ * kHz = (tsc2 * PIT_TICK_RATE) / ((msb1 - msb2) * 256 * 1000)
*/
- delta *= PIT_TICK_RATE;
- do_div(delta, i*256*1000);
- pr_info("Fast TSC calibration using PIT\n");
- return delta;
+ tsc2 = tsc2 * PIT_TICK_RATE / ((msb1 - msb2) * 128 * 1000);
+ pr_info("Fast TSC calibration using PIT: %d(%u)..%d(%u)\n",
+ msb1, d1, msb2, d2);
+ return (tsc2 + 1) >> 1; /* Round to nearest */
}

/**
--
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/