[PATCH 4/4] KVM: x86: track actual TSC frequency from the timekeeper struct

From: Paolo Bonzini
Date: Mon Feb 08 2016 - 10:19:00 EST


When an NTP server is running, it may adjust the time substantially
compared to the "official" frequency of the TSC. A 12 ppm change
sums up to one second per day.

This already shows up if the guest compares kvmclock with e.g. the
PM timer. It shows up even more once we add support for the Hyper-V
TSC page, because the guest expects it to be in sync with the time
reference counter; effectively the time reference counter is just a
slow path to access the same clock that is in the TSC page.

Therefore, we want kvmclock to provide the host kernel's
ktime_get_boot_ns() value, at least if the master clock is active.
To do so, reverse-compute the host's "actual" TSC frequency from
pvclock_gtod_data and return it from kvm_get_time_and_clockread.

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/x86.c | 46 +++++++++++++++++++++++++++--------------
2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7b5459982433..7dd6d55b4868 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -538,7 +538,7 @@ struct kvm_vcpu_arch {

gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
- unsigned int hw_tsc_khz;
+ u64 hv_clock_tsc_hz;
struct gfn_to_hva_cache pv_time;
bool pv_time_enabled;
/* set guest stopped flag in pvclock flags field */
@@ -731,6 +731,7 @@ struct kvm_arch {

spinlock_t pvclock_gtod_sync_lock;
bool use_master_clock;
+ u64 master_tsc_hz;
u64 master_kernel_ns;
cycle_t master_cycle_now;
struct delayed_work kvmclock_update_work;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e0bf8f10dc22..c99101d4cc5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1580,7 +1580,16 @@ static inline u64 vgettsc(cycle_t *cycle_now)
return v * gtod->clock.mult;
}

-static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
+static u64 pvclock_gtod_tsc_hz(void)
+{
+ struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+ u64 gtod_tsc_hz = 1000000000ULL << gtod->clock.shift;
+ do_div(gtod_tsc_hz, gtod->clock.mult);
+ return gtod_tsc_hz;
+}
+
+static int do_monotonic_boot(s64 *t, cycle_t *cycle_now, u64 *tsc_hz)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1589,6 +1598,7 @@ static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)

do {
seq = read_seqcount_begin(&gtod->seq);
+ *tsc_hz = pvclock_gtod_tsc_hz();
mode = gtod->clock.vclock_mode;
ns = gtod->nsec_base;
ns += vgettsc(cycle_now);
@@ -1601,13 +1611,14 @@ static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
}

/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now,
+ u64 *tsc_hz)
{
/* checked again under seqlock below */
if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
return false;

- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+ return do_monotonic_boot(kernel_ns, cycle_now, tsc_hz) == VCLOCK_TSC;
}
#endif

@@ -1668,7 +1679,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
*/
host_tsc_clocksource = kvm_get_time_and_clockread(
&ka->master_kernel_ns,
- &ka->master_cycle_now);
+ &ka->master_cycle_now,
+ &ka->master_tsc_hz);

ka->use_master_clock = host_tsc_clocksource && vcpus_matched
&& !backwards_tsc_observed
@@ -1713,7 +1725,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)

static int kvm_guest_time_update(struct kvm_vcpu *v)
{
- unsigned long flags, tgt_tsc_khz;
+ unsigned long flags;
+ u64 tgt_tsc_hz;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
@@ -1724,6 +1737,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)

kernel_ns = 0;
host_tsc = 0;
+ tgt_tsc_hz = 0;

/*
* If the host uses TSC clock, then passthrough TSC as stable
@@ -1734,20 +1748,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (use_master_clock) {
host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns;
+ tgt_tsc_hz = ka->master_tsc_hz;
}
spin_unlock(&ka->pvclock_gtod_sync_lock);

/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
- tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
- if (unlikely(tgt_tsc_khz == 0)) {
- local_irq_restore(flags);
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
- return 1;
- }
if (!use_master_clock) {
host_tsc = rdtsc();
kernel_ns = get_kernel_ns();
+ tgt_tsc_hz = __this_cpu_read(cpu_tsc_khz) * 1000;
+ }
+
+ if (unlikely(tgt_tsc_hz == 0)) {
+ local_irq_restore(flags);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
+ return 1;
}

tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -1776,13 +1792,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
return 0;

if (kvm_has_tsc_control)
- tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
+ tgt_tsc_hz = kvm_scale_tsc(v, tgt_tsc_hz);

- if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
- kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
+ if (unlikely(vcpu->hv_clock_tsc_hz != tgt_tsc_hz)) {
+ kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_hz,
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
- vcpu->hw_tsc_khz = tgt_tsc_khz;
+ vcpu->hv_clock_tsc_hz = tgt_tsc_hz;
}

/* With all the info we got, fill in the values */
--
1.8.3.1