Re: [PATCH 4/4] KVM: x86: track actual TSC frequency from the timekeeper struct

From: Owen Hofmann
Date: Tue Feb 09 2016 - 13:41:20 EST


Hi,
Should this patch change the condition in pvclock_gtod_notify?
Currently it looks like we'll only request a masterclock update when
tsc is no longer a good clocksource.

On Mon, Feb 8, 2016 at 7:18 AM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
> When an NTP server is running, it may adjust the time substantially
> compared to the "official" frequency of the TSC. A 12 ppm change
> sums up to one second per day.
>
> This already shows up if the guest compares kvmclock with e.g. the
> PM timer. It shows up even more once we add support for the Hyper-V
> TSC page, because the guest expects it to be in sync with the time
> reference counter; effectively the time reference counter is just a
> slow path to access the same clock that is in the TSC page.
>
> Therefore, we want kvmclock to provide the host kernel's
> ktime_get_boot_ns() value, at least if the master clock is active.
> To do so, reverse-compute the host's "actual" TSC frequency from
> pvclock_gtod_data and return it from kvm_get_time_and_clockread.
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 3 ++-
> arch/x86/kvm/x86.c | 46 +++++++++++++++++++++++++++--------------
> 2 files changed, 33 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 7b5459982433..7dd6d55b4868 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -538,7 +538,7 @@ struct kvm_vcpu_arch {
>
> gpa_t time;
> struct pvclock_vcpu_time_info hv_clock;
> - unsigned int hw_tsc_khz;
> + u64 hv_clock_tsc_hz;
> struct gfn_to_hva_cache pv_time;
> bool pv_time_enabled;
> /* set guest stopped flag in pvclock flags field */
> @@ -731,6 +731,7 @@ struct kvm_arch {
>
> spinlock_t pvclock_gtod_sync_lock;
> bool use_master_clock;
> + u64 master_tsc_hz;
> u64 master_kernel_ns;
> cycle_t master_cycle_now;
> struct delayed_work kvmclock_update_work;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e0bf8f10dc22..c99101d4cc5e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1580,7 +1580,16 @@ static inline u64 vgettsc(cycle_t *cycle_now)
> return v * gtod->clock.mult;
> }
>
> -static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
> +static u64 pvclock_gtod_tsc_hz(void)
> +{
> + struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
> +
> + u64 gtod_tsc_hz = 1000000000ULL << gtod->clock.shift;
> + do_div(gtod_tsc_hz, gtod->clock.mult);
> + return gtod_tsc_hz;
> +}
> +
> +static int do_monotonic_boot(s64 *t, cycle_t *cycle_now, u64 *tsc_hz)
> {
> struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
> unsigned long seq;
> @@ -1589,6 +1598,7 @@ static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
>
> do {
> seq = read_seqcount_begin(&gtod->seq);
> + *tsc_hz = pvclock_gtod_tsc_hz();
> mode = gtod->clock.vclock_mode;
> ns = gtod->nsec_base;
> ns += vgettsc(cycle_now);
> @@ -1601,13 +1611,14 @@ static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
> }
>
> /* returns true if host is using tsc clocksource */
> -static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
> +static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now,
> + u64 *tsc_hz)
> {
> /* checked again under seqlock below */
> if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
> return false;
>
> - return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
> + return do_monotonic_boot(kernel_ns, cycle_now, tsc_hz) == VCLOCK_TSC;
> }
> #endif
>
> @@ -1668,7 +1679,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
> */
> host_tsc_clocksource = kvm_get_time_and_clockread(
> &ka->master_kernel_ns,
> - &ka->master_cycle_now);
> + &ka->master_cycle_now,
> + &ka->master_tsc_hz);
>
> ka->use_master_clock = host_tsc_clocksource && vcpus_matched
> && !backwards_tsc_observed
> @@ -1713,7 +1725,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
>
> static int kvm_guest_time_update(struct kvm_vcpu *v)
> {
> - unsigned long flags, tgt_tsc_khz;
> + unsigned long flags;
> + u64 tgt_tsc_hz;
> struct kvm_vcpu_arch *vcpu = &v->arch;
> struct kvm_arch *ka = &v->kvm->arch;
> s64 kernel_ns;
> @@ -1724,6 +1737,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>
> kernel_ns = 0;
> host_tsc = 0;
> + tgt_tsc_hz = 0;
>
> /*
> * If the host uses TSC clock, then passthrough TSC as stable
> @@ -1734,20 +1748,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> if (use_master_clock) {
> host_tsc = ka->master_cycle_now;
> kernel_ns = ka->master_kernel_ns;
> + tgt_tsc_hz = ka->master_tsc_hz;
> }
> spin_unlock(&ka->pvclock_gtod_sync_lock);
>
> /* Keep irq disabled to prevent changes to the clock */
> local_irq_save(flags);
> - tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
> - if (unlikely(tgt_tsc_khz == 0)) {
> - local_irq_restore(flags);
> - kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
> - return 1;
> - }
> if (!use_master_clock) {
> host_tsc = rdtsc();
> kernel_ns = get_kernel_ns();
> + tgt_tsc_hz = __this_cpu_read(cpu_tsc_khz) * 1000;
> + }
> +
> + if (unlikely(tgt_tsc_hz == 0)) {
> + local_irq_restore(flags);
> + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
> + return 1;
> }
>
> tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
> @@ -1776,13 +1792,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> return 0;
>
> if (kvm_has_tsc_control)
> - tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
> + tgt_tsc_hz = kvm_scale_tsc(v, tgt_tsc_hz);
>
> - if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
> - kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
> + if (unlikely(vcpu->hv_clock_tsc_hz != tgt_tsc_hz)) {
> + kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_hz,
> &vcpu->hv_clock.tsc_shift,
> &vcpu->hv_clock.tsc_to_system_mul);
> - vcpu->hw_tsc_khz = tgt_tsc_khz;
> + vcpu->hv_clock_tsc_hz = tgt_tsc_hz;
> }
>
> /* With all the info we got, fill in the values */
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html