Re: [KVM Clock Synchronization 2/4] Keep TSC synchronized acrosshost suspend

From: Marcelo Tosatti
Date: Tue Jan 04 2011 - 13:21:27 EST


On Tue, Dec 28, 2010 at 07:38:18PM -1000, Zachary Amsden wrote:
> During a host suspend, TSC may go backwards, which KVM interprets
> as an unstable TSC. Technically, KVM should not be marking the
> TSC unstable, which causes the TSC clocksource to go bad, but
> should be adjusting the TSC offsets in such a case.
>
> Signed-off-by: Zachary Amsden <zamsden@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/x86.c | 66 +++++++++++++++++++++++++++++++++++---
> 2 files changed, 61 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9e6fe39..63a82b0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -386,6 +386,7 @@ struct kvm_vcpu_arch {
> u64 last_kernel_ns;
> u64 last_tsc_nsec;
> u64 last_tsc_write;
> + u64 tsc_offset_adjustment;
> bool tsc_catchup;
>
> bool nmi_pending;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b9118f4..b509c01 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2042,12 +2042,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> }
>
> kvm_x86_ops->vcpu_load(vcpu, cpu);
> +
> + /* Apply any externally detected TSC adjustments (due to suspend) */
> + if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
> + kvm_x86_ops->adjust_tsc_offset(vcpu,
> + vcpu->arch.tsc_offset_adjustment);
> + vcpu->arch.tsc_offset_adjustment = 0;
> + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
> + }
> if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
> /* Make sure TSC doesn't go backwards */
> s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
> native_read_tsc() - vcpu->arch.last_host_tsc;
> if (tsc_delta < 0)
> - mark_tsc_unstable("KVM discovered backwards TSC");
> + WARN_ON(!check_tsc_unstable());
> if (check_tsc_unstable()) {
> kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
> vcpu->arch.tsc_catchup = 1;
> @@ -5778,14 +5786,60 @@ int kvm_arch_hardware_enable(void *garbage)
> {
> struct kvm *kvm;
> struct kvm_vcpu *vcpu;
> - int i;
> + int i, ret;
> + u64 local_tsc;
> + u64 max_tsc = 0;
> + bool stable, backwards_tsc = false;
>
> kvm_shared_msr_cpu_online();
> - list_for_each_entry(kvm, &vm_list, vm_list)
> - kvm_for_each_vcpu(i, vcpu, kvm)
> - if (vcpu->cpu == smp_processor_id())
> + local_tsc = native_read_tsc();
> + stable = !check_tsc_unstable();
> + ret = kvm_x86_ops->hardware_enable(garbage);
> + if (ret)
> + return ret;
> +
> + list_for_each_entry(kvm, &vm_list, vm_list) {
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!stable && vcpu->cpu == smp_processor_id())
> kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
> - return kvm_x86_ops->hardware_enable(garbage);
> + if (stable && vcpu->arch.last_host_tsc > local_tsc) {
> + backwards_tsc = true;
> + if (vcpu->arch.last_host_tsc > max_tsc)
> + max_tsc = vcpu->arch.last_host_tsc;
> + }
> + }
> + }
> +
> + /*
> + * Sometimes, reliable TSCs go backwards. This happens
> + * on platforms that reset TSC during suspend or hibernate
> + * actions, but maintain synchronization. We must compensate.
> + * Unfortunately, we can't bring the TSCs fully up to date
> + * with real time. The reason is that we aren't yet far
> + * enough into CPU bringup that we know how much real time
> + * has actually elapsed; our helper function, get_kernel_ns()
> + * will be using boot variables that haven't been updated yet.
> + * We simply find the maximum observed TSC above, then record
> + * the adjustment to TSC in each VCPU. When the VCPU later
> + * gets loaded, the adjustment will be applied. Note that we
> + * accumulate adjustments, in case multiple suspend cycles
> + * happen before the VCPU gets a chance to run again.
> + *
> + * Note that unreliable TSCs will be compensated already by
> + * the logic in vcpu_load, which sets the TSC to catchup mode.
> + * This will catchup all VCPUs to real time, but cannot
> + * guarantee synchronization.
> + */
> + if (backwards_tsc) {
> + u64 delta_cyc = max_tsc - local_tsc;
> + list_for_each_entry(kvm, &vm_list, vm_list)
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + vcpu->arch.tsc_offset_adjustment += delta_cyc;
> + vcpu->arch.last_host_tsc = 0;
> + }
> + }
> +
> + return 0;
> }

Wouldnt it be simpler to use cyc2ns_offset (or something equivalent)? In
any case, you forgot to compare smp_processor_id.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/