Re: [PATCH 04/13] x86/kvm: Do not try to disable kvmclock if it was not enabled

From: Sean Christopherson
Date: Fri Oct 06 2023 - 10:37:01 EST


+Paolo

Please use get_maintainers...

On Thu, Oct 05, 2023, Kirill A. Shutemov wrote:
> kvm_guest_cpu_offline() tries to disable kvmclock regardless if it is
> present in the VM. It leads to write to a MSR that doesn't exist on some
> configurations, namely in TDX guest:
>
> unchecked MSR access error: WRMSR to 0x12 (tried to write 0x0000000000000000)
> at rIP: 0xffffffff8110687c (kvmclock_disable+0x1c/0x30)
>
> kvmclock enabling is gated by CLOCKSOURCE and CLOCKSOURCE2 KVM paravirt
> features.
>
> Do not disable kvmclock if it was not enumerated or disabled by user
> from kernel command line.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Fixes: c02027b5742b ("x86/kvm: Disable kvmclock on all CPUs on shutdown")
> ---
> arch/x86/kernel/kvmclock.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index fb8f52149be9..cba2e732e53f 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -22,7 +22,7 @@
> #include <asm/x86_init.h>
> #include <asm/kvmclock.h>
>
> -static int kvmclock __initdata = 1;
> +static int kvmclock __ro_after_init = 1;
> static int kvmclock_vsyscall __initdata = 1;
> static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
> static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
> @@ -195,7 +195,12 @@ static void kvm_setup_secondary_clock(void)
>
> void kvmclock_disable(void)
> {
> - native_write_msr(msr_kvm_system_time, 0, 0);
> + if (!kvm_para_available() || !kvmclock)
> + return;
> +
> + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE) ||
> + kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2))
> + native_write_msr(msr_kvm_system_time, 0, 0);

Rather than recheck everything and preserve kvmclock, what about leaving the MSR
indices '0' by default and then disable msr_kvm_system_time iff it's non-zero.
That way the disable path won't become stale if the conditions for enabling
kvmclock change.

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index fb8f52149be9..f2fff625576d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -24,8 +24,8 @@

static int kvmclock __initdata = 1;
static int kvmclock_vsyscall __initdata = 1;
-static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
-static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
+static int msr_kvm_system_time __ro_after_init;
+static int msr_kvm_wall_clock __ro_after_init;
static u64 kvm_sched_clock_offset __ro_after_init;

static int __init parse_no_kvmclock(char *arg)
@@ -195,7 +195,8 @@ static void kvm_setup_secondary_clock(void)

void kvmclock_disable(void)
{
- native_write_msr(msr_kvm_system_time, 0, 0);
+ if (msr_kvm_system_time)
+ native_write_msr(msr_kvm_system_time, 0, 0);
}

static void __init kvmclock_init_mem(void)
@@ -294,7 +295,10 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
- } else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ } else if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
+ msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+ } else {
return;
}