Re: [PATCH v3 03/18] x86/reboot: KVM: Handle VMXOFF in KVM's reboot callback

From: Huang, Kai
Date: Mon May 22 2023 - 08:56:06 EST


On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF
> if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> possibly be post-VMXON.
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/include/asm/virtext.h | 10 ----------
> arch/x86/kernel/reboot.c | 29 +++++++++--------------------
> arch/x86/kvm/vmx/vmx.c | 8 +++++---
> 3 files changed, 14 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
> index 3b12e6b99412..5bc29fab15da 100644
> --- a/arch/x86/include/asm/virtext.h
> +++ b/arch/x86/include/asm/virtext.h
> @@ -70,16 +70,6 @@ static inline void __cpu_emergency_vmxoff(void)
> cpu_vmxoff();
> }
>
> -/** Disable VMX if it is supported and enabled on the current CPU
> - */
> -static inline void cpu_emergency_vmxoff(void)
> -{
> - if (cpu_has_vmx())
> - __cpu_emergency_vmxoff();
> -}
> -
> -
> -
>
> /*
> * SVM functions:
> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
> index 739e09527dbb..0cf2261c2dec 100644
> --- a/arch/x86/kernel/reboot.c
> +++ b/arch/x86/kernel/reboot.c
> @@ -787,13 +787,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
> }
> #endif
>
> -/*
> - * This is used to VMCLEAR all VMCSs loaded on the
> - * processor. And when loading kvm_intel module, the
> - * callback function pointer will be assigned.
> - *
> - * protected by rcu.
> - */
> +/* RCU-protected callback to disable virtualization prior to reboot. */
> static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
>
> void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
> @@ -815,17 +809,6 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
> }
> EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
>
> -static inline void cpu_crash_vmclear_loaded_vmcss(void)
> -{
> - cpu_emergency_virt_cb *callback;
> -
> - rcu_read_lock();
> - callback = rcu_dereference(cpu_emergency_virt_callback);
> - if (callback)
> - callback();
> - rcu_read_unlock();
> -}
> -
> /* This is the CPU performing the emergency shutdown work. */
> int crashing_cpu = -1;
>
> @@ -836,9 +819,15 @@ int crashing_cpu = -1;
> */
> void cpu_emergency_disable_virtualization(void)
> {
> - cpu_crash_vmclear_loaded_vmcss();
> + cpu_emergency_virt_cb *callback;
>
> - cpu_emergency_vmxoff();
> + rcu_read_lock();
> + callback = rcu_dereference(cpu_emergency_virt_callback);
> + if (callback)
> + callback();
> + rcu_read_unlock();
> +
> + /* KVM_AMD doesn't yet utilize the common callback. */
> cpu_emergency_svm_disable();
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fc9cdb4114cc..76cdb189f1b5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> return ret;
> }
>
> -static void crash_vmclear_local_loaded_vmcss(void)
> +static void vmx_emergency_disable(void)
> {
> int cpu = raw_smp_processor_id();
> struct loaded_vmcs *v;
> @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
> list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
> loaded_vmcss_on_cpu_link)
> vmcs_clear(v->vmcs);
> +
> + __cpu_emergency_vmxoff();

__cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.
Logically, it seems more reasonable to do that check before VMCLEARing the
active VMCSes, although I think in practice there should be no problem.

But this problem is inherited from the existing upstream code, so I'm not
sure whether it is worth fixing.

> }
>
> static void __loaded_vmcs_clear(void *arg)
> @@ -8547,7 +8549,7 @@ static void __vmx_exit(void)
> {
> allow_smaller_maxphyaddr = false;
>
> - cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss);
> + cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
>
> vmx_cleanup_l1d_flush();
> }
> @@ -8597,7 +8599,7 @@ static int __init vmx_init(void)
> pi_init_cpu(cpu);
> }
>
> - cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss);
> + cpu_emergency_register_virt_callback(vmx_emergency_disable);
>
> vmx_check_vmcs12_offsets();
>
> --
> 2.40.1.606.ga4b1b128d6-goog
>