[PATCH 2/4] KVM: nVMX: Pull KVM L0's desired controls directly from vmcs01

From: Sean Christopherson
Date: Tue Aug 10 2021 - 13:20:17 EST


When preparing controls for vmcs02, grab KVM's desired controls from
vmcs01's shadow state instead of recalculating the controls from scratch,
or in the secondary execution controls, instead of using the dedicated
cache. Calculating secondary exec controls is eye-poppingly expensive
due to the guest CPUID checks, hence the dedicated cache, but the other
calculations aren't exactly free either.

Explicitly clear several bits (x2APIC, DESC exiting, and load EFER on
exit) as appropriate as they may be set in vmcs01, whereas the previous
implementation relied on dynamic bits being cleared in the calculator.

Intentionally propagate VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL from
vmcs01 to vmcs02. Whether or not PERF_GLOBAL_CTRL is loaded depends on
whether or not perf itself is active, so unless perf stops between the
exit from L1 and entry to L2, vmcs01 will hold the desired value. This
is purely an optimization as atomic_switch_perf_msrs() will set/clear
the control as needed at VM-Enter, i.e. it avoids two extra VMWRITEs in
the case where perf is active (versus starting with the bits clear in
vmcs02, which was the previous behavior).

Cc: Zeng Guang <guang.zeng@xxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++---------
arch/x86/kvm/vmx/vmx.h | 6 +++++-
2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0d0dd6580cfd..264a9f4c9179 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2170,7 +2170,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
}
}

-static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
+ struct vmcs12 *vmcs12)
{
u32 exec_control;
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
@@ -2181,7 +2182,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
/*
* PIN CONTROLS
*/
- exec_control = vmx_pin_based_exec_ctrl(vmx);
+ exec_control = __pin_controls_get(vmcs01);
exec_control |= (vmcs12->pin_based_vm_exec_control &
~PIN_BASED_VMX_PREEMPTION_TIMER);

@@ -2197,7 +2198,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
/*
* EXEC CONTROLS
*/
- exec_control = vmx_exec_control(vmx); /* L0's desires */
+ exec_control = __exec_controls_get(vmcs01); /* L0's desires */
exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
exec_control &= ~CPU_BASED_TPR_SHADOW;
@@ -2234,10 +2235,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
* SECONDARY EXEC CONTROLS
*/
if (cpu_has_secondary_exec_ctrls()) {
- exec_control = vmx->secondary_exec_control;
+ exec_control = __secondary_exec_controls_get(vmcs01);

/* Take the following fields only from vmcs12 */
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_XSAVES |
@@ -2245,7 +2247,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_TSC_SCALING);
+ SECONDARY_EXEC_TSC_SCALING |
+ SECONDARY_EXEC_DESC);
+
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
@@ -2285,8 +2289,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
* on the related bits (if supported by the CPU) in the hope that
* we can avoid VMWrites during vmx_set_efer().
*/
- exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
- ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
+ exec_control = __vm_entry_controls_get(vmcs01);
+ exec_control |= vmcs12->vm_entry_controls;
+ exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
if (cpu_has_load_ia32_efer()) {
if (guest_efer & EFER_LMA)
exec_control |= VM_ENTRY_IA32E_MODE;
@@ -2302,9 +2307,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits may be modified by vmx_set_efer() in prepare_vmcs02().
*/
- exec_control = vmx_vmexit_ctrl();
+ exec_control = __vm_exit_controls_get(vmcs01);
if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
exec_control |= VM_EXIT_LOAD_IA32_EFER;
+ else
+ exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
vm_exit_controls_set(vmx, exec_control);

/*
@@ -3347,7 +3354,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,

vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);

- prepare_vmcs02_early(vmx, vmcs12);
+ prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);

if (from_vmentry) {
if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4f151175d45a..414b440de9ac 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -418,9 +418,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \
vmx->loaded_vmcs->controls_shadow.lname = val; \
} \
} \
+static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \
+{ \
+ return vmcs->controls_shadow.lname; \
+} \
static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \
{ \
- return vmx->loaded_vmcs->controls_shadow.lname; \
+ return __##lname##_controls_get(vmx->loaded_vmcs); \
} \
static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
{ \
--
2.32.0.605.g8dce9f2422-goog