Re: [PATCH v1 08/23] KVM: VMX: Initialize VMCS FRED fields

From: Chao Gao
Date: Sun Nov 12 2023 - 22:06:48 EST


On Wed, Nov 08, 2023 at 10:29:48AM -0800, Xin Li wrote:
>Initialize host VMCS FRED fields with host FRED MSRs' value and
>guest VMCS FRED fields to 0.
>
>FRED CPU states are managed in 9 new FRED MSRs, as well as a few
>existing CPU registers and MSRs, e.g., CR4.FRED. To support FRED
>context management, new VMCS fields corresponding to most of FRED
>CPU state MSRs are added to both the host-state and guest-state
>areas of VMCS.
>
>Specifically, no VMCS fields are added for the FRED RSP0 and SSP0 MSRs,
>because these two MSRs are used only during ring 3 event delivery;
>thus KVM, running in ring 0, can run safely even with the guest's FRED
>RSP0 and SSP0 loaded. Loading the host FRED RSP0 and SSP0 can be
>deferred until just before returning to user level.
>
>Tested-by: Shan Kang <shan.kang@xxxxxxxxx>
>Signed-off-by: Xin Li <xin3.li@xxxxxxxxx>
>---
> arch/x86/include/asm/vmx.h | 16 ++++++++++++++++
> arch/x86/kvm/vmx/vmx.c | 32 ++++++++++++++++++++++++++++++++
> 2 files changed, 48 insertions(+)
>
>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>index 41796a733bc9..d54a1a1057b0 100644
>--- a/arch/x86/include/asm/vmx.h
>+++ b/arch/x86/include/asm/vmx.h
>@@ -277,12 +277,28 @@ enum vmcs_field {
> GUEST_BNDCFGS_HIGH = 0x00002813,
> GUEST_IA32_RTIT_CTL = 0x00002814,
> GUEST_IA32_RTIT_CTL_HIGH = 0x00002815,
>+ GUEST_IA32_FRED_CONFIG = 0x0000281a,
>+ GUEST_IA32_FRED_RSP1 = 0x0000281c,
>+ GUEST_IA32_FRED_RSP2 = 0x0000281e,
>+ GUEST_IA32_FRED_RSP3 = 0x00002820,
>+ GUEST_IA32_FRED_STKLVLS = 0x00002822,
>+ GUEST_IA32_FRED_SSP1 = 0x00002824,
>+ GUEST_IA32_FRED_SSP2 = 0x00002826,
>+ GUEST_IA32_FRED_SSP3 = 0x00002828,
> HOST_IA32_PAT = 0x00002c00,
> HOST_IA32_PAT_HIGH = 0x00002c01,
> HOST_IA32_EFER = 0x00002c02,
> HOST_IA32_EFER_HIGH = 0x00002c03,
> HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
> HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
>+ HOST_IA32_FRED_CONFIG = 0x00002c08,
>+ HOST_IA32_FRED_RSP1 = 0x00002c0a,
>+ HOST_IA32_FRED_RSP2 = 0x00002c0c,
>+ HOST_IA32_FRED_RSP3 = 0x00002c0e,
>+ HOST_IA32_FRED_STKLVLS = 0x00002c10,
>+ HOST_IA32_FRED_SSP1 = 0x00002c12,
>+ HOST_IA32_FRED_SSP2 = 0x00002c14,
>+ HOST_IA32_FRED_SSP3 = 0x00002c16,
> PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
> CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
> EXCEPTION_BITMAP = 0x00004004,
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index 327e052d90c1..41772ecdd368 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -1477,6 +1477,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
> (unsigned long)(cpu_entry_stack(cpu) + 1));
> }
>
>+#ifdef CONFIG_X86_64
>+ /* Per-CPU FRED MSRs */
>+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {

How about using kvm_cpu_cap_has() instead, to decouple KVM's capability to
virtualize a feature from the host's enabling of that feature?

>+ vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
>+ vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
>+ vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
>+ vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
>+ vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
>+ vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));
>+ }
>+#endif

Why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below
(in vmx_set_constant_host_state()) isn't?

>+
> vmx->loaded_vmcs->cpu = cpu;
> }
> }
>@@ -4375,6 +4387,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
>
> if (cpu_has_load_ia32_efer())
> vmcs_write64(HOST_IA32_EFER, host_efer);
>+
>+ /*
>+ * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs
>+ * are the same on all CPUs, thus they are initialized here.
>+ */
>+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
>+ vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG));
>+ vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS));
>+ }
> }
>
> void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
>@@ -4936,6 +4957,17 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> vmcs_writel(GUEST_IDTR_BASE, 0);
> vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
>
>+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
>+ vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
>+ vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
>+ vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
>+ vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
>+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
>+ vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
>+ vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
>+ vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
>+ }
>+

Move this hunk to __vmx_vcpu_reset(), because vmx_vcpu_reset() is also
called with init_event set, and the FRED spec says:

"INIT does not change the value of the new MSRs."

> vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
> vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
> vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
>--
>2.42.0
>
>