Re: [PATCH v9 26/27] KVM: nVMX: Enable CET support for nested guest

From: Chao Gao
Date: Mon Jan 29 2024 - 02:05:05 EST


On Tue, Jan 23, 2024 at 06:41:59PM -0800, Yang Weijiang wrote:
>Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting
>to enable CET for nested VM.
>
>vmcs12 and vmcs02 need to be synced when L2 exits to L1 or when L1 wants
>to resume L2, so that the correct CET states can be observed by one another.
>
>Suggested-by: Chao Gao <chao.gao@xxxxxxxxx>
>Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
>Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
>---
> arch/x86/kvm/vmx/nested.c | 57 +++++++++++++++++++++++++++++++++++++--
> arch/x86/kvm/vmx/vmcs12.c | 6 +++++
> arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++-
> arch/x86/kvm/vmx/vmx.c | 2 ++
> 4 files changed, 76 insertions(+), 3 deletions(-)
>
>diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
>index 468a7cf75035..e330897a7e5e 100644
>--- a/arch/x86/kvm/vmx/nested.c
>+++ b/arch/x86/kvm/vmx/nested.c
>@@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
> nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
>
>+ /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_U_CET, MSR_TYPE_RW);
>+
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_S_CET, MSR_TYPE_RW);
>+
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_PL0_SSP, MSR_TYPE_RW);
>+
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_PL1_SSP, MSR_TYPE_RW);
>+
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_PL2_SSP, MSR_TYPE_RW);
>+
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_PL3_SSP, MSR_TYPE_RW);
>+
>+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+ MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW);
>+
> kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
>
> vmx->nested.force_msr_bitmap_recalc = false;
>@@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
> (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
> vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>+
>+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
>+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
>+ vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
>+ vmcs_writel(GUEST_INTR_SSP_TABLE,
>+ vmcs12->guest_ssp_tbl);
>+ }
>+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
>+ guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
>+ vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
>+ }

I think you need to move this hunk outside the outermost if-statement, i.e.,

if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {

otherwise, the whole block may be skipped (e.g., when eVMCS is enabled and
GUEST_GRP1 is clean), leaving the CET state not context-switched.

And if VM_ENTRY_LOAD_CET_STATE of vmcs12 is cleared, L1's values should be
propagated to vmcs02 on nested VMenter; see pre_vmenter_debugctl in struct
nested_vmx. I believe we need similar handling for the three CET fields.

> }
>
> if (nested_cpu_has_xsaves(vmcs12))
>@@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
> vmcs12->guest_pending_dbg_exceptions =
> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>
>+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
>+ vmcs12->guest_ssp = vmcs_readl(GUEST_SSP);
>+ vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
>+ }

>+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
>+ guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) {
>+ vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);
>+ }

unnecessary braces (the if-statement has a single-statement body).