Re: [PATCH v8 26/26] KVM: nVMX: Enable CET support for nested guest

From: Maxim Levitsky
Date: Tue Jan 02 2024 - 17:35:53 EST


On Thu, 2023-12-21 at 09:02 -0500, Yang Weijiang wrote:
> Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting
> to enable CET for nested VM.
>
> vmcs12 and vmcs02 need to be synced when L2 exits to L1 or when L1 wants
> to resume L2, so that each level observes the correct CET state of the other.
>
> Suggested-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
> ---
> arch/x86/kvm/vmx/nested.c | 57 +++++++++++++++++++++++++++++++++++++--
> arch/x86/kvm/vmx/vmcs12.c | 6 +++++
> arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++-
> arch/x86/kvm/vmx/vmx.c | 2 ++
> 4 files changed, 76 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 468a7cf75035..dee718c65255 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
> nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
>
> + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_U_CET, MSR_TYPE_RW);
> +
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_S_CET, MSR_TYPE_RW);
> +
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_PL0_SSP, MSR_TYPE_RW);
> +
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_PL1_SSP, MSR_TYPE_RW);
> +
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_PL2_SSP, MSR_TYPE_RW);
> +
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_PL3_SSP, MSR_TYPE_RW);
> +
> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW);
> +
> kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
>
> vmx->nested.force_msr_bitmap_recalc = false;
> @@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
> (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
> vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
> +
> + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
> + if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
> + vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
> + vmcs_writel(GUEST_INTR_SSP_TABLE,
> + vmcs12->guest_ssp_tbl);
> + }
> + if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
> + guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
> + vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
> + }
> }

Looks good.
>
> if (nested_cpu_has_xsaves(vmcs12))
> @@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
> vmcs12->guest_pending_dbg_exceptions =
> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>
> + if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
> + vmcs12->guest_ssp = vmcs_readl(GUEST_SSP);
> + vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
> + }
> + if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
> + guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) {
> + vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);
> + }
> +
> vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
> }

Looks good.

>
> @@ -4569,6 +4611,16 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
> vmcs_write64(GUEST_BNDCFGS, 0);
>
> + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE) {
> + if (guest_can_use(vcpu, X86_FEATURE_SHSTK)) {
> + vmcs_writel(HOST_SSP, vmcs12->host_ssp);
> + vmcs_writel(HOST_INTR_SSP_TABLE, vmcs12->host_ssp_tbl);
> + }
> + if (guest_can_use(vcpu, X86_FEATURE_SHSTK) ||
> + guest_can_use(vcpu, X86_FEATURE_IBT))
> + vmcs_writel(HOST_S_CET, vmcs12->host_s_cet);
> + }
> +

Looks good.

> if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
> vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
> vcpu->arch.pat = vmcs12->host_ia32_pat;
> @@ -6840,7 +6892,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
> VM_EXIT_HOST_ADDR_SPACE_SIZE |
> #endif
> VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> - VM_EXIT_CLEAR_BNDCFGS;
> + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE;
> msrs->exit_ctls_high |=
> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
> @@ -6862,7 +6914,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf,
> #ifdef CONFIG_X86_64
> VM_ENTRY_IA32E_MODE |
> #endif
> - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
> + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
> + VM_ENTRY_LOAD_CET_STATE;
> msrs->entry_ctls_high |=
> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER |
> VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
> diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
> index 106a72c923ca..4233b5ca9461 100644
> --- a/arch/x86/kvm/vmx/vmcs12.c
> +++ b/arch/x86/kvm/vmx/vmcs12.c
> @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = {
> FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
> FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
> FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
> + FIELD(GUEST_S_CET, guest_s_cet),
> + FIELD(GUEST_SSP, guest_ssp),
> + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl),
> FIELD(HOST_CR0, host_cr0),
> FIELD(HOST_CR3, host_cr3),
> FIELD(HOST_CR4, host_cr4),
> @@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = {
> FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
> FIELD(HOST_RSP, host_rsp),
> FIELD(HOST_RIP, host_rip),
> + FIELD(HOST_S_CET, host_s_cet),
> + FIELD(HOST_SSP, host_ssp),
> + FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl),
> };
> const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
> diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
> index 01936013428b..3884489e7f7e 100644
> --- a/arch/x86/kvm/vmx/vmcs12.h
> +++ b/arch/x86/kvm/vmx/vmcs12.h
> @@ -117,7 +117,13 @@ struct __packed vmcs12 {
> natural_width host_ia32_sysenter_eip;
> natural_width host_rsp;
> natural_width host_rip;
> - natural_width paddingl[8]; /* room for future expansion */
> + natural_width host_s_cet;
> + natural_width host_ssp;
> + natural_width host_ssp_tbl;
> + natural_width guest_s_cet;
> + natural_width guest_ssp;
> + natural_width guest_ssp_tbl;
> + natural_width paddingl[2]; /* room for future expansion */
> u32 pin_based_vm_exec_control;
> u32 cpu_based_vm_exec_control;
> u32 exception_bitmap;
> @@ -292,6 +298,12 @@ static inline void vmx_check_vmcs12_offsets(void)
> CHECK_OFFSET(host_ia32_sysenter_eip, 656);
> CHECK_OFFSET(host_rsp, 664);
> CHECK_OFFSET(host_rip, 672);
> + CHECK_OFFSET(host_s_cet, 680);
> + CHECK_OFFSET(host_ssp, 688);
> + CHECK_OFFSET(host_ssp_tbl, 696);
> + CHECK_OFFSET(guest_s_cet, 704);
> + CHECK_OFFSET(guest_ssp, 712);
> + CHECK_OFFSET(guest_ssp_tbl, 720);
> CHECK_OFFSET(pin_based_vm_exec_control, 744);
> CHECK_OFFSET(cpu_based_vm_exec_control, 748);
> CHECK_OFFSET(exception_bitmap, 752);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index c802e790c0d5..7ddd3f6fe8ab 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7732,6 +7732,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
> cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU));
> cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
> cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));
> + cr4_fixed1_update(X86_CR4_CET, ecx, feature_bit(SHSTK));
> + cr4_fixed1_update(X86_CR4_CET, edx, feature_bit(IBT));
>
> entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
> cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM));

Looks good to me, but I might have missed something. Nesting is always tricky to get right,
so this should be very well tested.


Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
Maxim Levitsky