[PATCH v3 20/21] KVM: x86: Enable kernel IBT support for guest

From: Yang Weijiang
Date: Thu May 11 2023 - 03:16:13 EST


Enable MSR_IA32_S_CET access for guest kernel IBT.

The mainline Linux kernel now supports supervisor IBT (s-IBT) for kernel
code. To make s-IBT work in a guest (or nested guest), pass through
MSR_IA32_S_CET to the guest (or nested guest) if the host kernel and KVM
have IBT enabled.

Note, s-IBT can work independently of host XSAVES support because the guest
MSR_IA32_S_CET is saved to and loaded from the VMCS GUEST_S_CET field.

Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
---
arch/x86/kvm/vmx/nested.c | 3 +++
arch/x86/kvm/vmx/vmx.c | 39 ++++++++++++++++++++++++++++++++++-----
arch/x86/kvm/x86.c | 7 ++++++-
3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 522ac27d2534..bf690827bfee 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -664,6 +664,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_U_CET, MSR_TYPE_RW);

+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_S_CET, MSR_TYPE_RW);
+
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PL3_SSP, MSR_TYPE_RW);

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a2494156902d..1d0151f9e575 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -711,6 +711,7 @@ static bool is_valid_passthrough_msr(u32 msr)
return true;
case MSR_IA32_U_CET:
case MSR_IA32_PL3_SSP:
+ case MSR_IA32_S_CET:
return true;
}

@@ -2097,14 +2098,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
case MSR_IA32_PL3_SSP:
case MSR_KVM_GUEST_SSP:
if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
return 1;
- if (msr_info->index == MSR_KVM_GUEST_SSP)
+ if (msr_info->index == MSR_KVM_GUEST_SSP) {
msr_info->data = vmcs_readl(GUEST_SSP);
- else
+ } else if (msr_info->index == MSR_IA32_S_CET) {
+ msr_info->data = vmcs_readl(GUEST_S_CET);
+ } else {
kvm_get_xsave_msr(msr_info);
+ }
break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -2419,6 +2424,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
case MSR_IA32_PL3_SSP:
case MSR_KVM_GUEST_SSP:
if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
@@ -2430,10 +2436,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr_index == MSR_IA32_PL3_SSP ||
msr_index == MSR_KVM_GUEST_SSP) && (data & GENMASK(2, 0)))
return 1;
- if (msr_index == MSR_KVM_GUEST_SSP)
+ if (msr_index == MSR_KVM_GUEST_SSP) {
vmcs_writel(GUEST_SSP, data);
- else
+ } else if (msr_index == MSR_IA32_S_CET) {
+ vmcs_writel(GUEST_S_CET, data);
+ } else {
kvm_set_xsave_msr(msr_info);
+ }
break;
case MSR_IA32_PERF_CAPABILITIES:
if (data && !vcpu_to_pmu(vcpu)->version)
@@ -7322,6 +7331,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)

kvm_wait_lapic_expire(vcpu);

+ /*
+ * Save host MSR_IA32_S_CET so that it can be reloaded at vm_exit.
+ * No need to save the other two vmcs fields as supervisor SHSTK
+ * is not enabled on Intel platforms now.
+ */
+ if (IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+ (vm_exit_controls_get(vmx) & VM_EXIT_LOAD_CET_STATE)) {
+ u64 msr;
+
+ rdmsrl(MSR_IA32_S_CET, msr);
+ vmcs_writel(HOST_S_CET, msr);
+ }
+
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));

@@ -7735,6 +7757,13 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)

incpt |= !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, incpt);
+
+ /*
+ * If IBT is available to the guest, pass through the S_CET MSR too,
+ * since kernel IBT is already in the mainline kernel tree.
+ */
+ incpt = !guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, incpt);
}

static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
@@ -7805,7 +7834,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);

- if (kvm_cet_user_supported())
+ if (kvm_cet_user_supported() || kvm_cpu_cap_has(X86_FEATURE_IBT))
vmx_update_intercept_for_cet_msr(vcpu);
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 858cb68e781a..b450361b94ef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1471,6 +1471,7 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
MSR_IA32_XSS,
MSR_IA32_U_CET, MSR_IA32_PL3_SSP, MSR_KVM_GUEST_SSP,
+ MSR_IA32_S_CET,
};

static const u32 msrs_to_save_pmu[] = {
@@ -13652,7 +13653,8 @@ EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);

bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
- if (!kvm_cet_user_supported())
+ if (!kvm_cet_user_supported() &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
return false;

if (msr->host_initiated)
@@ -13666,6 +13668,9 @@ bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (msr->index == MSR_KVM_GUEST_SSP)
return false;

+ if (msr->index == MSR_IA32_S_CET)
+ return guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+
if (msr->index == MSR_IA32_PL3_SSP &&
!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
return false;
--
2.27.0