[PATCH v16 087/116] KVM: TDX: Handle EXIT_REASON_OTHER_SMI with MSMI

From: isaku . yamahata
Date: Mon Oct 16 2023 - 12:50:01 EST


From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

When BIOS eMCA MCE-SMI morphing is enabled, the #MC is morphed to MSMI
(Machine Check System Management Interrupt). Then the SMI causes TD exit
with the read reason of EXIT_REASON_OTHER_SMI with MSMI bit set in the exit
qualification to KVM instead of EXIT_REASON_EXCEPTION_NMI with MC
exception.

Handle EXIT_REASON_OTHER_SMI with MSMI bit set in the exit qualification as
MCE(Machine Check Exception) happened during TD guest running.

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
---
arch/x86/kvm/vmx/tdx.c | 40 ++++++++++++++++++++++++++++++++++---
arch/x86/kvm/vmx/tdx_arch.h | 2 ++
2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6bed75c4069c..44558f1c13e4 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -890,6 +890,30 @@ void tdx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
tdexit_intr_info(vcpu));
else if (exit_reason == EXIT_REASON_EXCEPTION_NMI)
vmx_handle_exception_irqoff(vcpu, tdexit_intr_info(vcpu));
+ else if (unlikely(tdx->exit_reason.non_recoverable ||
+ tdx->exit_reason.error)) {
+ /*
+ * The only reason it gets EXIT_REASON_OTHER_SMI is there is an
+ * #MSMI(Machine Check System Management Interrupt) with
+ * exit_qualification bit 0 set in TD guest.
+ * The #MSMI is delivered right after SEAMCALL returns,
+ * and an #MC is delivered to host kernel after SMI handler
+ * returns.
+ *
+ * The #MC right after SEAMCALL is fixed up and skipped in #MC
+ * handler because it's an #MC happens in TD guest we cannot
+ * handle it with host's context.
+ *
+ * Call KVM's machine check handler explicitly here.
+ */
+ if (tdx->exit_reason.basic == EXIT_REASON_OTHER_SMI) {
+ unsigned long exit_qual;
+
+ exit_qual = tdexit_exit_qual(vcpu);
+ if (exit_qual & TD_EXIT_OTHER_SMI_IS_MSMI)
+ kvm_machine_check();
+ }
+ }
}

static int tdx_handle_exception(struct kvm_vcpu *vcpu)
@@ -1372,6 +1396,11 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
exit_reason.full, exit_reason.basic,
to_kvm_tdx(vcpu->kvm)->hkid,
set_hkid_to_hpa(0, to_kvm_tdx(vcpu->kvm)->hkid));
+
+ /*
+ * tdx_handle_exit_irqoff() handled EXIT_REASON_OTHER_SMI. It
+ * must be handled before enabling preemption because it's #MC.
+ */
goto unhandled_exit;
}

@@ -1410,9 +1439,14 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
return tdx_handle_ept_misconfig(vcpu);
case EXIT_REASON_OTHER_SMI:
/*
- * If reach here, it's not a Machine Check System Management
- * Interrupt(MSMI). #SMI is delivered and handled right after
- * SEAMRET, nothing needs to be done in KVM.
+ * Unlike VMX, all the SMI in SEAM non-root mode (i.e. when
+ * TD guest vcpu is running) will cause TD exit to TDX module,
+ * then SEAMRET to KVM. Once it exits to KVM, SMI is delivered
+ * and handled right away.
+ *
+ * - If it's an Machine Check System Management Interrupt
+ * (MSMI), it's handled above due to non_recoverable bit set.
+ * - If it's not an MSMI, don't need to do anything here.
*/
return 1;
default:
diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
index fc9a8898765c..9f93250d22b9 100644
--- a/arch/x86/kvm/vmx/tdx_arch.h
+++ b/arch/x86/kvm/vmx/tdx_arch.h
@@ -47,6 +47,8 @@
#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003
#define TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004

+#define TD_EXIT_OTHER_SMI_IS_MSMI BIT(1)
+
/* TDX control structure (TDR/TDCS/TDVPS) field access codes */
#define TDX_NON_ARCH BIT_ULL(63)
#define TDX_CLASS_SHIFT 56
--
2.25.1