[PATCH v2 10/11] KVM: SVM: implement support for vNMI

From: Maxim Levitsky
Date: Tue Nov 29 2022 - 14:42:19 EST


Implement support for injecting pending NMIs via the new
.kvm_x86_set_hw_nmi_pending callback, using AMD's vNMI feature.

Note that a vNMI is delivered directly to the guest and can't be
made to cause a VM exit, while a VM exit is exactly what is needed
when a nested guest intercepts NMIs.

Therefore, to avoid breaking nesting, vNMI is inhibited while a
nested guest is running, and the legacy NMI window detection and
delivery method is used instead.

While it is possible to pass vNMI through to a nested guest that
doesn't intercept NMIs, such usage is very uncommon and not worth
optimizing for.

Signed-off-by: Santosh Shukla <santosh.shukla@xxxxxxx>
Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
---
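Note (not for the commit log): a rough sketch of how the common x86
code added earlier in this series is expected to drive the new
callback pair. The kvm_try_offload_nmi() name and its call site are
illustrative assumptions only, not the actual interface:

static void kvm_try_offload_nmi(struct kvm_vcpu *vcpu)
{
	/*
	 * Hand at most one pending NMI to the hardware. If the offload
	 * fails (no vNMI, vNMI inhibited while L2 runs, or a vNMI
	 * already pending), the NMI stays in nmi_pending and is
	 * delivered through the legacy NMI window machinery instead.
	 */
	if (vcpu->arch.nmi_pending && kvm_x86_ops.set_hw_nmi_pending &&
	    static_call(kvm_x86_set_hw_nmi_pending)(vcpu))
		vcpu->arch.nmi_pending--;

	/* Anything still pending needs a regular event injection pass. */
	if (vcpu->arch.nmi_pending)
		kvm_make_request(KVM_REQ_EVENT, vcpu);
}
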
arch/x86/kvm/svm/nested.c | 42 +++++++++++++++
arch/x86/kvm/svm/svm.c | 111 ++++++++++++++++++++++++++++++--------
arch/x86/kvm/svm/svm.h | 10 ++++
3 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e891318595113e..5bea672bf8b12d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -623,6 +623,42 @@ static bool is_evtinj_nmi(u32 evtinj)
return type == SVM_EVTINJ_TYPE_NMI;
}

+static void nested_svm_save_vnmi(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+
+ /*
+ * Copy the vNMI state back to the software NMI tracking state
+ * for the duration of the nested run.
+ */
+
+ svm->nmi_masked = vmcb01->control.int_ctl & V_NMI_MASK;
+ if (vmcb01->control.int_ctl & V_NMI_PENDING)
+ svm->vcpu.arch.nmi_pending++;
+}
+
+static void nested_svm_restore_vnmi(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+
+ /*
+ * Restore the vNMI state from the software NMI tracking state
+ * after a nested run.
+ */
+
+ if (svm->nmi_masked)
+ vmcb01->control.int_ctl |= V_NMI_MASK;
+ else
+ vmcb01->control.int_ctl &= ~V_NMI_MASK;
+
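+ /*
+ * Hardware can hold at most one pending vNMI, so only one NMI is
+ * moved from nmi_pending into V_NMI_PENDING here; any remainder
+ * stays in vcpu->arch.nmi_pending for later offload.
+ */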
+ if (vcpu->arch.nmi_pending) {
+ vcpu->arch.nmi_pending--;
+ vmcb01->control.int_ctl |= V_NMI_PENDING;
+ } else {
+ vmcb01->control.int_ctl &= ~V_NMI_PENDING;
+ }
+}
+
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
unsigned long vmcb12_rip,
unsigned long vmcb12_csbase)
@@ -646,6 +682,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
else
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);

+ if (vnmi)
+ nested_svm_save_vnmi(svm);
+
/* Copied from vmcb01. msrpm_base can be overwritten later. */
vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -1049,6 +1088,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_update_lbrv(vcpu);
}

+ if (vnmi)
+ nested_svm_restore_vnmi(svm);
+
/*
* On vmexit the GIF is set to false and
* no event can be injected in L1.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cfed6ab29c839a..bf10adcf3170a8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -230,6 +230,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
bool intercept_smi = true;
module_param(intercept_smi, bool, 0444);

+bool vnmi = true;
+module_param(vnmi, bool, 0444);

static bool svm_gp_erratum_intercept = true;

@@ -1299,6 +1301,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);

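+ /* vNMI stays inhibited while L2 runs, see is_vnmi_enabled() */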
+ if (vnmi)
+ svm->vmcb->control.int_ctl |= V_NMI_ENABLE;
+
if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI);
svm_clr_intercept(svm, INTERCEPT_CLGI);
@@ -3487,6 +3492,39 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
++vcpu->stat.nmi_injections;
}

+static bool svm_get_hw_nmi_pending(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!is_vnmi_enabled(svm))
+ return false;
+
+ return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING);
+}
+
+static bool svm_set_hw_nmi_pending(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!is_vnmi_enabled(svm))
+ return false;
+
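+ /*
+ * The VMCB tracks a single pending vNMI; refuse the offload if
+ * one is already pending so the caller keeps this NMI in
+ * software instead.
+ */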
+ if (svm->vmcb->control.int_ctl & V_NMI_PENDING)
+ return false;
+
+ svm->vmcb->control.int_ctl |= V_NMI_PENDING;
+ vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
+
+ /*
+ * The NMI isn't technically injected yet, but counting it as
+ * injected here is a close enough approximation for the stats.
+ */
+ ++vcpu->stat.nmi_injections;
+
+ return true;
+}
+
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3582,11 +3620,38 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
}

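+/*
+ * With vNMI the current NMI masking state lives in the VMCB
+ * (V_NMI_MASK) and is managed by hardware; without it, KVM tracks
+ * masking in software and uses IRET interception to detect the
+ * moment NMIs become unmasked again.
+ */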
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_vnmi_enabled(svm))
+ return svm->vmcb->control.int_ctl & V_NMI_MASK;
+ else
+ return svm->nmi_masked;
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_vnmi_enabled(svm)) {
+ if (masked)
+ svm->vmcb->control.int_ctl |= V_NMI_MASK;
+ else
+ svm->vmcb->control.int_ctl &= ~V_NMI_MASK;
+ } else {
+ svm->nmi_masked = masked;
+ if (masked)
+ svm_enable_iret_interception(svm);
+ else
+ svm_disable_iret_interception(svm);
+ }
+}
+
bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- bool ret;

if (!gif_set(svm))
return true;
@@ -3594,10 +3659,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return false;

- ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
- (svm->nmi_masked);
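+ /* Covers both the vNMI (V_NMI_MASK) and legacy nmi_masked cases */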
+ if (svm_get_nmi_mask(vcpu))
+ return true;

- return ret;
+ return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
}

static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
@@ -3615,24 +3680,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}

-static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
-{
- return to_svm(vcpu)->nmi_masked;
-}
-
-static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (masked) {
- svm->nmi_masked = true;
- svm_enable_iret_interception(svm);
- } else {
- svm->nmi_masked = false;
- svm_disable_iret_interception(svm);
- }
-}
-
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3725,10 +3772,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
/*
* Something prevents NMI from being injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
+ *
+ * With vNMI an NMI window should never be needed: a pending NMI
+ * can always be delivered by setting V_NMI_PENDING or injected
+ * directly via EVENTINJ.
*/
+ if (WARN_ON_ONCE(is_vnmi_enabled(svm)))
+ return;
+
svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
- svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+ svm->nmi_singlestep = true;
}

static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
@@ -4770,6 +4823,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.patch_hypercall = svm_patch_hypercall,
.inject_irq = svm_inject_irq,
.inject_nmi = svm_inject_nmi,
+ .get_hw_nmi_pending = svm_get_hw_nmi_pending,
+ .set_hw_nmi_pending = svm_set_hw_nmi_pending,
.inject_exception = svm_inject_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
@@ -5058,6 +5113,16 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}

+ vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_AMD_VNMI);
+ if (vnmi) {
+ pr_info("Virtual NMI enabled\n");
+ } else {
+ svm_x86_ops.get_hw_nmi_pending = NULL;
+ svm_x86_ops.set_hw_nmi_pending = NULL;
+ }
+
if (lbrv) {
if (!boot_cpu_has(X86_FEATURE_LBRV))
lbrv = false;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 587ddc150f9f34..0b7e1790fadde1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -35,6 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern int vgif;
extern bool intercept_smi;
+extern bool vnmi;

enum avic_modes {
AVIC_MODE_NONE = 0,
@@ -553,6 +554,15 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
(msr < (APIC_BASE_MSR + 0x100));
}

+static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
+{
+ /* L1's vNMI is inhibited while a nested guest is running */
+ if (is_guest_mode(&svm->vcpu))
+ return false;
+
+ return !!(svm->vmcb01.ptr->control.int_ctl & V_NMI_ENABLE);
+}
+
/* svm.c */
#define MSR_INVALID 0xffffffffU

--
2.26.3