Re: [RESEND PATCH 5/6] KVM: x86/VMX: add kvm_vmx_reinject_nmi_irq() for NMI/IRQ reinjection

From: Peter Zijlstra
Date: Thu Nov 10 2022 - 04:03:50 EST


On Wed, Nov 09, 2022 at 10:15:44PM -0800, Xin Li wrote:
> To eliminate dispatching NMI/IRQ through the IDT, add
> kvm_vmx_reinject_nmi_irq(), which calls external_interrupt()
> for IRQ reinjection.
>
> Lastly, replace the call to an NMI/IRQ handler in an IDT descriptor
> with a call to kvm_vmx_reinject_nmi_irq().
>
> Signed-off-by: H. Peter Anvin (Intel) <hpa@xxxxxxxxx>
> Signed-off-by: Xin Li <xin3.li@xxxxxxxxx>

Idem (same comment as before).


> +#if IS_ENABLED(CONFIG_KVM_INTEL)
> +/*
> + * KVM VMX reinjects NMI/IRQ on its current stack. It is a sync
> + * call, thus the values in the pt_regs structure are not used in
> + * executing NMI/IRQ handlers, except cs.RPL and flags.IF, which
> + * are both always 0 in the VMX NMI/IRQ reinjection context. Thus
> + * we simply allocate a zeroed pt_regs structure on the current
> + * stack to call external_interrupt().
> + */
> +void kvm_vmx_reinject_nmi_irq(u32 vector)

Should this be noinstr?

> +{
> + struct pt_regs irq_regs;
> +
> + memset(&irq_regs, 0, sizeof(irq_regs));
> +
> + if (vector == NMI_VECTOR)
> + return exc_nmi(&irq_regs);
> +
> + external_interrupt(&irq_regs, vector);
> +}
> +EXPORT_SYMBOL_GPL(kvm_vmx_reinject_nmi_irq);
> +#endif

> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 63247c57c72c..b457e4888468 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -46,6 +46,7 @@
> #include <asm/mshyperv.h>
> #include <asm/mwait.h>
> #include <asm/spec-ctrl.h>
> +#include <asm/traps.h>
> #include <asm/virtext.h>
> #include <asm/vmx.h>
>
> @@ -6758,15 +6759,11 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
> memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
> }
>
> -void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
> -
> -static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
> - unsigned long entry)
> +static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 vector)
> {
> - bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
> -
> - kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
> - vmx_do_interrupt_nmi_irqoff(entry);
> + kvm_before_interrupt(vcpu, vector == NMI_VECTOR ?
> + KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
> + kvm_vmx_reinject_nmi_irq(vector);
> kvm_after_interrupt(vcpu);
> }
>
> @@ -6792,7 +6789,6 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
>
> static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
> {
> - const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
> u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
>
> /* if exit due to PF check for async PF */
> @@ -6806,20 +6802,19 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
> kvm_machine_check();
> /* We need to handle NMIs before interrupts are enabled */
> else if (is_nmi(intr_info))
> - handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
> + handle_interrupt_nmi_irqoff(&vmx->vcpu, NMI_VECTOR);
> }
>
> static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> {
> u32 intr_info = vmx_get_intr_info(vcpu);
> unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
> - gate_desc *desc = (gate_desc *)host_idt_base + vector;
>
> if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
> "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
> return;
>
> - handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
> + handle_interrupt_nmi_irqoff(vcpu, vector);
> vcpu->arch.at_instruction_boundary = true;
> }

How does any of this work? You're calling into entry/noinstr code from a
random context.