Re: [PATCH v4 2/4] KVM: X86: Fix loss of exception which has not yet injected

From: Paolo Bonzini
Date: Thu Aug 24 2017 - 07:36:32 EST


On 24/08/2017 12:35, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>
> vmx_complete_interrupts() assumes that the exception is always injected,
> so it would be dropped by kvm_clear_exception_queue(). This patch separates
> exception.pending from exception.injected. exception.injected represents that
> the exception is injected, or that it should be reinjected because a vmexit
> occurred during event delivery in VMX non-root operation. exception.pending
> represents that the exception is queued; it is cleared when injecting the
> exception into the guest. So exception.pending and exception.injected can
> cooperate to guarantee exception will not be lost.
>
> Reported-by: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> v3 -> v4:
> * avoid the double fault
> * exception must be injected immediately
> v2 -> v3:
> * the changes to inject_pending_event and adds the WARN_ON
> * combine injected and pending exception for GET/SET_VCPU_EVENTS
>
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/svm.c | 2 +-
> arch/x86/kvm/vmx.c | 4 +--
> arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++--------------
> arch/x86/kvm/x86.h | 4 +--
> 5 files changed, 57 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9d90787..6e385ac 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -547,8 +547,8 @@ struct kvm_vcpu_arch {
>
> struct kvm_queued_exception {
> bool pending;
> + bool injected;
> bool has_error_code;
> - bool reinject;
> u8 nr;
> u32 error_code;
> u8 nested_apf;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a2fddce..6a439a1 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -655,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
> struct vcpu_svm *svm = to_svm(vcpu);
> unsigned nr = vcpu->arch.exception.nr;
> bool has_error_code = vcpu->arch.exception.has_error_code;
> - bool reinject = vcpu->arch.exception.reinject;
> + bool reinject = vcpu->arch.exception.injected;
> u32 error_code = vcpu->arch.exception.error_code;
>
> /*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c5f43ab..902b780 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2503,7 +2503,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> unsigned nr = vcpu->arch.exception.nr;
> bool has_error_code = vcpu->arch.exception.has_error_code;
> - bool reinject = vcpu->arch.exception.reinject;
> + bool reinject = vcpu->arch.exception.injected;
> u32 error_code = vcpu->arch.exception.error_code;
> u32 intr_info = nr | INTR_INFO_VALID_MASK;
>
> @@ -10948,7 +10948,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
> u32 idt_vectoring;
> unsigned int nr;
>
> - if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
> + if (vcpu->arch.exception.injected) {
> nr = vcpu->arch.exception.nr;
> idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8f41b88..fa79cd7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -389,15 +389,20 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
>
> kvm_make_request(KVM_REQ_EVENT, vcpu);
>
> - if (!vcpu->arch.exception.pending) {
> + if (!vcpu->arch.exception.pending ||
> + !vcpu->arch.exception.injected) {
> queue:
> if (has_error && !is_protmode(vcpu))
> has_error = false;
> - vcpu->arch.exception.pending = true;
> + if (reinject)

I'd like to add a WARN_ON here:

+ /*
+ * vcpu->arch.exception.pending is only true on
+ * vmentry if an event injection was blocked by
+ * nested_run_pending. In that case, however,
+ * vcpu_enter_guest requests an immediate exit,
+ * so the guest shouldn't proceed far enough to
+ * need reinjection.
+ */
+ WARN_ON_ONCE(vcpu->arch.exception.pending);

I'll test it and add it if it is useful.

> + vcpu->arch.exception.injected = true;
> + else {
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.injected = false;
> + }
> vcpu->arch.exception.has_error_code = has_error;
> vcpu->arch.exception.nr = nr;
> vcpu->arch.exception.error_code = error_code;
> - vcpu->arch.exception.reinject = reinject;
> return;
> }

The double fault case can still result in pending && injected. We need
to set exception.pending = true to allow the double fault to trigger a
vmexit, but exception.injected must be reset.

Otherwise looks good. This was exhausting...

Paolo

> @@ -3069,8 +3074,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
> struct kvm_vcpu_events *events)
> {
> process_nmi(vcpu);
> + /*
> + * FIXME: pass injected and pending separately. This is only
> + * needed for nested virtualization, whose state cannot be
> + * migrated yet. For now we combine them just in case.
> + */
> events->exception.injected =
> - vcpu->arch.exception.pending &&
> + (vcpu->arch.exception.pending ||
> + vcpu->arch.exception.injected) &&
> !kvm_exception_is_soft(vcpu->arch.exception.nr);
> events->exception.nr = vcpu->arch.exception.nr;
> events->exception.has_error_code = vcpu->arch.exception.has_error_code;
> @@ -3125,6 +3136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
> return -EINVAL;
>
> process_nmi(vcpu);
> + vcpu->arch.exception.injected = false;
> vcpu->arch.exception.pending = events->exception.injected;
> vcpu->arch.exception.nr = events->exception.nr;
> vcpu->arch.exception.has_error_code = events->exception.has_error_code;
> @@ -6350,33 +6362,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> int r;
>
> /* try to reinject previous events if any */
> - if (vcpu->arch.exception.pending) {
> - trace_kvm_inj_exception(vcpu->arch.exception.nr,
> - vcpu->arch.exception.has_error_code,
> - vcpu->arch.exception.error_code);
> -
> - if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> - __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> - X86_EFLAGS_RF);
> -
> - if (vcpu->arch.exception.nr == DB_VECTOR &&
> - (vcpu->arch.dr7 & DR7_GD)) {
> - vcpu->arch.dr7 &= ~DR7_GD;
> - kvm_update_dr7(vcpu);
> - }
> -
> + if (vcpu->arch.exception.injected) {
> kvm_x86_ops->queue_exception(vcpu);
> return 0;
> }
>
> - if (vcpu->arch.nmi_injected) {
> - kvm_x86_ops->set_nmi(vcpu);
> - return 0;
> - }
> + /*
> + * Exceptions must be injected immediately, or the exception
> + * frame will have the address of the NMI or interrupt handler.
> + */
> + if (!vcpu->arch.exception.pending) {
> + if (vcpu->arch.nmi_injected) {
> + kvm_x86_ops->set_nmi(vcpu);
> + return 0;
> + }
>
> - if (vcpu->arch.interrupt.pending) {
> - kvm_x86_ops->set_irq(vcpu);
> - return 0;
> + if (vcpu->arch.interrupt.pending) {
> + kvm_x86_ops->set_irq(vcpu);
> + return 0;
> + }
> }
>
> if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
> @@ -6386,7 +6390,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> }
>
> /* try to inject new event if pending */
> - if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
> + if (vcpu->arch.exception.pending) {
> + trace_kvm_inj_exception(vcpu->arch.exception.nr,
> + vcpu->arch.exception.has_error_code,
> + vcpu->arch.exception.error_code);
> +
> + vcpu->arch.exception.pending = false;
> + vcpu->arch.exception.injected = true;
> + kvm_x86_ops->queue_exception(vcpu);
> +
> + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> + X86_EFLAGS_RF);
> +
> + if (vcpu->arch.exception.nr == DB_VECTOR &&
> + (vcpu->arch.dr7 & DR7_GD)) {
> + vcpu->arch.dr7 &= ~DR7_GD;
> + kvm_update_dr7(vcpu);
> + }
> + } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
> vcpu->arch.smi_pending = false;
> enter_smm(vcpu);
> } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
> @@ -6862,6 +6884,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> kvm_x86_ops->enable_nmi_window(vcpu);
> if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
> kvm_x86_ops->enable_irq_window(vcpu);
> + WARN_ON(vcpu->arch.exception.pending);
> }
>
> if (kvm_lapic_enabled(vcpu)) {
> @@ -7738,6 +7761,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> vcpu->arch.nmi_injected = false;
> kvm_clear_interrupt_queue(vcpu);
> kvm_clear_exception_queue(vcpu);
> + vcpu->arch.exception.pending = false;
>
> memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
> kvm_update_dr0123(vcpu);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 1134603..e6ec0de 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -11,7 +11,7 @@
>
> static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
> {
> - vcpu->arch.exception.pending = false;
> + vcpu->arch.exception.injected = false;
> }
>
> static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
>
> static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
> {
> - return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
> + return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
> vcpu->arch.nmi_injected;
> }
>
>