[PATCH 1/2] KVM: x86: Move "KVM no-APIC vCPU" key management into local APIC code

From: Sean Christopherson
Date: Fri Feb 09 2024 - 17:22:51 EST


Move incrementing and decrementing of kvm_has_noapic_vcpu into
kvm_create_lapic() and kvm_free_lapic() respectively to fix a benign bug
bug where KVM fails to decrement the count if vCPU creation ultimately
fails, e.g. due to a memory allocation failing.

Note, the bug is benign as kvm_has_noapic_vcpu is used purely to optimize
lapic_in_kernel() checks, and that optimization is quite dubious. That,
and practically speaking no setup that cares at all about performance runs
with a userspace local APIC.

Reported-by: Li RongQing <lirongqing@xxxxxxxxx>
Cc: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/lapic.c | 27 ++++++++++++++++++++++++++-
arch/x86/kvm/x86.c | 29 +++--------------------------
2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..681f6d82d015 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -124,6 +124,9 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

+__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
+
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);

@@ -2466,8 +2469,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;

- if (!vcpu->arch.apic)
+ if (!vcpu->arch.apic) {
+ static_branch_dec(&kvm_has_noapic_vcpu);
return;
+ }

hrtimer_cancel(&apic->lapic_timer.timer);

@@ -2809,6 +2814,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)

ASSERT(vcpu != NULL);

+ if (!irqchip_in_kernel(vcpu->kvm)) {
+ static_branch_inc(&kvm_has_noapic_vcpu);
+ return 0;
+ }
+
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
if (!apic)
goto nomem;
@@ -2844,6 +2854,21 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

+ /*
+ * Defer evaluating inhibits until the vCPU is first run, as this vCPU
+ * will not get notified of any changes until this vCPU is visible to
+ * other vCPUs (marked online and added to the set of vCPUs).
+ *
+ * Opportunistically mark APICv active as VMX in particularly is highly
+ * unlikely to have inhibits. Ignore the current per-VM APICv state so
+ * that vCPU creation is guaranteed to run with a deterministic value,
+ * the request will ensure the vCPU gets the correct state before VM-Entry.
+ */
+ if (enable_apicv) {
+ apic->apicv_active = true;
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ }
+
return 0;
nomem_free_apic:
kfree(apic);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b66c45e7f6f8..59119157bd20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12053,27 +12053,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (r < 0)
return r;

- if (irqchip_in_kernel(vcpu->kvm)) {
- r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
- if (r < 0)
- goto fail_mmu_destroy;
-
- /*
- * Defer evaluating inhibits until the vCPU is first run, as
- * this vCPU will not get notified of any changes until this
- * vCPU is visible to other vCPUs (marked online and added to
- * the set of vCPUs). Opportunistically mark APICv active as
- * VMX in particularly is highly unlikely to have inhibits.
- * Ignore the current per-VM APICv state so that vCPU creation
- * is guaranteed to run with a deterministic value, the request
- * will ensure the vCPU gets the correct state before VM-Entry.
- */
- if (enable_apicv) {
- vcpu->arch.apic->apicv_active = true;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
- }
- } else
- static_branch_inc(&kvm_has_noapic_vcpu);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ if (r < 0)
+ goto fail_mmu_destroy;

r = -ENOMEM;

@@ -12194,8 +12176,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
kvfree(vcpu->arch.cpuid_entries);
- if (!lapic_in_kernel(vcpu))
- static_branch_dec(&kvm_has_noapic_vcpu);
}

void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -12472,9 +12452,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}

-__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
-EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
-
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
--
2.43.0.687.g38aa6559b0-goog