Re: [PATCH v4 10/32] KVM: x86: Move APIC access page helper to common x86 code

From: Maxim Levitsky
Date: Thu Dec 08 2022 - 16:56:47 EST


On Sat, 2022-10-01 at 00:58 +0000, Sean Christopherson wrote:
> Move the APIC access page allocation helper function to common x86 code,
> the allocation routine is virtually identical between APICv (VMX) and
> AVIC (SVM). Keep APICv's gfn_to_page() + put_page() sequence, which
> verifies that a backing page can be allocated, i.e. that the system isn't
> under heavy memory pressure. Forcing the backing page to be populated
> isn't strictly necessary, but skipping the effective prefetch only delays
> the inevitable.


Just a note on the way we deal with that dummy page differs between APICv and AVIC
a bit:


On APICv we need physical address of the dummy page, since APICv
has physical address of this page in VMCB, and when guest "accesses"
only then APICv specific handling is invoked.

Since we don't want to pin it, we have a mmu notifier telling us where
it is currently and we update the vmcb each time it moves.

If the page is swapped out, mmu notifier will call us, and we will
pretty much swap it back in on next VM entry, in 'vmx_set_apic_access_page_addr'
I hope that this code doesn't have races, I never looked at depth at it.


On AVIC on the other hand we don't force the page to be mapped at all, since
AVIC stores in the vmcb the virtual address of the page and only checkes that
'something' is mapped there.

If we get nothing mapped there, AVIC will not intercept, we will get normal
NPT fault and hopefully swap that page in.


Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
Maxim Levitsky



>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/kvm/lapic.c | 35 +++++++++++++++++++++++++++++++++++
> arch/x86/kvm/lapic.h | 1 +
> arch/x86/kvm/svm/avic.c | 41 +++++++----------------------------------
> arch/x86/kvm/vmx/vmx.c | 35 +----------------------------------
> 4 files changed, 44 insertions(+), 68 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 316b61b56cca..80e8b1cc6dc2 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2436,6 +2436,41 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
> }
> EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
>
> +int kvm_alloc_apic_access_page(struct kvm *kvm)
> +{
> + struct page *page;
> + void __user *hva;
> + int ret = 0;
> +
> + mutex_lock(&kvm->slots_lock);
> + if (kvm->arch.apic_access_memslot_enabled)
> + goto out;
> +
> + hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
> + APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
> + if (IS_ERR(hva)) {
> + ret = PTR_ERR(hva);
> + goto out;
> + }
> +
> + page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
> + if (is_error_page(page)) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + /*
> + * Do not pin the page in memory, so that memory hot-unplug
> + * is able to migrate it.
> + */
> + put_page(page);




> + kvm->arch.apic_access_memslot_enabled = true;
> +out:
> + mutex_unlock(&kvm->slots_lock);
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
> +
> void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
> {
> struct kvm_lapic *apic = vcpu->arch.apic;
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index a5ac4a5a5179..0587a8282cb3 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -112,6 +112,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
> struct dest_map *dest_map);
> int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
> +int kvm_alloc_apic_access_page(struct kvm *kvm);
>
> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 97ad0661f963..ec28ba4c5f1b 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -256,39 +256,6 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
> return &avic_physical_id_table[index];
> }
>
> -/*
> - * Note:
> - * AVIC hardware walks the nested page table to check permissions,
> - * but does not use the SPA address specified in the leaf page
> - * table entry since it uses address in the AVIC_BACKING_PAGE pointer
> - * field of the VMCB. Therefore, we set up the
> - * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
> - */
> -static int avic_alloc_access_page(struct kvm *kvm)
> -{
> - void __user *ret;
> - int r = 0;
> -
> - mutex_lock(&kvm->slots_lock);
> -
> - if (kvm->arch.apic_access_memslot_enabled)
> - goto out;
> -
> - ret = __x86_set_memory_region(kvm,
> - APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
> - APIC_DEFAULT_PHYS_BASE,
> - PAGE_SIZE);
> - if (IS_ERR(ret)) {
> - r = PTR_ERR(ret);
> - goto out;
> - }
> -
> - kvm->arch.apic_access_memslot_enabled = true;
> -out:
> - mutex_unlock(&kvm->slots_lock);
> - return r;
> -}
> -
> static int avic_init_backing_page(struct kvm_vcpu *vcpu)
> {
> u64 *entry, new_entry;
> @@ -305,7 +272,13 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
> if (kvm_apicv_activated(vcpu->kvm)) {
> int ret;
>
> - ret = avic_alloc_access_page(vcpu->kvm);
> + /*
> + * Note, AVIC hardware walks the nested page table to check
> + * permissions, but does not use the SPA address specified in
> + * the leaf SPTE since it uses address in the AVIC_BACKING_PAGE
> + * pointer field of the VMCB.
> + */
> + ret = kvm_alloc_apic_access_page(vcpu->kvm);
> if (ret)
> return ret;
> }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9dba04b6b019..974d9a366d5d 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -3771,39 +3771,6 @@ static void seg_setup(int seg)
> vmcs_write32(sf->ar_bytes, ar);
> }
>
> -static int alloc_apic_access_page(struct kvm *kvm)
> -{
> - struct page *page;
> - void __user *hva;
> - int ret = 0;
> -
> - mutex_lock(&kvm->slots_lock);
> - if (kvm->arch.apic_access_memslot_enabled)
> - goto out;
> - hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
> - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
> - if (IS_ERR(hva)) {
> - ret = PTR_ERR(hva);
> - goto out;
> - }
> -
> - page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
> - if (is_error_page(page)) {
> - ret = -EFAULT;
> - goto out;
> - }
> -
> - /*
> - * Do not pin the page in memory, so that memory hot-unplug
> - * is able to migrate it.
> - */
> - put_page(page);
> - kvm->arch.apic_access_memslot_enabled = true;
> -out:
> - mutex_unlock(&kvm->slots_lock);
> - return ret;
> -}
> -
> int allocate_vpid(void)
> {
> int vpid;
> @@ -7348,7 +7315,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
> vmx->loaded_vmcs = &vmx->vmcs01;
>
> if (cpu_need_virtualize_apic_accesses(vcpu)) {
> - err = alloc_apic_access_page(vcpu->kvm);
> + err = kvm_alloc_apic_access_page(vcpu->kvm);
> if (err)
> goto free_vmcs;
> }