Re: [PATCH v2 4/6] KVM: x86/mmu: Track the number of TDP MMU pages, but not the actual pages

From: David Matlack
Date: Mon Jul 25 2022 - 19:21:19 EST


On Sat, Jul 23, 2022 at 01:23:23AM +0000, Sean Christopherson wrote:
> Track the number of TDP MMU "shadow" pages instead of tracking the pages
> themselves. With the NX huge page list manipulation moved out of the common
> linking flow, elminating the list-based tracking means the happy path of
> adding a shadow page doesn't need to acquire a spinlock and can instead
> inc/dec an atomic.
>
> Keep the tracking as the WARN during TDP MMU teardown on leaked shadow
> pages is very, very useful for detecting KVM bugs.
>
> Tracking the number of pages will also make it trivial to expose the
> counter to userspace as a stat in the future, which may or may not be
> desirable.
>
> Note, the TDP MMU needs to use a separate counter (and stat if that ever
> comes to be) from the existing n_used_mmu_pages. The TDP MMU doesn't bother
> supporting the shrinker nor does it honor KVM_SET_NR_MMU_PAGES (because the
> TDP MMU consumes so few pages relative to shadow paging), and including TDP
> MMU pages in that counter would break both the shrinker and shadow MMUs,
> e.g. if a VM is using nested TDP.
>
> Cc: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
> Reviewed-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>

Reviewed-by: David Matlack <dmatlack@xxxxxxxxxx>

> ---
> arch/x86/include/asm/kvm_host.h | 11 +++--------
> arch/x86/kvm/mmu/tdp_mmu.c | 19 +++++++++----------
> 2 files changed, 12 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 246b69262b93..5c269b2556d6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1271,6 +1271,9 @@ struct kvm_arch {
> */
> bool tdp_mmu_enabled;
>
> + /* The number of TDP MMU pages across all roots. */
> + atomic64_t tdp_mmu_pages;

This is the number of non-root TDP MMU pages, right?

> +
> /*
> * List of struct kvm_mmu_pages being used as roots.
> * All struct kvm_mmu_pages in the list should have
> @@ -1291,18 +1294,10 @@ struct kvm_arch {
> */
> struct list_head tdp_mmu_roots;
>
> - /*
> - * List of struct kvmp_mmu_pages not being used as roots.
> - * All struct kvm_mmu_pages in the list should have
> - * tdp_mmu_page set and a tdp_mmu_root_count of 0.
> - */
> - struct list_head tdp_mmu_pages;
> -
> /*
> * Protects accesses to the following fields when the MMU lock
> * is held in read mode:
> * - tdp_mmu_roots (above)
> - * - tdp_mmu_pages (above)
> * - the link field of struct kvm_mmu_pages used by the TDP MMU
> * - possible_nx_huge_pages;
> * - the possible_nx_huge_page_link field of struct kvm_mmu_pages used
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 626c40ec2af9..fea22dc481a0 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
> kvm->arch.tdp_mmu_enabled = true;
> INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
> spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
> - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
> kvm->arch.tdp_mmu_zap_wq = wq;
> return 1;
> }
> @@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
> /* Also waits for any queued work items. */
> destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
>
> - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
> + WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
> WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
>
> /*
> @@ -386,16 +385,18 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
> static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
> bool shared)
> {
> + atomic64_dec(&kvm->arch.tdp_mmu_pages);
> +
> + if (!sp->nx_huge_page_disallowed)
> + return;
> +
> if (shared)
> spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> else
> lockdep_assert_held_write(&kvm->mmu_lock);
>
> - list_del(&sp->link);
> - if (sp->nx_huge_page_disallowed) {
> - sp->nx_huge_page_disallowed = false;
> - untrack_possible_nx_huge_page(kvm, sp);
> - }
> + sp->nx_huge_page_disallowed = false;
> + untrack_possible_nx_huge_page(kvm, sp);
>
> if (shared)
> spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> @@ -1132,9 +1133,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
> tdp_mmu_set_spte(kvm, iter, spte);
> }
>
> - spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> - list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
> - spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> + atomic64_inc(&kvm->arch.tdp_mmu_pages);
>
> return 0;
> }
> --
> 2.37.1.359.gd136c6c3e2-goog
>