[PATCH v2 16/20] kvm: x86/mmu: Support disabling dirty logging for the tdp MMU

From: Ben Gardon
Date: Wed Oct 14 2020 - 14:28:11 EST


Dirty logging ultimately breaks down MMU mappings to 4k granularity.
When dirty logging is no longer needed, these granaular mappings
represent a useless performance penalty. When dirty logging is disabled,
search the paging structure for mappings that could be re-constituted
into a large page mapping. Zap those mappings so that they can be
faulted in again at a higher mapping level.

Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell
machine. This series introduced no new failures.

This series can be viewed in Gerrit at:
https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 3 ++
arch/x86/kvm/mmu/tdp_mmu.c | 59 ++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/mmu/tdp_mmu.h | 2 ++
3 files changed, 64 insertions(+)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b2ce57761d2f1..8fcf5e955c475 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5918,6 +5918,9 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
kvm_mmu_zap_collapsible_spte, true);
+
+ if (kvm->arch.tdp_mmu_enabled)
+ kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
}

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 099c7d68aeb1d..94624cc1df84c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1019,3 +1019,62 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
return spte_set;
}

+/*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+static void zap_collapsible_spte_range(struct kvm *kvm,
+ struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end)
+{
+ struct tdp_iter iter;
+ kvm_pfn_t pfn;
+ bool spte_set = false;
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+ if (!is_shadow_present_pte(iter.old_spte) ||
+ is_last_spte(iter.old_spte, iter.level))
+ continue;
+
+ pfn = spte_to_pfn(iter.old_spte);
+ if (kvm_is_reserved_pfn(pfn) ||
+ !PageTransCompoundMap(pfn_to_page(pfn)))
+ continue;
+
+ tdp_mmu_set_spte(kvm, &iter, 0);
+ spte_set = true;
+
+ spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter);
+ }
+
+ if (spte_set)
+ kvm_flush_remote_tlbs(kvm);
+}
+
+/*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ get_tdp_mmu_root(kvm, root);
+
+ zap_collapsible_spte_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages);
+
+ put_tdp_mmu_root(kvm, root);
+ }
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index add8bb97c56dd..dc4cdc5cc29f5 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -38,4 +38,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
gfn_t gfn, unsigned long mask,
bool wrprot);
bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot);
#endif /* __KVM_X86_MMU_TDP_MMU_H */
--
2.28.0.1011.ga647a8990f-goog