[PATCH 19/24] kvm: x86/mmu: Protect tdp_mmu_pages with a lock

From: Ben Gardon
Date: Tue Jan 12 2021 - 13:12:40 EST


Add a lock to protect the data structures that track the page table
memory used by the TDP MMU. In order to handle multiple TDP MMU
operations in parallel, pages of PT memory must be added and removed
without the exclusive protection of the MMU lock. A new lock to protect
the list(s) of in-use pages will cause some serialization, but only on
non-leaf page table entries, so the lock is not expected to be very
contended.

Reviewed-by: Peter Feiner <pfeiner@xxxxxxxxxx>

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 15 ++++++++
arch/x86/kvm/mmu/tdp_mmu.c | 67 +++++++++++++++++++++++++++++----
2 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 92d5340842c8..f8dccb27c722 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1034,6 +1034,21 @@ struct kvm_arch {
* tdp_mmu_page set and a root_count of 0.
*/
struct list_head tdp_mmu_pages;
+
+ /*
+ * Protects accesses to the following fields when the MMU lock is
+ * not held exclusively:
+ * - tdp_mmu_pages (above)
+ * - the link field of struct kvm_mmu_page used by the TDP MMU
+ * when they are part of tdp_mmu_pages (but not when they are part
+ * of the tdp_mmu_free_list or tdp_mmu_disconnected_list)
+ * - lpage_disallowed_mmu_pages
+ * - the lpage_disallowed_link field of struct kvm_mmu_page used
+ * by the TDP MMU
+ * May be acquired under the MMU lock in read mode or non-overlapping
+ * with the MMU lock.
+ */
+ spinlock_t tdp_mmu_pages_lock;
};

struct kvm_vm_stat {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 8b61bdb391a0..264594947c3b 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -33,6 +33,7 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
kvm->arch.tdp_mmu_enabled = true;

INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
+ spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
}

@@ -262,6 +263,58 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
}
}

+/**
+ * tdp_mmu_link_page - Add a new page to the list of pages used by the TDP MMU
+ *
+ * @kvm: kvm instance
+ * @sp: the new page
+ * @atomic: This operation is not running under the exclusive use of the MMU
+ * lock and the operation must be atomic with respect to other threads
+ * that might be adding or removing pages.
+ * @account_nx: This page replaces a NX large page and should be marked for
+ * eventual reclaim.
+ */
+static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ bool atomic, bool account_nx)
+{
+ if (atomic)
+ spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+ else
+ kvm_mmu_lock_assert_held_exclusive(kvm);
+
+ list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
+ if (account_nx)
+ account_huge_nx_page(kvm, sp);
+
+ if (atomic)
+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+}
+
+/**
+ * tdp_mmu_unlink_page - Remove page from the list of pages used by the TDP MMU
+ *
+ * @kvm: kvm instance
+ * @sp: the page to be removed
+ * @atomic: This operation is not running under the exclusive use of the MMU
+ * lock and the operation must be atomic with respect to other threads
+ * that might be adding or removing pages.
+ */
+static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ bool atomic)
+{
+ if (atomic)
+ spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+ else
+ kvm_mmu_lock_assert_held_exclusive(kvm);
+
+ list_del(&sp->link);
+ if (sp->lpage_disallowed)
+ unaccount_huge_nx_page(kvm, sp);
+
+ if (atomic)
+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+}
+
/**
* handle_disconnected_tdp_mmu_page - handle a pt removed from the TDP structure
*
@@ -285,10 +338,7 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)

trace_kvm_mmu_prepare_zap_page(sp);

- list_del(&sp->link);
-
- if (sp->lpage_disallowed)
- unaccount_huge_nx_page(kvm, sp);
+ tdp_mmu_unlink_page(kvm, sp, atomic);

for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
old_child_spte = READ_ONCE(*(pt + i));
@@ -719,15 +769,16 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,

if (!is_shadow_present_pte(iter.old_spte)) {
sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
- list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
child_pt = sp->spt;
+
+ tdp_mmu_link_page(vcpu->kvm, sp, false,
+ huge_page_disallowed &&
+ req_level >= iter.level);
+
new_spte = make_nonleaf_spte(child_pt,
!shadow_accessed_mask);

trace_kvm_mmu_get_page(sp, true);
- if (huge_page_disallowed && req_level >= iter.level)
- account_huge_nx_page(vcpu->kvm, sp);
-
tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
}
}
--
2.30.0.284.gd98b1dd5eaa7-goog