[PATCH 23/24] kvm: x86/mmu: Freeze SPTEs in disconnected pages

From: Ben Gardon
Date: Tue Jan 12 2021 - 13:14:34 EST


When clearing TDP MMU pages what have been disconnected from the paging
structure root, set the SPTEs to a special non-present value which will
not be overwritten by other threads. This is needed to prevent races in
which a thread is clearing a disconnected page table, but another thread
has already acquired a pointer to that memory and installs a mapping in
an already cleared entry. This can lead to memory leaks and accounting
errors.

Reviewed-by: Peter Feiner <pfeiner@xxxxxxxxxx>

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
arch/x86/kvm/mmu/tdp_mmu.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5c9d053000ad..45160ff84e91 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -333,13 +333,14 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
{
struct kvm_mmu_page *sp;
gfn_t gfn;
+ gfn_t base_gfn;
int level;
u64 *sptep;
u64 old_child_spte;
int i;

sp = sptep_to_sp(pt);
- gfn = sp->gfn;
+ base_gfn = sp->gfn;
level = sp->role.level;

trace_kvm_mmu_prepare_zap_page(sp);
@@ -348,16 +349,38 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,

for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
sptep = pt + i;
+ gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));

if (atomic) {
- old_child_spte = xchg(sptep, 0);
+ /*
+ * Set the SPTE to a nonpresent value that other
+ * threads will not overwrite. If the SPTE was already
+ * frozen then another thread handling a page fault
+ * could overwrite it, so set the SPTE until it is set
+ * from nonfrozen -> frozen.
+ */
+ for (;;) {
+ old_child_spte = xchg(sptep, FROZEN_SPTE);
+ if (old_child_spte != FROZEN_SPTE)
+ break;
+ cpu_relax();
+ }
} else {
old_child_spte = READ_ONCE(*sptep);
- WRITE_ONCE(*sptep, 0);
+
+ /*
+ * Setting the SPTE to FROZEN_SPTE is not strictly
+ * necessary here as the MMU lock should stop other
+ * threads from concurrentrly modifying this SPTE.
+ * Using FROZEN_SPTE keeps the atomic and
+ * non-atomic cases consistent and simplifies the
+ * function.
+ */
+ WRITE_ONCE(*sptep, FROZEN_SPTE);
}
- handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
- gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
- old_child_spte, 0, level - 1, atomic);
+ handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
+ old_child_spte, FROZEN_SPTE, level - 1,
+ atomic);
}

kvm_flush_remote_tlbs_with_address(kvm, gfn,
--
2.30.0.284.gd98b1dd5eaa7-goog