[RFC PATCH] kvm: nv: Optimize the unmapping of shadow S2-MMU tables.

From: Ganapatrao Kulkarni
Date: Tue Mar 05 2024 - 00:46:42 EST


As per commit 178a6915434c ("KVM: arm64: nv: Unmap/flush shadow stage 2
page tables"), whenever pages mapped to L1 are unmapped, they are
invalidated from both the L1 S2-MMU and from all active shadow/L2
S2-MMU tables. Since there is no reverse mapping from a page to the
shadow-S2 IPAs that may map it, a full S2-MMU page table walk and
invalidation is done covering the complete address space allocated to
an L2. This hurts performance and can even cause soft lockups for
NV (L1 and L2) boots with a higher number of CPUs and large memory.

Add a lookup table that records the mapping from canonical IPA to
shadow IPA whenever a page is mapped into any of the L2s. When a page
is unmapped, this lookup tells us whether it is mapped in any of the
shadow S2-MMU tables, so we unmap it only where it is actually
present. This avoids unnecessarily long iterations of S2-MMU table
walk-through and invalidation over the complete address space.

Signed-off-by: Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx>
---
arch/arm64/include/asm/kvm_emulate.h | 5 ++
arch/arm64/include/asm/kvm_host.h | 14 ++++
arch/arm64/include/asm/kvm_nested.h | 4 +
arch/arm64/kvm/mmu.c | 19 ++++-
arch/arm64/kvm/nested.c | 113 +++++++++++++++++++++++++++
5 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5173f8cf2904..f503b2eaedc4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -656,4 +656,9 @@ static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
vcpu->arch.hw_mmu->nested_stage2_enabled);
}

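+/*
+ * True when this vCPU's hardware MMU is one of the shadow S2 MMUs
+ * rather than the canonical (L1) S2 MMU.
+ */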
+static inline bool kvm_is_l1_using_shadow_s2(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.hw_mmu != &vcpu->kvm->arch.mmu);
+}
+
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8da3c9a81ae3..f61c674c300a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -144,6 +144,13 @@ struct kvm_vmid {
atomic64_t id;
};

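+/*
+ * A node in the canonical IPA to shadow IPA lookup tree, keyed by
+ * the canonical IPA. @size is the size of the mapping in bytes.
+ */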
+struct mapipa_node {
+ struct rb_node node;
+ phys_addr_t ipa;
+ phys_addr_t shadow_ipa;
+ long size;
+};
+
struct kvm_s2_mmu {
struct kvm_vmid vmid;

@@ -216,6 +223,13 @@ struct kvm_s2_mmu {
* >0: Somebody is actively using this.
*/
atomic_t refcnt;
+
+ /*
+ * Lookup table of canonical IPA to shadow IPA mappings for this
+ * shadow S2, protected by the rwlock below.
+ */
+ struct rb_root nested_mapipa_root;
+ rwlock_t mmu_lock;
};

static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index da7ebd2f6e24..c31a59a1fdc6 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -65,6 +65,9 @@ extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
+extern void add_shadow_ipa_map_node(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ phys_addr_t shadow_ipa, long size);

union tlbi_info;

@@ -123,6 +126,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
extern void kvm_nested_s2_wp(struct kvm *kvm);
extern void kvm_nested_s2_unmap(struct kvm *kvm);
+extern void kvm_nested_s2_unmap_range(struct kvm *kvm, struct kvm_gfn_range *range);
extern void kvm_nested_s2_flush(struct kvm *kvm);
int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 61bdd8798f83..3948681426a0 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1695,6 +1695,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
memcache,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
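+ /*
+ * Record the canonical IPA to shadow IPA mapping, so that a
+ * later unmap of the canonical page can be limited to the
+ * shadow S2s that actually map it.
+ */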
+ if ((nested || kvm_is_l1_using_shadow_s2(vcpu)) && !ret) {
+ struct kvm_s2_mmu *shadow_s2_mmu;
+
+ ipa &= ~(vma_pagesize - 1);
+ shadow_s2_mmu = lookup_s2_mmu(vcpu);
+ add_shadow_ipa_map_node(shadow_s2_mmu, ipa, fault_ipa, vma_pagesize);
+ }
}

/* Mark the page dirty only if the fault is handled successfully */
@@ -1918,7 +1925,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
(range->end - range->start) << PAGE_SHIFT,
range->may_block);

- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap_range(kvm, range);
return false;
}

@@ -1953,7 +1960,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
PAGE_SIZE, __pfn_to_phys(pfn),
KVM_PGTABLE_PROT_R, NULL, 0);

- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap_range(kvm, range);
return false;
}

@@ -2223,12 +2230,18 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ struct kvm_gfn_range range;
+
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;

+ /* kvm_gfn_range carries GFNs, not GPAs */
+ range.start = slot->base_gfn;
+ range.end = slot->base_gfn + slot->npages;
+ range.may_block = true;
+
write_lock(&kvm->mmu_lock);
kvm_unmap_stage2_range(&kvm->arch.mmu, gpa, size);
- kvm_nested_s2_unmap(kvm);
+ kvm_nested_s2_unmap_range(kvm, &range);
write_unlock(&kvm->mmu_lock);
}

diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f88d9213c6b3..888ec9fba4a0 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -565,6 +565,88 @@ void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
write_unlock(&kvm->mmu_lock);
}

+/*
+ * Create a node and add it to the lookup table whenever a page gets
+ * mapped at a canonical IPA and is also mapped at a shadow IPA.
+ */
+void add_shadow_ipa_map_node(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa,
+ phys_addr_t shadow_ipa, long size)
+{
+ struct rb_root *ipa_root = &(mmu->nested_mapipa_root);
+ struct rb_node **node = &(ipa_root->rb_node), *parent = NULL;
+ struct mapipa_node *new;
+
+ /* Called under kvm->mmu_lock from the fault path; must not sleep. */
+ new = kzalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new)
+ return;
+
+ new->shadow_ipa = shadow_ipa;
+ new->ipa = ipa;
+ new->size = size;
+
+ write_lock(&mmu->mmu_lock);
+
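+ /* Standard rb-tree descent, keyed by the canonical IPA. */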
+ while (*node) {
+ struct mapipa_node *tmp;
+
+ tmp = container_of(*node, struct mapipa_node, node);
+ parent = *node;
+ if (new->ipa < tmp->ipa) {
+ node = &(*node)->rb_left;
+ } else if (new->ipa > tmp->ipa) {
+ node = &(*node)->rb_right;
+ } else {
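+ /* The canonical IPA is already tracked, keep the existing node. */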
+ write_unlock(&mmu->mmu_lock);
+ kfree(new);
+ return;
+ }
+ }
+
+ rb_link_node(&new->node, parent, node);
+ rb_insert_color(&new->node, ipa_root);
+ write_unlock(&mmu->mmu_lock);
+}
+
+/*
+ * Look up the canonical IPA in the lookup table. If the page is also
+ * mapped at a shadow IPA, return that shadow IPA and the mapping size,
+ * and remove the node, since the caller is about to unmap the range.
+ */
+static bool get_shadow_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, phys_addr_t *shadow_ipa, long *size)
+{
+ struct rb_node *node;
+ struct mapipa_node *tmp;
+
+ /*
+ * Hold the write lock across both the lookup and the erase, so
+ * that a concurrent unmap cannot erase and free the node between
+ * finding it and removing it.
+ */
+ write_lock(&mmu->mmu_lock);
+ node = mmu->nested_mapipa_root.rb_node;
+
+ while (node) {
+ tmp = container_of(node, struct mapipa_node, node);
+
+ if (tmp->ipa == ipa) {
+ *shadow_ipa = tmp->shadow_ipa;
+ *size = tmp->size;
+ rb_erase(&tmp->node, &mmu->nested_mapipa_root);
+ write_unlock(&mmu->mmu_lock);
+ kfree(tmp);
+ return true;
+ }
+
+ if (ipa > tmp->ipa)
+ node = node->rb_right;
+ else
+ node = node->rb_left;
+ }
+
+ write_unlock(&mmu->mmu_lock);
+ return false;
+}
+
/* Must be called with kvm->mmu_lock held */
struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
{
@@ -674,6 +756,7 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
mmu->tlb_vttbr = 1;
mmu->nested_stage2_enabled = false;
atomic_set(&mmu->refcnt, 0);
+ mmu->nested_mapipa_root = RB_ROOT;
+ rwlock_init(&mmu->mmu_lock);
}

void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
@@ -760,6 +843,36 @@ void kvm_nested_s2_unmap(struct kvm *kvm)
}
}

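+/* expects kvm->mmu_lock to be held */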
+void kvm_nested_s2_unmap_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ int i;
+ long size;
+ bool ret;
+
+ for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
+ struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
+
+ if (kvm_s2_mmu_valid(mmu)) {
+ phys_addr_t shadow_ipa, start, end;
+
+ start = range->start << PAGE_SHIFT;
+ end = range->end << PAGE_SHIFT;
+
+ while (start < end) {
+ size = PAGE_SIZE;
+ /*
+ * Get the shadow IPA if the page is mapped to L1
+ * and also mapped into any active L2. On a hit,
+ * get_shadow_ipa() returns the mapping size, so a
+ * block mapping is skipped in a single step.
+ */
+ ret = get_shadow_ipa(mmu, start, &shadow_ipa, &size);
+ if (ret)
+ kvm_unmap_stage2_range(mmu, shadow_ipa, size);
+ start += size;
+ }
+ }
+ }
+}
+
/* expects kvm->mmu_lock to be held */
void kvm_nested_s2_flush(struct kvm *kvm)
{
--
2.40.1