[PATCH RFC v3 32/35] KVM: arm64: mte: Reserve tag storage for virtual machines with MTE

From: Alexandru Elisei
Date: Thu Jan 25 2024 - 11:56:35 EST


KVM allows MTE-enabled VMs to be created when the backing VMA does not have
MTE enabled. As a result, pages allocated for the virtual machine's memory
won't have tag storage reserved. Try to reserve tag storage the first time
the page is accessed by the guest. This is similar to how pages mapped
without tag storage in an MTE VMA are handled.

Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx>
---

Changes since rfc v2:

* New patch.

arch/arm64/include/asm/mte_tag_storage.h | 10 ++++++
arch/arm64/include/asm/pgtable.h | 7 +++-
arch/arm64/kvm/mmu.c | 43 ++++++++++++++++++++++++
arch/arm64/mm/fault.c | 2 +-
4 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/mte_tag_storage.h b/arch/arm64/include/asm/mte_tag_storage.h
index 40590a8c3748..32940ef7bcdf 100644
--- a/arch/arm64/include/asm/mte_tag_storage.h
+++ b/arch/arm64/include/asm/mte_tag_storage.h
@@ -34,6 +34,8 @@ void free_tag_storage(struct page *page, int order);
bool page_tag_storage_reserved(struct page *page);
bool page_is_tag_storage(struct page *page);

+int replace_folio_with_tagged(struct folio *folio);
+
vm_fault_t handle_folio_missing_tag_storage(struct folio *folio, struct vm_fault *vmf,
bool *map_pte);
vm_fault_t mte_try_transfer_swap_tags(swp_entry_t entry, struct page *page);
@@ -67,6 +69,14 @@ static inline bool page_tag_storage_reserved(struct page *page)
{
return true;
}
+static inline bool page_is_tag_storage(struct page *page)
+{
+ return false; /* presumably the !CONFIG_ARM64_MTE_TAG_STORAGE stub -- TODO confirm */
+}
+static inline int replace_folio_with_tagged(struct folio *folio)
+{
+ return -EINVAL; /* NOTE(review): stub does not drop the folio reference -- confirm OK */
+}
#endif /* CONFIG_ARM64_MTE_TAG_STORAGE */

#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d0473538c926..7f89606ad617 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1108,7 +1108,12 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#define __HAVE_ARCH_FREE_PAGES_PREPARE
static inline void arch_free_pages_prepare(struct page *page, int order)
{
- if (tag_storage_enabled() && page_mte_tagged(page))
+ /*
+ * KVM can free a page after tag storage has been reserved and before it
+ * is marked as tagged, hence use page_tag_storage_reserved() instead of
+ * page_mte_tagged() to check for tag storage.
+ */
+ if (tag_storage_enabled() && page_tag_storage_reserved(page))
free_tag_storage(page, order);
}

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index b7517c4a19c4..986a9544228d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1361,6 +1361,8 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
if (!kvm_has_mte(kvm))
return;

+ WARN_ON_ONCE(tag_storage_enabled() && !page_tag_storage_reserved(pfn_to_page(pfn)));
+
for (i = 0; i < nr_pages; i++, page++) {
if (try_page_mte_tagging(page)) {
mte_clear_page_tags(page_address(page));
@@ -1374,6 +1376,39 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
return vma->vm_flags & VM_MTE_ALLOWED;
}

+/*
+ * Reserve tag storage for the folio backing @pfn. Called with an elevated
+ * reference on the pfn: on success (0) the reference count is not changed,
+ * on error (only -EAGAIN here) the elevated reference has been dropped.
+ */
+static int kvm_mte_reserve_tag_storage(kvm_pfn_t pfn)
+{
+ struct folio *folio;
+ int ret;
+
+ folio = page_folio(pfn_to_page(pfn));
+
+ if (page_tag_storage_reserved(folio_page(folio, 0)))
+ return 0; /* already reserved, nothing to do */
+
+ if (page_is_tag_storage(folio_page(folio, 0)))
+ goto migrate; /* presumably tag storage cannot itself be tagged -- TODO confirm */
+
+ ret = reserve_tag_storage(folio_page(folio, 0), folio_order(folio),
+ GFP_HIGHUSER_MOVABLE);
+ if (!ret)
+ return 0;
+
+migrate:
+ replace_folio_with_tagged(folio); /* result ignored: both outcomes replay the fault */
+ /*
+ * If migration succeeds, the fault needs to be replayed because 'pfn'
+ * has been unmapped. If migration fails, KVM will try to reserve tag
+ * storage again by replaying the fault.
+ */
+ return -EAGAIN;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
bool fault_is_perm)
@@ -1488,6 +1523,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,

pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
write_fault, &writable, NULL);
+
if (pfn == KVM_PFN_ERR_HWPOISON) {
kvm_send_hwpoison_signal(hva, vma_shift);
return 0;
@@ -1518,6 +1554,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && device)
return -ENOEXEC;

+ if (tag_storage_enabled() && !fault_is_perm && !device &&
+ kvm_has_mte(kvm) && mte_allowed) {
+ ret = kvm_mte_reserve_tag_storage(pfn);
+ if (ret)
+ return ret == -EAGAIN ? 0 : ret;
+ }
+
read_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
if (mmu_invalidate_retry(kvm, mmu_seq))
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 01450ab91a87..5c12232bdf0b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -974,7 +974,7 @@ void tag_clear_highpage(struct page *page)
* Called with an elevated reference on the folio.
* Returns with the elevated reference dropped.
*/
-static int replace_folio_with_tagged(struct folio *folio)
+int replace_folio_with_tagged(struct folio *folio)
{
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
--
2.43.0