[PATCH 07/11] arm64: ptdump: Snapshot the host stage-2 pagetables

From: Sebastian Ene
Date: Wed Sep 27 2023 - 07:26:15 EST


Introduce callbacks invoked when the debugfs entry is accessed from the
userspace and store them in the ptdump state structure. Call these
functions when the ptdump registered entry with debugfs is open/closed.
When we open the entry we allocate memory for the host stage-2 snapshot,
we share it with the hypervisor and then we issue the hvc to copy the
page-tables to the shared buffer. When we close the debugfs entry we
release the associated memory resources and we unshare the memory from
the hypervisor.

Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
---
arch/arm64/include/asm/ptdump.h | 5 ++
arch/arm64/mm/ptdump.c | 143 +++++++++++++++++++++++++++++++-
arch/arm64/mm/ptdump_debugfs.c | 34 +++++++-
3 files changed, 179 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 1f6e0aabf16a..35b883524462 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -19,7 +19,12 @@ struct ptdump_info {
struct mm_struct *mm;
const struct addr_marker *markers;
unsigned long base_addr;
+ void (*ptdump_prepare_walk)(struct ptdump_info *info);
void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
+ void (*ptdump_end_walk)(struct ptdump_info *info);
+ struct mutex file_lock;
+ size_t mc_len;
+ void *priv;
};

void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 25c0640e82aa..7d57fa9be724 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -24,6 +24,7 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptdump.h>
+#include <asm/kvm_pkvm.h>
#include <asm/kvm_pgtable.h>


@@ -482,6 +483,139 @@ static struct mm_struct ipa_init_mm = {
.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS,
ipa_init_mm.mmap_lock),
};
+
+static phys_addr_t ptdump_host_pa(void *addr)
+{
+ return __pa(addr);
+}
+
+static void *ptdump_host_va(phys_addr_t phys)
+{
+ return __va(phys);
+}
+
+static size_t stage2_get_pgd_len(void)
+{
+ u64 mmfr0, mmfr1, vtcr;
+ u32 phys_shift = get_kvm_ipa_limit();
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+
+ return kvm_pgtable_stage2_pgd_size(vtcr);
+}
+
+static void stage2_ptdump_prepare_walk(struct ptdump_info *info)
+{
+ struct kvm_pgtable_snapshot *snapshot;
+ int ret, pgd_index, mc_index, pgd_pages_sz;
+ void *page_hva;
+
+ snapshot = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!snapshot)
+ return;
+
+ memset(snapshot, 0, PAGE_SIZE);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(snapshot));
+ if (ret)
+ goto free_snapshot;
+
+ snapshot->pgd_len = stage2_get_pgd_len();
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ snapshot->pgd_hva = alloc_pages_exact(snapshot->pgd_len,
+ GFP_KERNEL_ACCOUNT);
+ if (!snapshot->pgd_hva)
+ goto unshare_snapshot;
+
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret)
+ goto unshare_pgd_pages;
+ }
+
+ for (mc_index = 0; mc_index < info->mc_len; mc_index++) {
+ page_hva = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!page_hva)
+ goto free_memcache_pages;
+
+ push_hyp_memcache(&snapshot->mc, page_hva, ptdump_host_pa);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret) {
+ pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ goto free_memcache_pages;
+ }
+ }
+
+ ret = kvm_call_hyp_nvhe(__pkvm_copy_host_stage2, snapshot);
+ if (ret)
+ goto free_memcache_pages;
+
+ info->priv = snapshot;
+ return;
+
+free_memcache_pages:
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+unshare_pgd_pages:
+ pgd_index = pgd_index - 1;
+ for (; pgd_index >= 0; pgd_index--) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+unshare_snapshot:
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+free_snapshot:
+ free_pages_exact(snapshot, PAGE_SIZE);
+ info->priv = NULL;
+}
+
+static void stage2_ptdump_end_walk(struct ptdump_info *info)
+{
+ struct kvm_pgtable_snapshot *snapshot = info->priv;
+ void *page_hva;
+ int pgd_index, ret, pgd_pages_sz;
+
+ if (!snapshot)
+ return;
+
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+ free_pages_exact(snapshot, PAGE_SIZE);
+ info->priv = NULL;
+}
#endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */

static int __init ptdump_init(void)
@@ -495,11 +629,16 @@ static int __init ptdump_init(void)

#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS
stage2_kernel_ptdump_info = (struct ptdump_info) {
- .markers = ipa_address_markers,
- .mm = &ipa_init_mm,
+ .markers = ipa_address_markers,
+ .mm = &ipa_init_mm,
+ .mc_len = host_s2_pgtable_pages(),
+ .ptdump_prepare_walk = stage2_ptdump_prepare_walk,
+ .ptdump_end_walk = stage2_ptdump_end_walk,
};

init_rwsem(&ipa_init_mm.mmap_lock);
+ mutex_init(&stage2_kernel_ptdump_info.file_lock);
+
ptdump_debugfs_register(&stage2_kernel_ptdump_info,
"host_stage2_kernel_page_tables");
#endif
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 7564519db1e6..14619452dd8d 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -15,7 +15,39 @@ static int ptdump_show(struct seq_file *m, void *v)
put_online_mems();
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ struct ptdump_info *info = inode->i_private;
+
+ ret = single_open(file, ptdump_show, inode->i_private);
+ if (!ret && info->ptdump_prepare_walk) {
+ mutex_lock(&info->file_lock);
+ info->ptdump_prepare_walk(info);
+ }
+ return ret;
+}
+
+static int ptdump_release(struct inode *inode, struct file *file)
+{
+ struct ptdump_info *info = inode->i_private;
+
+ if (info->ptdump_end_walk) {
+ info->ptdump_end_walk(info);
+ mutex_unlock(&info->file_lock);
+ }
+
+ return single_release(inode, file);
+}
+
+static const struct file_operations ptdump_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ptdump_release,
+};

void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
--
2.42.0.515.g380fc7ccd1-goog