[PATCH v3 06/10] arm64: ptdump: Register a debugfs entry for the host stage-2 tables

From: Sebastian Ene
Date: Wed Nov 15 2023 - 12:17:22 EST


Initialize a structures used to keep the state of the host stage-2 ptdump
walker when pKVM is enabled. Create a new debugfs entry for the host
stage-2 pagetables and hook the callbacks invoked when the entry is
accessed. When the debugfs file is opened, allocate memory resources which
will be shared with the hypervisor for saving the pagetable snapshot.
On close release the associated memory and we unshare it from the
hypervisor.

Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
---
arch/arm64/include/asm/ptdump.h | 12 +++
arch/arm64/kvm/Kconfig | 13 +++
arch/arm64/kvm/arm.c | 2 +
arch/arm64/mm/ptdump.c | 168 ++++++++++++++++++++++++++++++++
arch/arm64/mm/ptdump_debugfs.c | 8 +-
5 files changed, 202 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 9b2bebfcefbe..de5a5a0c0ecf 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -22,6 +22,7 @@ struct ptdump_info {
void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
int (*ptdump_prepare_walk)(void *file_priv);
void (*ptdump_end_walk)(void *file_priv);
+ size_t mc_len;
};

void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
@@ -33,13 +34,24 @@ struct ptdump_info_file_priv {
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
+ struct dentry *d_entry);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
+static inline void ptdump_debugfs_kvm_register(struct ptdump_info *info,
+ const char *name,
+ struct dentry *d_entry) { }
#endif
void ptdump_check_wx(void);
#endif /* CONFIG_PTDUMP_CORE */

+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void ptdump_register_host_stage2(void);
+#else
+static inline void ptdump_register_host_stage2(void) { }
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_check_wx()
#else
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 83c1e09be42e..cf5b7f06b152 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -71,4 +71,17 @@ config PROTECTED_NVHE_STACKTRACE

If unsure, or not using protected nVHE (pKVM), say N.

+config PTDUMP_STAGE2_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on NVHE_EL2_DEBUG && PTDUMP_DEBUGFS && KVM
+ default n
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables
+ layout in a debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e5f75f1f1085..987683650576 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -28,6 +28,7 @@

#include <linux/uaccess.h>
#include <asm/ptrace.h>
+#include <asm/ptdump.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -2592,6 +2593,7 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;

+ ptdump_register_host_stage2();
kvm_arm_initialised = true;

return 0;
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index d531e24ea0b2..0b4cb54e43ff 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -24,6 +24,9 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptdump.h>
+#include <asm/kvm_pkvm.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/kvm_host.h>


enum address_markers_idx {
@@ -378,6 +381,170 @@ void ptdump_check_wx(void)
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}

+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+static struct ptdump_info stage2_kernel_ptdump_info;
+
+static phys_addr_t ptdump_host_pa(void *addr)
+{
+ return __pa(addr);
+}
+
+static void *ptdump_host_va(phys_addr_t phys)
+{
+ return __va(phys);
+}
+
+static size_t stage2_get_pgd_len(void)
+{
+ u64 mmfr0, mmfr1, vtcr;
+ u32 phys_shift = get_kvm_ipa_limit();
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+
+ return kvm_pgtable_stage2_pgd_size(vtcr);
+}
+
+static int stage2_ptdump_prepare_walk(void *file_priv)
+{
+ struct ptdump_info_file_priv *f_priv = file_priv;
+ struct ptdump_info *info = &f_priv->info;
+ struct kvm_pgtable_snapshot *snapshot;
+ int ret, pgd_index, mc_index, pgd_pages_sz;
+ void *page_hva;
+ phys_addr_t pgd;
+
+ snapshot = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!snapshot)
+ return -ENOMEM;
+
+ memset(snapshot, 0, PAGE_SIZE);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(snapshot));
+ if (ret)
+ goto free_snapshot;
+
+ snapshot->pgd_len = stage2_get_pgd_len();
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ snapshot->pgd_hva = alloc_pages_exact(snapshot->pgd_len,
+ GFP_KERNEL_ACCOUNT);
+ if (!snapshot->pgd_hva) {
+ ret = -ENOMEM;
+ goto unshare_snapshot;
+ }
+
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret)
+ goto unshare_pgd_pages;
+ }
+
+ for (mc_index = 0; mc_index < info->mc_len; mc_index++) {
+ page_hva = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!page_hva) {
+ ret = -ENOMEM;
+ goto free_memcache_pages;
+ }
+
+ push_hyp_memcache(&snapshot->mc, page_hva, ptdump_host_pa);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret) {
+ pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ goto free_memcache_pages;
+ }
+ }
+
+ ret = kvm_call_hyp_nvhe(__pkvm_copy_host_stage2, snapshot);
+ if (ret)
+ goto free_memcache_pages;
+
+ pgd = (phys_addr_t)snapshot->pgtable.pgd;
+ snapshot->pgtable.pgd = phys_to_virt(pgd);
+ f_priv->file_priv = snapshot;
+ return 0;
+
+free_memcache_pages:
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+unshare_pgd_pages:
+ pgd_index = pgd_index - 1;
+ for (; pgd_index >= 0; pgd_index--) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+unshare_snapshot:
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+free_snapshot:
+ free_pages_exact(snapshot, PAGE_SIZE);
+ f_priv->file_priv = NULL;
+ return ret;
+}
+
+static void stage2_ptdump_end_walk(void *file_priv)
+{
+ struct ptdump_info_file_priv *f_priv = file_priv;
+ struct kvm_pgtable_snapshot *snapshot = f_priv->file_priv;
+ void *page_hva;
+ int pgd_index, ret, pgd_pages_sz;
+
+ if (!snapshot)
+ return;
+
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+ free_pages_exact(snapshot, PAGE_SIZE);
+ f_priv->file_priv = NULL;
+}
+
+void ptdump_register_host_stage2(void)
+{
+ if (!is_protected_kvm_enabled())
+ return;
+
+ stage2_kernel_ptdump_info = (struct ptdump_info) {
+ .mc_len = host_s2_pgtable_pages(),
+ .ptdump_prepare_walk = stage2_ptdump_prepare_walk,
+ .ptdump_end_walk = stage2_ptdump_end_walk,
+ };
+
+ ptdump_debugfs_kvm_register(&stage2_kernel_ptdump_info,
+ "host_stage2_page_tables",
+ kvm_debugfs_dir);
+}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
static int __init ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
@@ -386,6 +553,7 @@ static int __init ptdump_init(void)
#endif
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
+
return 0;
}
device_initcall(ptdump_init);
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 3bf5de51e8c3..4821dbef784c 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -68,5 +68,11 @@ static const struct file_operations ptdump_fops = {

void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
- debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+ ptdump_debugfs_kvm_register(info, name, NULL);
+}
+
+void ptdump_debugfs_kvm_register(struct ptdump_info *info, const char *name,
+ struct dentry *d_entry)
+{
+ debugfs_create_file(name, 0400, d_entry, info, &ptdump_fops);
}
--
2.43.0.rc0.421.g78406f8d94-goog