Re: [PATCH v2 11/11] arm64: ptdump: Add support for guest stage-2 pagetables dumping

From: Sebastian Ene
Date: Mon Oct 23 2023 - 10:46:38 EST


On Fri, Oct 20, 2023 at 09:40:06AM +0100, Vincent Donnefort wrote:
> On Thu, Oct 19, 2023 at 02:40:33PM +0000, Sebastian Ene wrote:
> > Register a debugfs file on guest creation to be able to view their
> > second translation tables with ptdump. This assumes that the host is in
> > control of the guest stage-2 and has direct access to the pagetables.
>
> What about pKVM? The walker you wrote for the host stage-2 should be
> reusable in that case?
>

Yes, once pKVM is ready upstream, the walker which duplicates the
pagetables for the host will be re-used for the guests. We will have to
add a separate HVC for this which receives the guest vmid as an
argument.

> >
> > Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
> > ---
> > arch/arm64/include/asm/ptdump.h | 21 +++++++--
> > arch/arm64/kvm/mmu.c | 3 ++
> > arch/arm64/mm/ptdump.c | 84 +++++++++++++++++++++++++++++++++
> > arch/arm64/mm/ptdump_debugfs.c | 5 +-
> > 4 files changed, 108 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
> > index 35b883524462..be86244d532b 100644
> > --- a/arch/arm64/include/asm/ptdump.h
> > +++ b/arch/arm64/include/asm/ptdump.h
> > @@ -5,6 +5,8 @@
> > #ifndef __ASM_PTDUMP_H
> > #define __ASM_PTDUMP_H
> >
> > +#include <asm/kvm_pgtable.h>
> > +
> > #ifdef CONFIG_PTDUMP_CORE
> >
> > #include <linux/mm_types.h>
> > @@ -30,14 +32,27 @@ struct ptdump_info {
> > void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
> > #ifdef CONFIG_PTDUMP_DEBUGFS
> > #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
> > -void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
> > +struct dentry *ptdump_debugfs_register(struct ptdump_info *info,
> > + const char *name);
> > #else
> > -static inline void ptdump_debugfs_register(struct ptdump_info *info,
> > - const char *name) { }
> > +static inline struct dentry *ptdump_debugfs_register(struct ptdump_info *info,
> > + const char *name)
> > +{
> > + return NULL;
> > +}
> > #endif
> > void ptdump_check_wx(void);
> > #endif /* CONFIG_PTDUMP_CORE */
> >
> > +#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS
> > +void ptdump_register_guest_stage2(struct kvm_pgtable *pgt, void *lock);
> > +void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt);
> > +#else
> > +static inline void ptdump_register_guest_stage2(struct kvm_pgtable *pgt,
> > + void *lock) { }
> > +static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { }
> > +#endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */
>
> I believe this should be compatible with VHE as well, so that option should
> be renamed.
>

Good point, I will rename this.

> > +
> > #ifdef CONFIG_DEBUG_WX
> > #define debug_checkwx() ptdump_check_wx()
> > #else
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 482280fe22d7..e47988dba34d 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -11,6 +11,7 @@
> > #include <linux/sched/signal.h>
> > #include <trace/events/kvm.h>
> > #include <asm/pgalloc.h>
> > +#include <asm/ptdump.h>
> > #include <asm/cacheflush.h>
> > #include <asm/kvm_arm.h>
> > #include <asm/kvm_mmu.h>
> > @@ -908,6 +909,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
> > if (err)
> > goto out_free_pgtable;
> >
> > + ptdump_register_guest_stage2(pgt, &kvm->mmu_lock);
> > mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
> > if (!mmu->last_vcpu_ran) {
> > err = -ENOMEM;
> > @@ -1021,6 +1023,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
> > write_unlock(&kvm->mmu_lock);
> >
> > if (pgt) {
> > + ptdump_unregister_guest_stage2(pgt);
> > kvm_pgtable_stage2_destroy(pgt);
> > kfree(pgt);
> > }
> > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> > index 4687840dcb69..facfb15468f5 100644
> > --- a/arch/arm64/mm/ptdump.c
> > +++ b/arch/arm64/mm/ptdump.c
> > @@ -26,6 +26,7 @@
> > #include <asm/ptdump.h>
> > #include <asm/kvm_pkvm.h>
> > #include <asm/kvm_pgtable.h>
> > +#include <asm/kvm_host.h>
> >
> >
> > enum address_markers_idx {
> > @@ -543,6 +544,22 @@ void ptdump_check_wx(void)
> > #ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS
> > static struct ptdump_info stage2_kernel_ptdump_info;
> >
> > +#define GUEST_NAME_LEN (32U)
> > +
> > +struct ptdump_registered_guest {
> > + struct list_head reg_list;
> > + struct ptdump_info info;
> > + struct mm_struct mem;
> > + struct kvm_pgtable_snapshot snapshot;
> > + struct dentry *dentry;
> > + rwlock_t *lock;
> > + char reg_name[GUEST_NAME_LEN];
> > +};
> > +
> > +static LIST_HEAD(ptdump_guest_list);
> > +static DEFINE_MUTEX(ptdump_list_lock);
> > +static u16 guest_no;
>
> This is not robust enough: if one VM starts, followed by 65535 others which
> are then killed, guest_no overflows. The next number is 0, which is already
> taken.
>

Yes, I guess this should be improved. In the case you described we won't
register any debugfs file because of the name clash.

> Linux has an ID allocator to solve this problem, but I don't think this is
> necessary anyway. This should simply reuse the struct kvm->debugfs_dentry.
>
> Also probably most of the information contained in ptdump_registered_guest can
> be found in struct kvm. The debugfs should then probably simply take struct kvm
> for the private argument.
>

I would prefer to keep it as a separate struct here as it gives some
flexibility if we need to extend it for pKVM guest support. I think we
can drop the struct mm_struct from here.

Thanks,
Sebastian

> > +
> > static phys_addr_t ptdump_host_pa(void *addr)
> > {
> > return __pa(addr);
> > @@ -740,6 +757,73 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> >
> > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
> > }
>
> [...]