Re: [PATCH v6 5/6] KVM: arm64: Initialize the ptdump parser with stage-2 attributes

From: Sebastian Ene
Date: Thu Feb 22 2024 - 10:19:39 EST


On Tue, Feb 20, 2024 at 03:10:34PM +0000, Sebastian Ene wrote:
> Define a set of attributes used by the ptdump parser to display the
> properties of a guest memory region covered by a pagetable descriptor.
> Build a description of the pagetable levels and initialize the parser
> with this configuration.
>
> Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
> ---
> arch/arm64/kvm/ptdump.c | 146 ++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 139 insertions(+), 7 deletions(-)
>
> diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> index 9b04c24bb9be..2c4e0c122d23 100644
> --- a/arch/arm64/kvm/ptdump.c
> +++ b/arch/arm64/kvm/ptdump.c
> @@ -14,6 +14,61 @@
> #include <kvm_ptdump.h>
>
>
> +#define MARKERS_LEN (2)
> +#define KVM_PGTABLE_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
> +
> +struct kvm_ptdump_guest_state {
> + struct kvm *kvm;
> + struct pg_state parser_state;
> + struct addr_marker ipa_marker[MARKERS_LEN];
> + struct pg_level level[KVM_PGTABLE_LEVELS];
> + struct ptdump_range range[MARKERS_LEN];
> +};
> +
> +static const struct prot_bits stage2_pte_bits[] = {
> + {
> + .mask = PTE_VALID,
> + .val = PTE_VALID,
> + .set = " ",
> + .clear = "F",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> + .set = "XN",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> + .set = "R",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> + .set = "W",
> + .clear = " ",
> + }, {
> + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> + .set = "AF",
> + .clear = " ",
> + }, {
> + .mask = PTE_NG,
> + .val = PTE_NG,
> + .set = "FnXS",
> + .clear = " ",
> + }, {
> + .mask = PTE_CONT | PTE_VALID,
> + .val = PTE_CONT | PTE_VALID,
> + .set = "CON",
> + .clear = " ",
> + }, {
> + .mask = PTE_TABLE_BIT,
> + .val = PTE_TABLE_BIT,
> + .set = " ",
> + .clear = "BLK",
> + },
> +};
> +
> static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
> enum kvm_pgtable_walk_flags visit)
> {
> @@ -37,15 +92,78 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> }
>
> +static int kvm_ptdump_build_levels(struct pg_level *level, u32 start_lvl)
> +{
> + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> + u32 i = 0;
> + u64 mask = 0;
> +
> + if (start_lvl > 2) {
> + pr_err("invalid start_lvl %u\n", start_lvl);
> + return -EINVAL;
> + }
> +
> + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> + mask |= stage2_pte_bits[i].mask;
> +
> + for (i = start_lvl; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
> + level[i].name = level_names[i];
> + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> + level[i].bits = stage2_pte_bits;
> + level[i].mask = mask;
> + }
> +
> + if (start_lvl > 0)
> + level[start_lvl].name = level_names[0];
> +
> + return 0;
> +}
> +
> +static struct kvm_ptdump_guest_state
> +*kvm_ptdump_parser_init(struct kvm *kvm)
> +{
> + struct kvm_ptdump_guest_state *st;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct kvm_pgtable *pgtable = mmu->pgt;
> + int ret;
> +
> + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
> + if (!st)
> + return NULL;
> +
> + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
> + if (ret)
> + goto free_with_state;
> +
> + st->ipa_marker[0].name = "Guest IPA";
> + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
> + st->range[0].end = BIT(pgtable->ia_bits);
> +
> + st->kvm = kvm;
> + st->parser_state = (struct pg_state) {
> + .marker = &st->ipa_marker[0],
> + .level = -1,
> + .pg_level = &st->level[0],
> + .ptdump.range = &st->range[0],
> + };
> +
> + return st;
> +free_with_state:
> + kfree(st);
> + return NULL;
> +}
> +
> static int kvm_ptdump_guest_show(struct seq_file *m, void *)
> {
> - struct kvm *kvm = m->private;
> + struct kvm_ptdump_guest_state *st = m->private;
> + struct kvm *kvm = st->kvm;
> struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> - struct pg_state parser_state = {0};
> int ret;
>
> + st->parser_state.seq = m;
> +
> write_lock(&kvm->mmu_lock);
> - ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> + ret = kvm_ptdump_show_common(m, mmu->pgt, &st->parser_state);
> write_unlock(&kvm->mmu_lock);
>
> return ret;
> @@ -54,22 +172,36 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *)
> static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
> {
> struct kvm *kvm = m->i_private;
> + struct kvm_ptdump_guest_state *st;
> int ret;
>
> - if (!kvm_get_kvm_safe(kvm))
> - return -ENOENT;

FIXME: Keep the kvm_get_kvm_safe() call before invoking
kvm_ptdump_parser_init(), so that we hold a reference on the kvm
instance while the parser state is being built; otherwise the VM can
be destroyed concurrently and the init path dereferences freed memory
(use-after-free).

> + st = kvm_ptdump_parser_init(kvm);
> + if (!st)
> + return -ENOMEM;
>
> - ret = single_open(file, kvm_ptdump_guest_show, m->i_private);
> + if (!kvm_get_kvm_safe(kvm)) {
> + ret = -ENOENT;
> + goto free_with_state;
> + }
> +
> + ret = single_open(file, kvm_ptdump_guest_show, st);
> if (ret < 0)
> - kvm_put_kvm(kvm);
> + goto free_with_kvm_ref;
>
> return ret;
> +free_with_kvm_ref:
> + kvm_put_kvm(kvm);
> +free_with_state:
> + kfree(st);
> + return ret;
> }
>
> static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
> {
> struct kvm *kvm = m->i_private;
> + void *st = ((struct seq_file *)file->private_data)->private;
>
> + kfree(st);
> kvm_put_kvm(kvm);
> return single_release(m, file);
> }
> --
> 2.44.0.rc0.258.g7320e95886-goog
>