[RFC] x86/mm/dump_pagetables: Allow dumping pagetables by pid

From: lizhe . 67
Date: Thu Aug 04 2022 - 03:04:18 EST


From: Li Zhe <lizhe.67@xxxxxxxxxxxxx>

In the current kernel, a user task's page tables can only be
dumped by the task itself. Sometimes we need to inspect the page
table attributes of different user-space memory mappings to meet
development and debugging requirements. This patch makes that
work easier. It adds two debugfs files named 'pid' and
'pid_pgtable_show'. Write the pid of the task to inspect to 'pid',
then read that task's page table information from
'pid_pgtable_show'.

User space can use the files 'pid' and 'pid_pgtable_show' as follows:
====
$ echo $pid > /sys/kernel/debug/page_tables/pid
$ cat /sys/kernel/debug/page_tables/pid_pgtable_show

Signed-off-by: Li Zhe <lizhe.67@xxxxxxxxxxxxx>
---
arch/x86/mm/debug_pagetables.c | 82 ++++++++++++++++++++++++++++++++++
1 file changed, 82 insertions(+)

diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index 092ea436c7e6..53a8ced44080 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -4,6 +4,8 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/pgtable.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>

static int ptdump_show(struct seq_file *m, void *v)
{
@@ -31,6 +33,84 @@ static int ptdump_curusr_show(struct seq_file *m, void *v)
}

DEFINE_SHOW_ATTRIBUTE(ptdump_curusr);
+
+/* Pid written to the debugfs 'pid' file; while it is 0 the dump below is empty. */
+static pid_t trace_pid;
+
+/*
+ * seq_file show callback behind 'pid_pgtable_show': walk the page tables of
+ * the mm belonging to the task whose pid is stored in trace_pid.
+ *
+ * Returns -ESRCH when find_task_by_vpid() finds no task, 0 otherwise (also
+ * when trace_pid is 0 or the task has no mm / pgd to walk).
+ */
+static int ptdump_pid_pgtable_show(struct seq_file *m, void *v)
+{
+	struct mm_struct *target_mm = NULL;
+	struct task_struct *tsk;
+
+	if (!trace_pid)
+		return 0;
+
+	/* The task lookup and the mm reference grab stay inside one RCU section. */
+	rcu_read_lock();
+	tsk = find_task_by_vpid(trace_pid);
+	if (tsk)
+		target_mm = get_task_mm(tsk);
+	rcu_read_unlock();
+
+	if (!tsk)
+		return -ESRCH;
+	if (!target_mm)
+		return 0;
+
+	if (target_mm->pgd)
+		ptdump_walk_pgd_level_debugfs(m, target_mm, true);
+	/* Drop the reference taken by get_task_mm(). */
+	mmput(target_mm);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump_pid_pgtable);
+
+/*
+ * Write handler for the debugfs 'pid' file: parse the pid written by user
+ * space and store it in trace_pid for a later read of 'pid_pgtable_show'.
+ *
+ * kstrtoint_from_user() copies the input into a small bounded buffer and
+ * NUL-terminates it before parsing, so neither an unterminated write, a
+ * zero-length write, nor an oversized @count can make the parser read out of
+ * bounds or trigger a large user-controlled allocation.
+ *
+ * Returns @count on success, -EFAULT if the user buffer cannot be read, or
+ * the kstrtoint() error (-EINVAL / -ERANGE) if the input is not a valid int.
+ */
+static ssize_t ptdump_pid_write(struct file *file, const char __user *buffer,
+				size_t count, loff_t *f_pos)
+{
+	pid_t pid;
+	int ret;
+
+	ret = kstrtoint_from_user(buffer, count, 0, &pid);
+	if (ret)
+		return ret;
+
+	/* 0 is accepted: it makes 'pid_pgtable_show' produce no output. */
+	trace_pid = pid;
+	return count;
+}
+
+/* seq_file show callback for the 'pid' file: print the stored pid. */
+static int ptdump_show_pid(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", trace_pid);
+
+	return 0;
+}
+
+/*
+ * open() for the 'pid' file: set it up as a single-record seq_file backed by
+ * ptdump_show_pid(). No private data is passed to the show callback.
+ */
+static int ptdump_open_pid(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show_pid, NULL);
+}
+
+/*
+ * File operations for the debugfs 'pid' file. Reads go through the seq_file
+ * single_open() helpers and print trace_pid; writes update it via
+ * ptdump_pid_write().
+ */
+static const struct file_operations ptdump_pid_fops = {
+	.owner		= THIS_MODULE,
+	/* seq_file plumbing: open/release pair and the read side */
+	.open		= ptdump_open_pid,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	/* stores the pid that 'pid_pgtable_show' will dump */
+	.write		= ptdump_pid_write,
+};
#endif

#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
@@ -57,6 +137,8 @@ static int __init pt_dump_debug_init(void)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
debugfs_create_file("current_user", 0400, dir, NULL,
&ptdump_curusr_fops);
+ debugfs_create_file("pid_pgtable_show", 0400, dir, NULL, &ptdump_pid_pgtable_fops);
+ debugfs_create_file("pid", 0400, dir, NULL, &ptdump_pid_fops);
#endif
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
--
2.20.1