[PATCH v7 3/4] mm,page_owner: Display all stacks and their count

From: Oscar Salvador
Date: Thu Feb 08 2024 - 18:45:51 EST


This patch adds a new debugfs file called 'page_owner_stacks', which
shows every stack that was added by page_owner, each followed by
its count, giving us a clear overview of the stack <-> count
relationship.

E.g:

prep_new_page+0xa9/0x120
get_page_from_freelist+0x801/0x2210
__alloc_pages+0x18b/0x350
alloc_pages_mpol+0x91/0x1f0
folio_alloc+0x14/0x50
filemap_alloc_folio+0xb2/0x100
__filemap_get_folio+0x14a/0x490
ext4_write_begin+0xbd/0x4b0 [ext4]
generic_perform_write+0xc1/0x1e0
ext4_buffered_write_iter+0x68/0xe0 [ext4]
ext4_file_write_iter+0x70/0x740 [ext4]
vfs_write+0x33d/0x420
ksys_write+0xa5/0xe0
do_syscall_64+0x80/0x160
entry_SYSCALL_64_after_hwframe+0x6e/0x76
stack_count: 4578

In order to show all the stacks, we implement stack_depot_get_next_stack(),
which walks all buckets while retrieving the stacks stored in them.
stack_depot_get_next_stack() will return all stacks, one at a time,
by first finding a non-empty bucket, and then retrieving all the stacks
stored in that bucket.
Once we have completely gone through it, we get the next non-empty bucket
and repeat the same steps, and so on until we have completely checked all
buckets.

Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
---
include/linux/stackdepot.h | 20 +++++++++
lib/stackdepot.c | 46 +++++++++++++++++++++
mm/page_owner.c | 85 ++++++++++++++++++++++++++++++++++++++
3 files changed, 151 insertions(+)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index ac62de4d4999..d851ec821e6f 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -183,6 +183,26 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
*/
struct stack_record *stack_depot_get_stack(depot_stack_handle_t handle);

+/**
+ * stack_depot_get_next_stack - Return all stacks, one per call
+ *
+ * @table: In/out cursor: index of the bucket currently being walked
+ * @bucket: In/out cursor: bucket currently being walked, or NULL to start
+ *          looking for a non-empty bucket at index *@table
+ * @last_found: In/out cursor: stack returned by the previous call
+ *
+ * The first call finds a non-empty bucket and returns the first stack
+ * stored in it. On subsequent calls, it resumes after *@last_found to see
+ * whether the bucket contains more stacks.
+ * Once we have walked all the stacks in a bucket, we check the next one,
+ * and we repeat the same steps until we have checked all of them.
+ * All three cursors are updated on every call; pass them back unchanged to
+ * continue the walk.
+ *
+ * Return: A pointer to a stack_record struct, or NULL when we have walked all
+ * buckets.
+ */
+struct stack_record *stack_depot_get_next_stack(unsigned long *table,
+ struct list_head **bucket,
+ struct stack_record **last_found);
+
/**
* stack_depot_fetch - Fetch a stack trace from stack depot
*
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 197c355601f9..107bd0174cd6 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -782,6 +782,52 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);

+/*
+ * Cursor-based walk over every stack record stored in stack_table.
+ *
+ * @table:       in/out, index of the bucket being walked
+ * @curr_bucket: in/out, bucket being walked (NULL before the first call)
+ * @last_found:  in/out, record returned by the previous call
+ *
+ * Each call returns the record that follows *last_found, moving on to the
+ * next non-empty bucket when the current one is exhausted. Returns NULL,
+ * and stores NULL in *last_found, once every bucket has been walked.
+ *
+ * Resuming inside a bucket requires both *curr_bucket and *last_found to be
+ * set. If either is NULL (first call, or a call made after the walk already
+ * finished), the scan (re)starts at the head of bucket *table instead of
+ * dereferencing a NULL record, and the bucket index is always bounds-checked
+ * before stack_table is indexed.
+ *
+ * NOTE(review): the record is handed back after the RCU read-side section
+ * has been left; this presumably relies on records not being freed while a
+ * caller holds the cursor - confirm against the depot's eviction rules.
+ */
+struct stack_record *stack_depot_get_next_stack(unsigned long *table,
+						struct list_head **curr_bucket,
+						struct stack_record **last_found)
+{
+	unsigned long stack_table_entries = stack_hash_mask + 1;
+	unsigned long nr_table = *table;
+	struct list_head *bucket = *curr_bucket;
+	struct stack_record *found = *last_found;
+
+	rcu_read_lock_sched_notrace();
+
+	if (bucket && found) {
+		/* Check whether we have more stacks in this bucket */
+		list_for_each_entry_continue_rcu(found, bucket, hash_list) {
+			goto out;
+		}
+
+		/* No more stacks in this bucket, check the next one */
+		nr_table++;
+	}
+
+	/*
+	 * Find a non-empty bucket and return its first stack. The next call
+	 * uses list_for_each_entry_continue_rcu() to keep walking it.
+	 */
+	for (; nr_table < stack_table_entries; nr_table++) {
+		bucket = &stack_table[nr_table];
+		list_for_each_entry_rcu(found, bucket, hash_list) {
+			goto out;
+		}
+	}
+
+	/* We are done walking all buckets */
+	found = NULL;
+
+out:
+	*table = nr_table;
+	*curr_bucket = bucket;
+	*last_found = found;
+	rcu_read_unlock_sched_notrace();
+
+	return found;
+}
+
static int stats_show(struct seq_file *seq, void *v)
{
/*
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 0adf41702b9d..aea212734557 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -749,6 +749,89 @@ static const struct file_operations proc_page_owner_operations = {
.llseek = lseek_page_owner,
};

+/*
+ * Cursor kept in seq_file->private for the page_owner_stacks file. It stores
+ * the three in/out arguments of stack_depot_get_next_stack() between calls.
+ */
+struct stack_iterator {
+ /* Passed as the @table argument of stack_depot_get_next_stack() */
+ unsigned long nr_table;
+ /* Passed as the @bucket argument; NULL until the first lookup */
+ struct list_head *bucket;
+ /* Last stack returned by the walk; this is what stack_print() shows */
+ struct stack_record *last_stack;
+};
+
+/*
+ * seq_file ->start() callback: return the record at position *ppos, or NULL
+ * when the walk is over.
+ *
+ * seq_file calls ->start() again for a position it has already reached: at
+ * the beginning of every read(), and after it had to grow its buffer because
+ * a record did not fit. ->start() must therefore hand back the current
+ * record rather than advance the cursor, otherwise records are silently
+ * skipped at those boundaries.
+ */
+static void *stack_start(struct seq_file *m, loff_t *ppos)
+{
+	struct stack_iterator *iter = m->private;
+
+	/* stack_next() parks the position at -1UL once all stacks were seen */
+	if (*ppos == -1UL)
+		return NULL;
+
+	/*
+	 * Any non-zero position was reached through stack_next(), which left
+	 * the record for that position in iter->last_stack.
+	 */
+	if (*ppos)
+		return iter->last_stack;
+
+	/* Position 0 (first read or a rewind): restart from the first bucket */
+	iter->nr_table = 0;
+	iter->bucket = NULL;
+	iter->last_stack = NULL;
+
+	return stack_depot_get_next_stack(&iter->nr_table,
+					  &iter->bucket,
+					  &iter->last_stack);
+}
+
+/*
+ * seq_file ->next() callback: advance the per-file cursor by one stack.
+ *
+ * On success the position is bumped by one; when the depot walk is over the
+ * position is set to -1UL, the value stack_start() tests for, and NULL is
+ * returned.
+ */
+static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
+{
+	struct stack_iterator *cursor = m->private;
+	struct stack_record *next_stack;
+
+	next_stack = stack_depot_get_next_stack(&cursor->nr_table,
+						&cursor->bucket,
+						&cursor->last_stack);
+	if (next_stack)
+		*ppos = *ppos + 1;
+	else
+		*ppos = -1UL;
+
+	return next_stack;
+}
+
+/*
+ * seq_file ->show() callback: print the stack the cursor currently points
+ * at, followed by its "stack_count".
+ *
+ * Stacks with no entries, or whose reference count is below 2, are skipped
+ * without emitting anything.
+ * NOTE(review): presumably a count of 1 means no page currently uses the
+ * stack - confirm against how page_owner takes its references.
+ *
+ * Return: 0 on success (including skipped stacks), -ENOMEM if the scratch
+ * buffer cannot be allocated.
+ */
+static int stack_print(struct seq_file *m, void *v)
+{
+	struct stack_iterator *iter = m->private;
+	struct stack_record *stack = iter->last_stack;
+	char *buf;
+	int ret;
+
+	if (!stack->size || stack->size < 0 || refcount_read(&stack->count) < 2)
+		return 0;
+
+	buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = stack_trace_snprint(buf, PAGE_SIZE, stack->entries, stack->size,
+				  0);
+	if (!ret)
+		goto out;
+
+	scnprintf(buf + ret, PAGE_SIZE - ret, "stack_count: %d\n\n",
+		  refcount_read(&stack->count));
+
+	/* buf is generated text, never pass it as a format string */
+	seq_puts(m, buf);
+	seq_puts(m, "\n\n");
+out:
+	kfree(buf);
+
+	return 0;
+}
+
+/*
+ * seq_file ->stop() callback. Intentionally empty: stack_start() and
+ * stack_next() do not leave anything held that would need releasing here.
+ */
+static void stack_stop(struct seq_file *m, void *v)
+{
+}
+
+/* seq_file iterator callbacks used by page_owner_stack_open() */
+static const struct seq_operations page_owner_stack_op = {
+	.start	= stack_start,
+	.stop	= stack_stop,
+	.next	= stack_next,
+	.show	= stack_print,
+};
+
+/*
+ * ->open() handler: set up a seq_file driven by page_owner_stack_op, with a
+ * private area large enough to hold one struct stack_iterator cursor.
+ */
+static int page_owner_stack_open(struct inode *inode, struct file *file)
+{
+	const int cursor_size = sizeof(struct stack_iterator);
+
+	return seq_open_private(file, &page_owner_stack_op, cursor_size);
+}
+
+/*
+ * File operations for the page_owner_stacks file.
+ *
+ * page_owner_stack_open() uses seq_open_private(), which allocates the
+ * struct stack_iterator stored in seq_file->private. The release hook must
+ * therefore be seq_release_private() so that allocation is freed on close;
+ * plain seq_release() would leak it on every open/close cycle.
+ *
+ * Only referenced from this file, hence static (same as
+ * proc_page_owner_operations).
+ */
+static const struct file_operations page_owner_stack_operations = {
+	.open		= page_owner_stack_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
static int __init pageowner_init(void)
{
if (!static_branch_unlikely(&page_owner_inited)) {
@@ -758,6 +841,8 @@ static int __init pageowner_init(void)

debugfs_create_file("page_owner", 0400, NULL, NULL,
&proc_page_owner_operations);
+ debugfs_create_file("page_owner_stacks", 0400, NULL, NULL,
+ &page_owner_stack_operations);

return 0;
}
--
2.43.0