[PATCH v3] mm: Update mark_victim tracepoints fields

From: Carlos Galo
Date: Fri Feb 23 2024 - 12:38:39 EST


The current implementation of the mark_victim tracepoint provides only
the process ID (pid) of the victim process. This limitation poses
challenges for userspace tools requiring real-time OOM analysis and
intervention. Although this information is available from the kernel
logs, it’s not the appropriate format to provide OOM notifications. In
Android, BPF programs are used with the mark_victim trace events to
notify userspace of an OOM kill. For consistency, update the trace
event to include the same information about the OOMed victim as the
kernel logs.

- UID
In Android each installed application has a unique UID. Including
the `uid` assists in correlating OOM events with specific apps.

- Process Name (comm)
Enables identification of the affected process.

- OOM Score
Will allow userspace to get additional insight of the relative kill
priority of the OOM victim. In Android, the oom_score_adj is used to
categorize app state (foreground, background, etc.), which aids in
analyzing user-perceptible impacts of OOM events [1].

- Total VM, RSS Stats, and pgtables
Amount of memory used by the victim that will, potentially, be freed up
by killing it.

[1] https://cs.android.com/android/platform/superproject/main/+/246dc8fc95b6d93afcba5c6d6c133307abb3ac2e:frameworks/base/services/core/java/com/android/server/am/ProcessList.java;l=188-283

Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Reviewed-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
Signed-off-by: Carlos Galo <carlosgalo@xxxxxxxxxx>
---

v3:
- Added total_vm, rss fields, and pgtables per Michal Hocko.
- Added Steven Rostedt reviewed by
- Updated commit description to include android usecase

v2: Fixed build error. Added missing comma when printing `__entry->uid`.

include/trace/events/oom.h | 36 ++++++++++++++++++++++++++++++++----
mm/oom_kill.c | 6 +++++-
2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 26a11e4a2c36..b799f3bcba82 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -7,6 +7,8 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>

+#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10))
+
TRACE_EVENT(oom_score_adj_update,

TP_PROTO(struct task_struct *task),
@@ -72,19 +74,45 @@ TRACE_EVENT(reclaim_retry_zone,
);

TRACE_EVENT(mark_victim,
- TP_PROTO(int pid),
+ TP_PROTO(struct task_struct *task, uid_t uid),

- TP_ARGS(pid),
+ TP_ARGS(task, uid),

TP_STRUCT__entry(
__field(int, pid)
+ __string(comm, task->comm)
+ __field(unsigned long, total_vm)
+ __field(unsigned long, anon_rss)
+ __field(unsigned long, file_rss)
+ __field(unsigned long, shmem_rss)
+ __field(uid_t, uid)
+ __field(unsigned long, pgtables)
+ __field(short, oom_score_adj)
),

TP_fast_assign(
- __entry->pid = pid;
+ __entry->pid = task->pid;
+ __assign_str(comm, task->comm);
+ __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm);
+ __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES));
+ __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES));
+ __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES));
+ __entry->uid = uid;
+ __entry->pgtables = mm_pgtables_bytes(task->mm) >> 10;
+ __entry->oom_score_adj = task->signal->oom_score_adj;
),

- TP_printk("pid=%d", __entry->pid)
+ TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd",
+ __entry->pid,
+ __get_str(comm),
+ __entry->total_vm,
+ __entry->anon_rss,
+ __entry->file_rss,
+ __entry->shmem_rss,
+ __entry->uid,
+ __entry->pgtables,
+ __entry->oom_score_adj
+ )
);

TRACE_EVENT(wake_reaper,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 91ccd82097c2..8d6a207c3c59 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,6 +44,7 @@
#include <linux/kthread.h>
#include <linux/init.h>
#include <linux/mmu_notifier.h>
+#include <linux/cred.h>

#include <asm/tlb.h>
#include "internal.h"
@@ -754,6 +755,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk)
*/
static void mark_oom_victim(struct task_struct *tsk)
{
+ const struct cred *cred;
struct mm_struct *mm = tsk->mm;

WARN_ON(oom_killer_disabled);
@@ -773,7 +775,9 @@ static void mark_oom_victim(struct task_struct *tsk)
*/
__thaw_task(tsk);
atomic_inc(&oom_victims);
- trace_mark_victim(tsk->pid);
+ cred = get_task_cred(tsk);
+ trace_mark_victim(tsk, cred->uid.val);
+ put_cred(cred);
}

/**
--
2.44.0.rc0.258.g7320e95886-goog