[PATCH 1/2] perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event

From: Namhyung Kim
Date: Thu Jan 14 2021 - 03:51:12 EST


This patch adds a new software event to count context switches
involving cgroup switches. So it's counted only if cgroups of
previous and next tasks are different. Note that it only checks the
cgroups in the perf_event subsystem. For cgroup v2, it shouldn't
matter anyway.

One can argue that we can do this by using existing sched_switch event
with eBPF. But some systems might not have eBPF for some reason so
I'd like to add this as a simple way.

Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
include/linux/perf_event.h | 38 +++++++++++++++------------------
include/uapi/linux/perf_event.h | 1 +
2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a38f579bc76..304ef42d42d1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1174,30 +1174,24 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
* which is guaranteed by us not actually scheduling inside other swevents
* because those disable preemption.
*/
-static __always_inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
- if (static_key_false(&perf_swevent_enabled[event_id])) {
- struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+ struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

- perf_fetch_caller_regs(regs);
- ___perf_sw_event(event_id, nr, regs, addr);
- }
+ perf_fetch_caller_regs(regs);
+ ___perf_sw_event(event_id, nr, regs, addr);
}

extern struct static_key_false perf_sched_events;

-static __always_inline bool
-perf_sw_migrate_enabled(void)
+static __always_inline bool __perf_sw_enabled(int swevt)
{
- if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
- return true;
- return false;
+ return static_key_false(&perf_swevent_enabled[swevt]);
}

static inline void perf_event_task_migrate(struct task_struct *task)
{
- if (perf_sw_migrate_enabled())
+ if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
task->sched_migrated = 1;
}

@@ -1207,11 +1201,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_in(prev, task);

- if (perf_sw_migrate_enabled() && task->sched_migrated) {
- struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
-
- perf_fetch_caller_regs(regs);
- ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+ if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
+ task->sched_migrated) {
+ __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
task->sched_migrated = 0;
}
}
@@ -1219,7 +1211,13 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
static inline void perf_event_task_sched_out(struct task_struct *prev,
struct task_struct *next)
{
- perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+ if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
+ __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
+
+ if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
+ (task_css_check(prev, perf_event_cgrp_id, 1)->cgroup !=
+ task_css_check(next, perf_event_cgrp_id, 1)->cgroup))
+ __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);

if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_out(prev, next);
@@ -1475,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
static inline void
-perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
-static inline void
perf_bp_event(struct perf_event *event, void *data) { }

static inline int perf_register_guest_info_callbacks
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..16b9538ad89b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_DUMMY = 9,
PERF_COUNT_SW_BPF_OUTPUT = 10,
+ PERF_COUNT_SW_CGROUP_SWITCHES = 11,

PERF_COUNT_SW_MAX, /* non-ABI */
};
--
2.30.0.284.gd98b1dd5eaa7-goog