[PATCH 1/2] cgroup: Print v1 cgroup.procs and tasks without pidlists

From: Michal Koutný
Date: Wed Aug 23 2023 - 13:48:14 EST


Pidlists are a mechanism that allows printing cgroup member tasks in
sorted order and with proper pidns (in)visibility.

The promise of sorted output was dropped in commit 7823da36ce8e
("cgroups: update documentation of cgroups tasks and procs files"),
more than 13 years ago at the time of writing.

On systems that still use v1 hierarchies (e.g. systemd in non-unified
mode), pidlists are problematic because:
a) their cache unnecessarily busies workqueues (cgroup_pidlist_destroy_wq)
b) PID recycling [1] may lead to logging noise:
> seq_file: buggy .next function kernfs_seq_next did not update position index

It is possible to reuse the cgroup v2 code that relies directly on the
css_set iterator without any caching (effectively extracting the
css_task_iter_* calls from pidlist_array_load()).
The only provision we need to make for pidns is skipping external tasks
(instead of printing '0' for them as in v2).

[1] cgroup v1 code uses the PID as the iterator position; PID recycling
can cause the same PID to repeat at the end of the (`tasks`) pidlist,
and seq_file interprets this as a non-incremented index. (The seq_file
code corrects it by incrementing the position, which should be harmless
unless the tasks file is read byte by byte.)
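
The offending pattern is roughly the following (a simplified sketch of
the pidlist .next callback; the pidlist lookup is elided and `end`
stands for one past the last entry of the cached PID array):

  static void *pidlist_next_sketch(struct seq_file *s, void *v, loff_t *pos)
  {
          pid_t *p = v;

          if (++p >= end) /* `end`: one past the cached pidlist array */
                  return NULL;
          /*
           * The PID value itself becomes the new position. With PID
           * recycling, the tail of the unsorted array can repeat a PID,
           * so *pos does not increase and seq_file warns about it.
           */
          *pos = *p;
          return p;
  }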

Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
---
 kernel/cgroup/cgroup-internal.h |  5 +++++
 kernel/cgroup/cgroup-v1.c       | 32 ++++++++++++++++++++++++--------
 kernel/cgroup/cgroup.c          |  8 ++++----
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c56071f150f2..8edf7aeac159 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -264,6 +264,11 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
int __cgroup_task_count(const struct cgroup *cgrp);
int cgroup_task_count(const struct cgroup *cgrp);

+void cgroup_procs_release(struct kernfs_open_file *of);
+void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos);
+void *cgroup_procs_start(struct seq_file *s, loff_t *pos);
+void *cgroup_threads_start(struct seq_file *s, loff_t *pos);
+
/*
* rstat.c
*/
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 83044312bc41..7c0945ccba0d 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -486,6 +486,22 @@ static int cgroup_pidlist_show(struct seq_file *s, void *v)
return 0;
}

+static int cgroup1_procs_show(struct seq_file *s, void *v)
+{
+ pid_t pid;
+
+ /* Print the PID for both the `tasks` file (threads) and `cgroup.procs`
+ * (processes); the latter iterates with CSS_TASK_ITER_PROCS, hence we
+ * get PIDs of thread group leaders, i.e. tgids.
+ */
+ pid = task_pid_vnr(v);
+ if (!pid)
+ return SEQ_SKIP;
+
+ seq_printf(s, "%d\n", pid);
+ return 0;
+}
+
static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off,
bool threadgroup)
@@ -623,11 +639,11 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
struct cftype cgroup1_base_files[] = {
{
.name = "cgroup.procs",
- .seq_start = cgroup_pidlist_start,
- .seq_next = cgroup_pidlist_next,
- .seq_stop = cgroup_pidlist_stop,
- .seq_show = cgroup_pidlist_show,
.private = CGROUP_FILE_PROCS,
+ .release = cgroup_procs_release,
+ .seq_start = cgroup_procs_start,
+ .seq_next = cgroup_procs_next,
+ .seq_show = cgroup1_procs_show,
.write = cgroup1_procs_write,
},
{
@@ -642,11 +658,11 @@ struct cftype cgroup1_base_files[] = {
},
{
.name = "tasks",
- .seq_start = cgroup_pidlist_start,
- .seq_next = cgroup_pidlist_next,
- .seq_stop = cgroup_pidlist_stop,
- .seq_show = cgroup_pidlist_show,
.private = CGROUP_FILE_TASKS,
+ .release = cgroup_procs_release,
+ .seq_start = cgroup_threads_start,
+ .seq_next = cgroup_procs_next,
+ .seq_show = cgroup1_procs_show,
.write = cgroup1_tasks_write,
},
{
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f55a40db065f..3c5ba2ca7852 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4963,7 +4963,7 @@ void css_task_iter_end(struct css_task_iter *it)
put_task_struct(it->cur_task);
}

-static void cgroup_procs_release(struct kernfs_open_file *of)
+void cgroup_procs_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;

@@ -4971,7 +4971,7 @@ static void cgroup_procs_release(struct kernfs_open_file *of)
css_task_iter_end(&ctx->procs.iter);
}

-static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
{
struct kernfs_open_file *of = s->private;
struct cgroup_file_ctx *ctx = of->priv;
@@ -5008,7 +5008,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
return cgroup_procs_next(s, NULL, NULL);
}

-static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
+void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
{
struct cgroup *cgrp = seq_css(s)->cgroup;

@@ -5152,7 +5152,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
return __cgroup_procs_write(of, buf, true) ?: nbytes;
}

-static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
+void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
{
return __cgroup_procs_start(s, pos, 0);
}
--
2.41.0