Re: [RFC PATCH] cpuset: remove /proc/PID/cpuset

From: Waiman Long
Date: Fri Feb 09 2024 - 23:35:05 EST


On 2/2/24 05:49, brookxu.cn wrote:
From: Chunguang Xu <chunguang.xu@xxxxxxxxxx>

Now we can get all cgroup paths from /proc/PID/cgroup for
a long time, so it maybe useless to keep /proc/PID/cpuset,
besides the path get from /proc/PID/cpuset is not consistent
with /proc/PID/cgroup in default hierarchy, so now we may
can safely remove /proc/PID/cpuset to avoid the mismatch.

With cgroup v2, the value of /proc/PID/cgroup may not match /proc/PID/cpuset. That is the expected behavior due to the way cgroup v2 works. For example,

Root [CS] --> A [CS] --> B --> C where [CS] means cpuset controller is enabled at that cgroup.

If a process P is in cgroup C, /proc/PID/cgroup will show /A/B/C. However, /proc/PID/cpuset will show /A because it is where the cpuset controller setting will apply to process P. Also removing an existing procfs file may break an existing application that relies on it. So we don't do that unless there is a strong reason to do so.

NAK


root@test:~# cat /proc/1186/cgroup
0::/system.slice/lmeter.service

root@test:~# cat /proc/1186/cpuset
/system.slice

Signed-off-by: Chunguang Xu <chunguang.xu@xxxxxxxxxx>
---
arch/mips/configs/sb1250_swarm_defconfig | 1 -
arch/sh/configs/sdk7786_defconfig | 1 -
arch/sh/configs/urquell_defconfig | 1 -
fs/proc/base.c | 6 ----
kernel/cgroup/cpuset.c | 40 ------------------------
5 files changed, 49 deletions(-)

diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
index ce855b644bb0..2fbea9c604df 100644
--- a/arch/mips/configs/sb1250_swarm_defconfig
+++ b/arch/mips/configs/sb1250_swarm_defconfig
@@ -3,7 +3,6 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=15
CONFIG_CGROUPS=y
CONFIG_CPUSETS=y
-# CONFIG_PROC_PID_CPUSET is not set
CONFIG_CGROUP_CPUACCT=y
CONFIG_NAMESPACES=y
CONFIG_RELAY=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 7b427c17fbfe..6fe340d4860c 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -13,7 +13,6 @@ CONFIG_CGROUP_DEBUG=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
-# CONFIG_PROC_PID_CPUSET is not set
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_MEMCG=y
CONFIG_CGROUP_SCHED=y
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index 445bb451a5ec..c960221a0549 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -11,7 +11,6 @@ CONFIG_CGROUP_DEBUG=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
-# CONFIG_PROC_PID_CPUSET is not set
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_MEMCG=y
CONFIG_CGROUP_SCHED=y
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 98a031ac2648..8dcd23b9212a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3309,9 +3309,6 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
#endif
-#ifdef CONFIG_PROC_PID_CPUSET
- ONE("cpuset", S_IRUGO, proc_cpuset_show),
-#endif
#ifdef CONFIG_CGROUPS
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
@@ -3658,9 +3655,6 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
#endif
-#ifdef CONFIG_PROC_PID_CPUSET
- ONE("cpuset", S_IRUGO, proc_cpuset_show),
-#endif
#ifdef CONFIG_CGROUPS
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ba36c073304a..908cd7e6efa8 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -5066,46 +5066,6 @@ void __cpuset_memory_pressure_bump(void)
rcu_read_unlock();
}
-#ifdef CONFIG_PROC_PID_CPUSET
-/*
- * proc_cpuset_show()
- * - Print tasks cpuset path into seq_file.
- * - Used for /proc/<pid>/cpuset.
- * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
- * doesn't really matter if tsk->cpuset changes after we read it,
- * and we take cpuset_mutex, keeping cpuset_attach() from changing it
- * anyway.
- */
-int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *tsk)
-{
- char *buf;
- struct cgroup_subsys_state *css;
- int retval;
-
- retval = -ENOMEM;
- buf = kmalloc(PATH_MAX, GFP_KERNEL);
- if (!buf)
- goto out;
-
- css = task_get_css(tsk, cpuset_cgrp_id);
- retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
- current->nsproxy->cgroup_ns);
- css_put(css);
- if (retval == -E2BIG)
- retval = -ENAMETOOLONG;
- if (retval < 0)
- goto out_free;
- seq_puts(m, buf);
- seq_putc(m, '\n');
- retval = 0;
-out_free:
- kfree(buf);
-out:
- return retval;
-}
-#endif /* CONFIG_PROC_PID_CPUSET */
-
/* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{