[PATCH-cgroup 2/2] cgroup/cpuset: Include isolated cpuset CPUs in cpu_is_isolated() check

From: Waiman Long
Date: Sun Nov 26 2023 - 23:20:49 EST


Currently, the cpu_is_isolated() function checks only the statically
isolated CPUs specified via the "isolcpus" and "nohz_full" kernel
command line options. This function is used by vmstat and memcg to
reduce interference with isolated CPUs by not doing stat flushing
or scheduling works on those CPUs.

Workloads running on isolated CPUs within isolated cpuset
partitions should receive the same treatment to reduce unnecessary
interference. This patch introduces a new cpuset_cpu_is_isolated()
function to be called by cpu_is_isolated() so that the set of dynamically
created cpuset isolated CPUs will be included in the check.

To minimize overhead of calling cpuset_cpu_is_isolated(), a seqcount
is used to protect read access of the isolated cpumask without taking
the cpuset_mutex or callback_lock.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
include/linux/cpuset.h | 6 ++++++
include/linux/sched/isolation.h | 4 +++-
kernel/cgroup/cpuset.c | 25 +++++++++++++++++++++++++
3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index d629094fac6e..875d12598bd2 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -77,6 +77,7 @@ extern void cpuset_lock(void);
extern void cpuset_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
+extern bool cpuset_cpu_is_isolated(int cpu);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -207,6 +208,11 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
return false;
}

+static inline bool cpuset_cpu_is_isolated(int cpu)
+{
+ return false;
+}
+
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
{
return node_possible_map;
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index fe1a46f30d24..2b461129d1fa 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -2,6 +2,7 @@
#define _LINUX_SCHED_ISOLATION_H

#include <linux/cpumask.h>
+#include <linux/cpuset.h>
#include <linux/init.h>
#include <linux/tick.h>

@@ -67,7 +68,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
static inline bool cpu_is_isolated(int cpu)
{
return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) ||
- !housekeeping_test_cpu(cpu, HK_TYPE_TICK);
+ !housekeeping_test_cpu(cpu, HK_TYPE_TICK) ||
+ cpuset_cpu_is_isolated(cpu);
}

#endif /* _LINUX_SCHED_ISOLATION_H */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e34bbb0e2f24..4adb6d2209ca 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -208,8 +208,13 @@ static cpumask_var_t subpartitions_cpus;

/*
* Exclusive CPUs in isolated partitions
+ *
+ * The isolcpus_seq is used to protect read access to isolated_cpus without
+ * taking callback_lock or cpuset_mutex while write access requires taking
+ * both cpuset_mutex and callback_lock.
*/
static cpumask_var_t isolated_cpus;
+static seqcount_t isolcpus_seq = SEQCNT_ZERO(isolcpus_seq);

/* List of remote partition root children */
static struct list_head remote_children;
@@ -1435,10 +1440,12 @@ static void reset_partition_data(struct cpuset *cs)
static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
{
WARN_ON_ONCE(old_prs == new_prs);
+ write_seqcount_begin(&isolcpus_seq);
if (new_prs == PRS_ISOLATED)
cpumask_or(isolated_cpus, isolated_cpus, xcpus);
else
cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
+ write_seqcount_end(&isolcpus_seq);
}

/*
@@ -1518,6 +1525,24 @@ static void update_unbound_workqueue_cpumask(bool isolcpus_updated)
WARN_ON_ONCE(ret < 0);
}

+/**
+ * cpuset_cpu_is_isolated - Check if the given CPU is isolated
+ * @cpu: the CPU number to be checked
+ * Return: true if CPU is used in an isolated partition, false otherwise
+ */
+bool cpuset_cpu_is_isolated(int cpu)
+{
+ unsigned int seq;
+ bool ret;
+
+ do {
+ seq = read_seqcount_begin(&isolcpus_seq);
+ ret = cpumask_test_cpu(cpu, isolated_cpus);
+ } while (read_seqcount_retry(&isolcpus_seq, seq));
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated);
+
/*
* compute_effective_exclusive_cpumask - compute effective exclusive CPUs
* @cs: cpuset
--
2.39.3