[PATCH net-next V4 1/3] sched/topology: Add NUMA-based CPUs spread API

From: Tariq Toukan
Date: Thu Jul 28 2022 - 15:12:35 EST


Implement and expose an API that sets the spread of CPUs based on distance,
given a NUMA node. Fall back to the legacy logic that uses
cpumask_local_spread() when the distance-based spread cannot be computed.

Device drivers can use this logic to prefer some remote CPUs over
others.

Reviewed-by: Gal Pressman <gal@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
include/linux/sched/topology.h | 5 ++++
kernel/sched/topology.c | 49 ++++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 56cffe42abbc..a49167c2a0e5 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -210,6 +210,7 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl);
# define SD_INIT_NAME(type)
#endif

+void sched_cpus_set_spread(int node, u16 *cpus, int ncpus);
#else /* CONFIG_SMP */

struct sched_domain_attr;
@@ -231,6 +232,10 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
return true;
}

+/*
+ * Non-SMP variant of sched_cpus_set_spread(): zero-fills the @ncpus
+ * entries of @cpus, so every slot names CPU 0. @node is ignored.
+ */
+static inline void sched_cpus_set_spread(int node, u16 *cpus, int ncpus)
+{
+	size_t nbytes = ncpus * sizeof(*cpus);
+
+	memset(cpus, 0, nbytes);
+}
#endif /* !CONFIG_SMP */

#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05b6c2ad90b9..157aef862c04 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2067,8 +2067,57 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
return found;
}

+/*
+ * sched_cpus_spread_by_distance - order online CPUs by distance from @node
+ * @node:  NUMA node used as the reference point
+ * @cpus:  output array, receives @ncpus CPU ids
+ * @ncpus: number of entries to fill in @cpus
+ *
+ * Takes the first CPU of @node as the reference and repeatedly asks
+ * sched_numa_find_closest() for a CPU out of a private copy of
+ * cpu_online_mask, clearing each pick from that copy so it is not
+ * returned twice.
+ *
+ * Return: true if all @ncpus entries were filled. false if @node does not
+ * name a node, @node has no CPUs, the temporary cpumask could not be
+ * allocated, or the candidates ran out before @ncpus entries were found.
+ * On false, @cpus may have been partially written; the caller is expected
+ * to refill it.
+ */
+static bool sched_cpus_spread_by_distance(int node, u16 *cpus, int ncpus)
+{
+	cpumask_var_t cpumask;
+	bool ret = false;
+	int first, i;
+
+	/* Reject ids that do not name a node before looking up its cpumask. */
+	if (node < 0 || node >= nr_node_ids)
+		return false;
+
+	/*
+	 * A node without CPUs yields first >= nr_cpu_ids, which must not be
+	 * handed to sched_numa_find_closest() as the reference CPU.
+	 */
+	first = cpumask_first(cpumask_of_node(node));
+	if (first >= nr_cpu_ids)
+		return false;
+
+	/* No zeroing needed: the mask is fully overwritten right below. */
+	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return false;
+
+	cpumask_copy(cpumask, cpu_online_mask);
+
+	for (i = 0; i < ncpus; i++) {
+		int cpu = sched_numa_find_closest(cpumask, first);
+
+		/* Candidates exhausted before @ncpus entries were found. */
+		if (cpu >= nr_cpu_ids)
+			goto out;
+
+		cpus[i] = cpu;
+		__cpumask_clear_cpu(cpu, cpumask);
+	}
+
+	ret = true;
+out:
+	free_cpumask_var(cpumask);
+	return ret;
+}
+#else
+/*
+ * !CONFIG_NUMA variant: no distance-based ordering is attempted.
+ * Always reports failure and leaves @cpus untouched.
+ */
+static bool sched_cpus_spread_by_distance(int node, u16 *cpus, int ncpus)
+{
+	return false;
+}
#endif /* CONFIG_NUMA */

+/*
+ * Legacy fill: slot i of @cpus receives cpumask_local_spread(i, @node),
+ * for every i in [0, @ncpus).
+ */
+static void sched_cpus_by_local_spread(int node, u16 *cpus, int ncpus)
+{
+	int idx = 0;
+
+	while (idx < ncpus) {
+		cpus[idx] = cpumask_local_spread(idx, node);
+		idx++;
+	}
+}
+
+/**
+ * sched_cpus_set_spread - fill an array with CPU ids spread around a node
+ * @node:  NUMA node the spread is computed for
+ * @cpus:  output array with room for @ncpus entries
+ * @ncpus: number of entries to fill in @cpus
+ *
+ * Tries sched_cpus_spread_by_distance() first. If that reports failure,
+ * @cpus is filled by sched_cpus_by_local_spread() instead, which uses
+ * cpumask_local_spread().
+ */
+void sched_cpus_set_spread(int node, u16 *cpus, int ncpus)
+{
+	if (sched_cpus_spread_by_distance(node, cpus, ncpus))
+		return;
+
+	sched_cpus_by_local_spread(node, cpus, ncpus);
+}
+EXPORT_SYMBOL_GPL(sched_cpus_set_spread);
+
static int __sdt_alloc(const struct cpumask *cpu_map)
{
struct sched_domain_topology_level *tl;
--
2.21.0