[PATCH v2 3/7] sched: Teach arch_asym_cpu_priority() the idle state of SMT siblings

From: Ricardo Neri
Date: Tue Nov 22 2022 - 15:28:33 EST


Some processors (e.g., Intel processors with ITMT) use asym_packing to
balance load between physical cores with SMT. In such a case, a core with
all of its SMT siblings idle is more desirable than one with one or more
busy siblings.

Other processors (e.g., Power7 with SMT8) use asym_packing to balance load
among SMT siblings of different priority, regardless of their idle state.

Add a new parameter, check_smt, indicating whether the idle state of the
SMT siblings of @cpu should be considered. Architectures can use it as
needed.
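
For illustration only (this sketch is not part of the patch), an
architecture that wants to factor in the idle state of SMT siblings could
do something along these lines, where sched_smt_siblings_idle() is a
hypothetical helper and the priority adjustment is arbitrary:

  int arch_asym_cpu_priority(int cpu, bool check_smt)
  {
          int prio = per_cpu(sched_core_priority, cpu);

          /* Hypothetical: de-prioritize cores with busy SMT siblings. */
          if (check_smt && !sched_smt_siblings_idle(cpu))
                  prio -= 1;

          return prio;
  }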

Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Len Brown <len.brown@xxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Tim C. Chen <tim.c.chen@xxxxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Suggested-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
---
Changes since v1:
* Introduced this patch.
---
 arch/x86/kernel/itmt.c         | 2 +-
 include/linux/sched/topology.h | 2 +-
 kernel/sched/fair.c            | 5 ++++-
 kernel/sched/sched.h           | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 9ff480e94511..4cb5a5e4fa47 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -167,7 +167,7 @@ void sched_clear_itmt_support(void)
 	mutex_unlock(&itmt_update_mutex);
 }
 
-int arch_asym_cpu_priority(int cpu)
+int arch_asym_cpu_priority(int cpu, bool check_smt)
 {
 	return per_cpu(sched_core_priority, cpu);
 }
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 816df6cc444e..87b64b9776f6 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -63,7 +63,7 @@ static inline int cpu_numa_flags(void)
 }
 #endif
 
-extern int arch_asym_cpu_priority(int cpu);
+extern int arch_asym_cpu_priority(int cpu, bool check_smt);
 
 struct sched_domain_attr {
 	int relax_domain_level;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d18947a9c03e..0e4251f83807 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -142,8 +142,11 @@ __setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
 #ifdef CONFIG_SMP
 /*
  * For asym packing, by default the lower numbered CPU has higher priority.
+ *
+ * When doing ASYM_PACKING at the "MC" or higher domains, architectures may
+ * want to check the idle state of the SMT siblings of @cpu.
  */
-int __weak arch_asym_cpu_priority(int cpu)
+int __weak arch_asym_cpu_priority(int cpu, bool check_smt)
 {
 	return -cpu;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0fc7c0130755..e5e52c2e82de 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -802,7 +802,8 @@ static inline long se_weight(struct sched_entity *se)
  */
 static inline bool sched_asym_prefer(int a, int b, bool check_smt)
 {
-	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
+	return arch_asym_cpu_priority(a, check_smt) >
+	       arch_asym_cpu_priority(b, check_smt);
 }
 
 struct perf_domain {
--
2.25.1