[RFC PATCH 2/2] sched/fair: Export parameters to control the traversal length when selecting an idle CPU.

From: Kenan.Liu
Date: Thu Jul 20 2023 - 04:58:54 EST


From: "Kenan.Liu" <Kenan.Liu@xxxxxxxxxxxxxxxxx>

The variable 'nr' determines the length of the traversal when we try to
find an idle CPU in select_idle_cpu(). A fixed value such as 4 may not
perform well in all scenarios and may lead to unacceptable overhead. Export
two sysctl parameters so that the value can be adjusted.

Signed-off-by: Kenan.Liu <Kenan.Liu@xxxxxxxxxxxxxxxxx>
Signed-off-by: Ben Luo <luoben@xxxxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ad7c93f..e10de3b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -125,6 +125,9 @@
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;

const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+unsigned int __read_mostly sysctl_sched_idle_search_nr_default = 4;	/* nr used by select_idle_cpu() when span_avg <= threshold * avg_cost */
+unsigned int __read_mostly sysctl_sched_idle_search_nr_threshold = 4;	/* multiple of avg_cost that span_avg must exceed before nr is scaled */
+
static bool smt_neighbour_topo;
static bool core_smt_topo_detect;
static unsigned int smt_nr_cpu = 2;
@@ -207,6 +210,50 @@ int __weak arch_asym_cpu_priority(int cpu)
#endif

#ifdef CONFIG_SYSCTL
+/*
+ * sysctl handler for sched_cfs_idle_search_nr_default.
+ *
+ * Reads report the current value. Writes are parsed into a local copy and
+ * are only published to sysctl_sched_idle_search_nr_default once they have
+ * been validated. Parsing directly into the live variable and rolling back
+ * afterwards would briefly expose a rejected 0 to select_idle_cpu(), whose
+ * reads cannot be serialized by a lock private to this handler.
+ *
+ * Concurrent writers need no extra serialization: each one publishes a
+ * single, already validated value and the last store wins.
+ *
+ * Returns 0 on success, -EINVAL when 0 is written, or the error reported
+ * by proc_douintvec().
+ */
+static int sched_set_idle_search_nr_default(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp = *table;
+	unsigned int nr = READ_ONCE(sysctl_sched_idle_search_nr_default);
+	int ret;
+
+	/* Point the parser at the local copy instead of the live variable. */
+	tmp.data = &nr;
+	ret = proc_douintvec(&tmp, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	/* Zero is not accepted; the live value is left untouched. */
+	if (!nr)
+		return -EINVAL;
+
+	WRITE_ONCE(sysctl_sched_idle_search_nr_default, nr);
+	return 0;
+}
+
+/*
+ * sysctl handler for sched_cfs_idle_search_nr_threshold.
+ *
+ * Reads report the current value. Writes are parsed into a local copy and
+ * are only published to sysctl_sched_idle_search_nr_threshold once they
+ * have been validated. Parsing directly into the live variable and rolling
+ * back afterwards would briefly expose a rejected 0 to select_idle_cpu(),
+ * whose reads cannot be serialized by a lock private to this handler.
+ *
+ * Concurrent writers need no extra serialization: each one publishes a
+ * single, already validated value and the last store wins.
+ *
+ * Returns 0 on success, -EINVAL when 0 is written, or the error reported
+ * by proc_douintvec().
+ */
+static int sched_set_idle_search_nr_threshold(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp = *table;
+	unsigned int threshold = READ_ONCE(sysctl_sched_idle_search_nr_threshold);
+	int ret;
+
+	/* Point the parser at the local copy instead of the live variable. */
+	tmp.data = &threshold;
+	ret = proc_douintvec(&tmp, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	/* Zero is not accepted; the live value is left untouched. */
+	if (!threshold)
+		return -EINVAL;
+
+	WRITE_ONCE(sysctl_sched_idle_search_nr_threshold, threshold);
+	return 0;
+}
+
static struct ctl_table sched_fair_sysctls[] = {
{
.procname = "sched_child_runs_first",
@@ -235,6 +282,20 @@ int __weak arch_asym_cpu_priority(int cpu)
.extra1 = SYSCTL_ZERO,
},
#endif /* CONFIG_NUMA_BALANCING */
+ {
+ .procname = "sched_cfs_idle_search_nr_default",
+ .data = &sysctl_sched_idle_search_nr_default,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_set_idle_search_nr_default,
+ },
+ {
+ .procname = "sched_cfs_idle_search_nr_threshold",
+ .data = &sysctl_sched_idle_search_nr_threshold,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_set_idle_search_nr_threshold,
+ },
{}
};

@@ -7027,10 +7088,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
avg_cost = this_sd->avg_scan_cost + 1;

span_avg = sd->span_weight * avg_idle;
- if (span_avg > 4*avg_cost)
+ if (span_avg > sysctl_sched_idle_search_nr_threshold * avg_cost)
nr = div_u64(span_avg, avg_cost);
else
- nr = 4;
+ nr = sysctl_sched_idle_search_nr_default;

time = cpu_clock(this);
}
--
1.8.3.1