[PATCH v2 5/5] sched/fair: Fix use of find_idlest_group when local group is idlest.

From: Brendan Jackman
Date: Fri Aug 25 2017 - 06:17:29 EST


find_idlest_group currently returns NULL when the local group is
idlest. The caller then continues the find_idlest_group search at a
lower level of the current CPU's sched_domain hierarchy. However,
find_idlest_group_cpu is not consulted and, crucially, @new_cpu is
not updated. This means the search is pointless and we return
@prev_cpu from select_task_rq_fair.
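
For reference, the caller loop currently looks roughly like this (a
trimmed sketch: the exact parameter list is assumed and most of the
loop body is elided, so treat it as illustration rather than the real
code):

static inline int
find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
		int cpu, int prev_cpu, int sd_flag)
{
	int new_cpu = prev_cpu;

	while (sd) {
		struct sched_group *group;

		/* ... skip levels that don't have sd_flag set ... */

		group = find_idlest_group(sd, p, cpu, sd_flag);
		if (!group) {
			/*
			 * Local group is idlest: we descend without ever
			 * touching @new_cpu, so if this happens at every
			 * level we fall out of the loop and return
			 * @prev_cpu unchanged.
			 */
			sd = sd->child;
			continue;
		}

		new_cpu = find_idlest_group_cpu(group, p, cpu);
		/* ... pick a lower domain level to continue from ... */
	}

	return new_cpu;
}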

This patch makes find_idlest_group always return the idlest group
(which may now be the local group) instead of NULL, so the caller
unconditionally goes on to consult find_idlest_group_cpu and update
@new_cpu.

* * *

An alternative, simpler fix would be to initialize @new_cpu to @cpu
instead of @prev_cpu at the beginning of the new find_idlest_cpu.
However, that would not fix the fact that find_idlest_group_cpu still
goes unused (and we still miss out on its bias toward a shallower-idle
CPU) whenever find_idlest_group picks the local group.
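
Concretely, the alternative would be a one-liner along these lines
(untested sketch, assuming the initializer really is
"int new_cpu = prev_cpu;" today):

-	int new_cpu = prev_cpu;
+	int new_cpu = cpu;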

If that alternative solution were preferred, then some code could be
removed in recognition of the fact that, when find_idlest_group_cpu
was called, @cpu would never be a member of @group (because @group
would be a non-local group, and since @sd is derived from @cpu, @cpu
would always be in @sd's local group).
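
One example of such removable code (quoted roughly from memory, so the
exact lines may differ) is the "i == this_cpu" tie-break in
find_idlest_group_cpu's least-loaded scan, which could never fire if
@this_cpu were guaranteed not to be in @group:

	/* in find_idlest_group_cpu's walk over @group's allowed CPUs: */
	if (load < min_load || (load == min_load && i == this_cpu)) {
		min_load = load;
		least_loaded_cpu = i;
	}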

Signed-off-by: Brendan Jackman <brendan.jackman@xxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Josef Bacik <josef@xxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Morten Rasmussen <morten.rasmussen@xxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 29 ++++++++++-------------------
1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26080917ff8d..93e2746d3c30 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5384,11 +5384,10 @@ static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
  * Assumes p is allowed on at least one CPU in sd.
  */
 static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p,
-		  int this_cpu, int sd_flag)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int sd_flag)
 {
-	struct sched_group *idlest = NULL, *group = sd->groups;
-	struct sched_group *most_spare_sg = NULL;
+	struct sched_group *group = sd->groups, *local_group = sd->groups;
+	struct sched_group *idlest = NULL, *most_spare_sg = NULL;
 	unsigned long min_runnable_load = ULONG_MAX;
 	unsigned long this_runnable_load = ULONG_MAX;
 	unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
@@ -5404,7 +5403,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 	do {
 		unsigned long load, avg_load, runnable_load;
 		unsigned long spare_cap, max_spare_cap;
-		int local_group;
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
@@ -5412,9 +5410,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 					&p->cpus_allowed))
 			continue;
 
-		local_group = cpumask_test_cpu(this_cpu,
-					       sched_group_span(group));
-
 		/*
 		 * Tally up the load of all CPUs in the group and find
 		 * the group containing the CPU with most spare capacity.
@@ -5425,7 +5420,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 		for_each_cpu(i, sched_group_span(group)) {
 			/* Bias balancing toward cpus of our domain */
-			if (local_group)
+			if (group == local_group)
 				load = source_load(i, load_idx);
 			else
 				load = target_load(i, load_idx);
@@ -5446,7 +5441,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
 					group->sgc->capacity;
 
-		if (local_group) {
+		if (group == local_group) {
 			this_runnable_load = runnable_load;
 			this_avg_load = avg_load;
 			this_spare = max_spare_cap;
@@ -5492,21 +5487,21 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 	if (this_spare > task_util(p) / 2 &&
 	    imbalance_scale*this_spare > 100*most_spare)
-		return NULL;
+		return local_group;
 
 	if (most_spare > task_util(p) / 2)
 		return most_spare_sg;
 
 skip_spare:
 	if (!idlest)
-		return NULL;
+		return local_group;
 
 	if (min_runnable_load > (this_runnable_load + imbalance))
-		return NULL;
+		return local_group;
 
 	if ((this_runnable_load < (min_runnable_load + imbalance)) &&
 	     (100*this_avg_load < imbalance_scale*min_avg_load))
-		return NULL;
+		return local_group;
 
 	return idlest;
 }
@@ -5582,11 +5577,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 			continue;
 		}
 
-		group = find_idlest_group(sd, p, cpu, sd_flag);
-		if (!group) {
-			sd = sd->child;
-			continue;
-		}
+		group = find_idlest_group(sd, p, sd_flag);
 
 		new_cpu = find_idlest_group_cpu(group, p, cpu);
 		if (new_cpu == cpu) {
--
2.14.1