Re: [PATCH 2/2 v3] sched: use load_avg for selecting idlest group

From: Peter Zijlstra
Date: Fri Dec 09 2016 - 10:22:53 EST


On Thu, Dec 08, 2016 at 05:56:54PM +0100, Vincent Guittot wrote:
> @@ -5449,14 +5456,32 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
> }
>
> /* Adjust by relative CPU capacity of the group */
> - avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
> + avg_load = (avg_load * SCHED_CAPACITY_SCALE) /
> + group->sgc->capacity;
> + runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
> + group->sgc->capacity;
>
> if (local_group) {
> - this_load = avg_load;
> + this_runnable_load = runnable_load;
> + this_avg_load = avg_load;
> this_spare = max_spare_cap;
> } else {
> - if (avg_load < min_load) {
> - min_load = avg_load;
> + if (min_runnable_load > (runnable_load + imbalance)) {
> + /*
> + * The runnable load is significantly smaller
> + * so we can pick this new cpu
> + */
> + min_runnable_load = runnable_load;
> + min_avg_load = avg_load;
> + idlest = group;
> + } else if ((runnable_load < (min_runnable_load + imbalance)) &&
> + (100*min_avg_load > imbalance_scale*avg_load)) {
> + /*
> + * The runnable loads are close so we take
> + * into account blocked load through avg_load
> + * which is blocked + runnable load
> + */
> + min_avg_load = avg_load;
> idlest = group;
> }
>
> @@ -5480,13 +5505,16 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
> goto skip_spare;
>
> if (this_spare > task_util(p) / 2 &&
> - imbalance*this_spare > 100*most_spare)
> + imbalance_scale*this_spare > 100*most_spare)
> return NULL;
> else if (most_spare > task_util(p) / 2)
> return most_spare_sg;
>
> skip_spare:
> - if (!idlest || 100*this_load < imbalance*min_load)
> + if (!idlest ||
> + (min_runnable_load > (this_runnable_load + imbalance)) ||
> + ((this_runnable_load < (min_runnable_load + imbalance)) &&
> + (100*this_avg_load < imbalance_scale*min_avg_load)))
> return NULL;
> return idlest;
> }

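I applied the changes below on top of this patch, purely for readability:
the comments are tightened and the compound conditions are split into
separate early returns.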

---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5469,17 +5469,16 @@ find_idlest_group(struct sched_domain *s
if (min_runnable_load > (runnable_load + imbalance)) {
/*
* The runnable load is significantly smaller
- * so we can pick this new cpu
+ * so we can pick this new cpu
*/
min_runnable_load = runnable_load;
min_avg_load = avg_load;
idlest = group;
} else if ((runnable_load < (min_runnable_load + imbalance)) &&
- (100*min_avg_load > imbalance_scale*avg_load)) {
+ (100*min_avg_load > imbalance_scale*avg_load)) {
/*
- * The runnable loads are close so we take
- * into account blocked load through avg_load
- * which is blocked + runnable load
+ * The runnable loads are close so take the
+ * blocked load into account through avg_load.
*/
min_avg_load = avg_load;
idlest = group;
@@ -5509,15 +5508,21 @@ find_idlest_group(struct sched_domain *s
if (this_spare > task_util(p) / 2 &&
imbalance_scale*this_spare > 100*most_spare)
return NULL;
- else if (most_spare > task_util(p) / 2)
+
+ if (most_spare > task_util(p) / 2)
return most_spare_sg;

skip_spare:
- if (!idlest ||
- (min_runnable_load > (this_runnable_load + imbalance)) ||
- ((this_runnable_load < (min_runnable_load + imbalance)) &&
- (100*this_avg_load < imbalance_scale*min_avg_load)))
+ if (!idlest)
+ return NULL;
+
+ if (min_runnable_load > (this_runnable_load + imbalance))
return NULL;
+
+ if ((this_runnable_load < (min_runnable_load + imbalance)) &&
+ (100*this_avg_load < imbalance_scale*min_avg_load))
+ return NULL;
+
return idlest;
}