[PATCH 3/3] sched: Don't shorten the load balance interval of an 80% or more busy CPU

From: Tim Chen
Date: Mon May 24 2021 - 16:21:03 EST


For a CPU that is busy 80% or more of the time on average, it is quite
likely that a task will wake up on it soon. It is better not to shorten
its load balance interval as if it were completely idle, so that we save
on load balancing overhead.
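
For illustration, the check added to newidle_balance() below boils down to
the following integer arithmetic. cpu_is_mostly_busy() is a hypothetical
standalone helper used only for this sketch, not part of the patch:
util / capacity > 0.8 is evaluated as util * 10 > capacity * 8, so with
capacity_orig == 1024 the CPU counts as busy once its utilization exceeds 819.

static inline int cpu_is_mostly_busy(int cpu)
{
	/* util / capacity > 0.8, done in integer math (no FP in the kernel) */
	return cpu_util(cpu) * 10 > capacity_orig_of(cpu) * 8;
}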

Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a69bfc651e55..7353395d8a3a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9895,12 +9895,11 @@ get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
}

static inline void
-update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
+update_next_balance(struct sched_domain *sd, unsigned long *next_balance, int cpu_busy)
{
unsigned long interval, next;

- /* used by idle balance, so cpu_busy = 0 */
- interval = get_sd_balance_interval(sd, 0);
+ interval = get_sd_balance_interval(sd, cpu_busy);
if (time_after(jiffies+1, sd->last_balance + interval))
next = jiffies+1;
else
@@ -10593,6 +10592,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
struct sched_domain *sd;
int pulled_task = 0;
u64 curr_cost = 0;
+ int cpu_busy = 0;

update_misfit_status(NULL, this_rq);
/*
@@ -10618,12 +10618,20 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
rcu_read_lock();
sd = rcu_dereference_check_sched_domain(this_rq->sd);

+ /*
+ * Consider the CPU busy if its average utilization exceeds 80%.
+ * Idle balance such a CPU less frequently, since a task may wake up on it soon.
+ */
+ if (cpu_util(this_cpu) * 10 > capacity_orig_of(this_cpu) * 8)
+ cpu_busy = 1;
+
if (this_rq->avg_idle < sysctl_sched_migration_cost ||
!READ_ONCE(this_rq->rd->overload) ||
(sd && this_rq->avg_idle < sd->max_newidle_lb_cost)) {

if (sd)
- update_next_balance(sd, &next_balance);
+ update_next_balance(sd, &next_balance, cpu_busy);
+
rcu_read_unlock();

goto out;
@@ -10639,7 +10647,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
u64 t0, domain_cost;

if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
- update_next_balance(sd, &next_balance);
+ update_next_balance(sd, &next_balance, cpu_busy);
break;
}

@@ -10657,7 +10665,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
curr_cost += domain_cost;
}

- update_next_balance(sd, &next_balance);
+ update_next_balance(sd, &next_balance, cpu_busy);

/*
* Stop searching for tasks to pull if there are
--
2.20.1
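
For context on why the cpu_busy argument matters: get_sd_balance_interval()
stretches the interval for a busy CPU, so newidle_balance() on an 80%-busy
CPU schedules the next balance further out instead of using the short idle
interval. A rough sketch of that helper, assumed from context and not part
of this patch:

static inline unsigned long
get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
{
	unsigned long interval = sd->balance_interval;	/* in ms */

	/* a busy CPU balances less often: stretch the interval */
	if (cpu_busy)
		interval *= sd->busy_factor;

	return msecs_to_jiffies(interval);
}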