[PATCH 2/3] sched/fair: Introduce scaled capacity awareness in select_idle_sibling code path

From: Rohit Jain
Date: Mon Sep 25 2017 - 20:00:05 EST


While looking for idle CPUs on which to place a waking task, the
select_idle_sibling() path completely ignores the capacity stolen away
from a CPU by RT tasks and IRQ activity: a CPU whose capacity is largely
consumed by RT/IRQ pressure is treated exactly like a fully idle one.

Make this path capacity aware: prefer idle CPUs that still have their
full capacity and, when no such CPU exists, fall back to the idle CPU
with the highest remaining capacity.

Signed-off-by: Rohit Jain <rohit.k.jain@xxxxxxxxxx>
---
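Note for reviewers: this patch relies on the full_capacity() helper
introduced in patch 1/3 of this series. As a rough sketch of the assumed
semantics only (the exact threshold lives in that patch; the ~80% cutoff
below is purely illustrative):

	static inline bool full_capacity(int cpu)
	{
		/*
		 * Illustrative threshold (see patch 1/3 for the real one):
		 * treat a CPU as having full capacity when RT/IRQ pressure
		 * has consumed no more than ~20% of its original capacity.
		 */
		return capacity_of(cpu) >= (capacity_orig_of(cpu) * 819 >> 10);
	}

Here capacity_of() is the capacity left for CFS tasks after RT/IRQ
pressure has been subtracted, and capacity_orig_of() is the CPU's
unscaled original capacity.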
 kernel/sched/fair.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 47 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index afb701f..19ff2c3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6040,7 +6040,10 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu;
+	int core, cpu, rcpu, rcpu_backup;
+	unsigned int backup_cap = 0;
+
+	rcpu = rcpu_backup = -1;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6057,10 +6060,24 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 			cpumask_clear_cpu(cpu, cpus);
 			if (!idle_cpu(cpu))
 				idle = false;
+
+			if (full_capacity(cpu)) {
+				rcpu = cpu;
+			} else if ((rcpu == -1) && (capacity_of(cpu) > backup_cap)) {
+				backup_cap = capacity_of(cpu);
+				rcpu_backup = cpu;
+			}
 		}
 
-		if (idle)
-			return core;
+		if (idle) {
+			if (rcpu == -1)
+				return (rcpu_backup != -1 ? rcpu_backup : core);
+			return rcpu;
+		}
+
+		/* Reset the tracking state for the next core. */
+		backup_cap = 0;
+		rcpu = rcpu_backup = -1;
 	}
 
 	/*
@@ -6076,7 +6093,8 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
+	int cpu, backup_cpu = -1;
+	unsigned int backup_cap = 0;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6084,11 +6102,17 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
-			return cpu;
+		if (idle_cpu(cpu)) {
+			if (full_capacity(cpu))
+				return cpu;
+			if (capacity_of(cpu) > backup_cap) {
+				backup_cap = capacity_of(cpu);
+				backup_cpu = cpu;
+			}
+		}
 	}
 
-	return -1;
+	return backup_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6117,6 +6141,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 time, cost;
 	s64 delta;
 	int cpu, nr = INT_MAX;
+	int backup_cpu = -1;
+	unsigned int backup_cap = 0;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -6147,10 +6173,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
-			break;
+		if (idle_cpu(cpu)) {
+			if (full_capacity(cpu)) {
+				backup_cpu = -1;
+				break;
+			} else if (capacity_of(cpu) > backup_cap) {
+				backup_cap = capacity_of(cpu);
+				backup_cpu = cpu;
+			}
+		}
 	}
 
+	if (backup_cpu >= 0)
+		cpu = backup_cpu;
 	time = local_clock() - time;
 	cost = this_sd->avg_scan_cost;
 	delta = (s64)(time - cost) / 8;
@@ -6167,13 +6202,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i;
 
-	if (idle_cpu(target))
+	if (idle_cpu(target) && full_capacity(target))
 		return target;
 
 	/*
 	 * If the previous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) &&
+	    idle_cpu(prev) && full_capacity(prev))
 		return prev;
 
 	sd = rcu_dereference(per_cpu(sd_llc, target));
--
2.7.4