[RFC PATCH 1/2] sched: Force migration on a better cpu

From: Morten Rasmussen
Date: Mon Mar 25 2013 - 11:39:39 EST


From: Vincent Guittot <vincent.guittot@xxxxxxxxxx>

In a system where cpus have different cpu_power, we can end up in a
situation where a heavy task runs on a cpu with lower cpu_power, which
by definition means lower compute capacity and lower performance. We
can detect this scenario and force the task to migrate to a cpu with
higher compute capacity to improve performance for demanding tasks.
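
Below is a minimal, self-contained sketch (not kernel code; the struct,
the helper name and the example values are made up for illustration) of
the capacity test this patch applies in update_sg_lb_stats(),
find_busiest_queue() and need_active_balance(): a cpu counts as
overloaded when its runnable load exceeds its cpu_power, and pulling
the task to dst_cpu is only considered worthwhile when dst_cpu's
capacity exceeds the source's by more than imbalance_pct:

#include <stdbool.h>
#include <stdio.h>

struct cpu_state {
	unsigned long load;		/* runnable load contribution */
	unsigned long cpu_power;	/* compute capacity of this cpu */
};

/*
 * Return true when 'src' cannot satisfy its load and 'dst' offers
 * enough extra capacity (scaled by imbalance_pct, a percentage such
 * as 125) to justify a forced migration.
 */
static bool should_force_migration(const struct cpu_state *src,
				   const struct cpu_state *dst,
				   unsigned int imbalance_pct)
{
	if (src->load <= src->cpu_power)
		return false;	/* the task fits on its current cpu */

	/* same comparison as the patch: src_power * pct < dst_power * 100 */
	return src->cpu_power * imbalance_pct < dst->cpu_power * 100;
}

int main(void)
{
	struct cpu_state little = { .load = 900, .cpu_power = 512 };
	struct cpu_state big    = { .load = 100, .cpu_power = 1024 };

	/* 900 > 512 and 512 * 125 < 1024 * 100, so this prints 1 */
	printf("force migration: %d\n",
	       should_force_migration(&little, &big, 125));
	return 0;
}

Scaling the capacities by imbalance_pct rather than comparing them
directly gives the check a margin, so cpus of roughly equal capacity do
not trigger forced migrations back and forth.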

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Signed-off-by: Morten Rasmussen <morten.rasmussen@xxxxxxx>
---
kernel/sched/fair.c | 36 +++++++++++++++++++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4243143..4781cdd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4444,7 +4444,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
{
unsigned long nr_running, max_nr_running, min_nr_running;
unsigned long load, max_cpu_load, min_cpu_load;
- unsigned int balance_cpu = -1, first_idle_cpu = 0;
+ unsigned int balance_cpu = -1, first_idle_cpu = 0, overloaded_cpu = 0;
unsigned long avg_load_per_task = 0;
int i;

@@ -4482,6 +4482,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
max_nr_running = nr_running;
if (min_nr_running > nr_running)
min_nr_running = nr_running;
+
+ if ((load > rq->cpu_power)
+ && ((rq->cpu_power*env->sd->imbalance_pct) < (env->dst_rq->cpu_power*100))
+ && (load > target_load(env->dst_cpu, load_idx)))
+ overloaded_cpu = 1;
}

sgs->group_load += load;
@@ -4527,6 +4532,13 @@ static inline void update_sg_lb_stats(struct lb_env *env,
(max_nr_running - min_nr_running) > 1)
sgs->group_imb = 1;

+ /*
+ * If the load contribution of a CPU exceeds its capacity, we should
+ * try to find a better CPU with more capacity
+ */
+ if (overloaded_cpu)
+ sgs->group_imb = 1;
+
sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
SCHED_POWER_SCALE);
if (!sgs->group_capacity)
@@ -4940,6 +4952,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
struct sched_group *group)
{
struct rq *busiest = NULL, *rq;
+ struct rq *overloaded = NULL, *dst_rq = cpu_rq(env->dst_cpu);
unsigned long max_load = 0;
int i;

@@ -4959,6 +4972,17 @@ static struct rq *find_busiest_queue(struct lb_env *env,
wl = weighted_cpuload(i);

/*
+ * If the single task on this CPU requires more compute capacity
+ * than the CPU provides and the dst_cpu has more capacity,
+ * remember this rq as a fallback candidate for busiest
+ */
+ if ((rq->nr_running == 1)
+ && (rq->cfs.runnable_load_avg > rq->cpu_power)
+ && (rq->cfs.runnable_load_avg > dst_rq->cfs.runnable_load_avg)
+ && ((rq->cpu_power*env->sd->imbalance_pct) < (dst_rq->cpu_power*100)))
+ overloaded = rq;
+
+ /*
* When comparing with imbalance, use weighted_cpuload()
* which is not scaled with the cpu power.
*/
@@ -4979,6 +5003,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
}
}

+ if (!busiest)
+ busiest = overloaded;
+
return busiest;
}

@@ -5006,6 +5033,9 @@ static int need_active_balance(struct lb_env *env)
return 1;
}

+ if ((power_of(env->src_cpu)*sd->imbalance_pct) < (power_of(env->dst_cpu)*100))
+ return 1;
+
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
}

@@ -5650,6 +5680,10 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
if (rq->nr_running >= 2)
goto need_kick;

+ /* load contrib is higher than cpu capacity */
+ if (rq->cfs.runnable_load_avg > rq->cpu_power)
+ goto need_kick;
+
rcu_read_lock();
for_each_domain(cpu, sd) {
struct sched_group *sg = sd->groups;
--
1.7.9.5

