[tip:sched/core] sched/cpufreq: Use the DEADLINE utilization signal

From: tip-bot for Juri Lelli
Date: Wed Jan 10 2018 - 07:22:22 EST


Commit-ID: d4edd662ac1657126df7ffd74a278958b133a77d
Gitweb: https://git.kernel.org/tip/d4edd662ac1657126df7ffd74a278958b133a77d
Author: Juri Lelli <juri.lelli@xxxxxxx>
AuthorDate: Mon, 4 Dec 2017 11:23:18 +0100
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Wed, 10 Jan 2018 11:30:32 +0100

sched/cpufreq: Use the DEADLINE utilization signal

SCHED_DEADLINE tracks its active utilization signal with a per-dl_rq
variable named running_bw.

Make use of that to drive CPU frequency selection: add up the FAIR and
DEADLINE contributions to get the CPU capacity required to handle both
requirements (while RT still selects the maximum frequency).

Co-authored-by: Claudio Scordino <claudio@xxxxxxxxxxxxxxx>
Signed-off-by: Juri Lelli <juri.lelli@xxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Acked-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Luca Abeni <luca.abeni@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rafael J . Wysocki <rafael.j.wysocki@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: alessio.balsini@xxxxxxx
Cc: bristot@xxxxxxxxxx
Cc: dietmar.eggemann@xxxxxxx
Cc: joelaf@xxxxxxxxxx
Cc: juri.lelli@xxxxxxxxxx
Cc: mathieu.poirier@xxxxxxxxxx
Cc: morten.rasmussen@xxxxxxx
Cc: patrick.bellasi@xxxxxxx
Cc: rjw@xxxxxxxxxxxxx
Cc: rostedt@xxxxxxxxxxx
Cc: tkjos@xxxxxxxxxxx
Cc: tommaso.cucinotta@xxxxxxxxxxxxxxx
Cc: vincent.guittot@xxxxxxxxxx
Link: http://lkml.kernel.org/r/20171204102325.5110-2-juri.lelli@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/linux/sched/cpufreq.h | 2 --
kernel/sched/cpufreq_schedutil.c | 25 +++++++++++++++----------
kernel/sched/sched.h | 10 ++++++++++
3 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index d1ad3d8..0b55834 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -12,8 +12,6 @@
#define SCHED_CPUFREQ_DL (1U << 1)
#define SCHED_CPUFREQ_IOWAIT (1U << 2)

-#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
-
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 6dd1ec9..8d266bc 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -179,12 +179,17 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long cfs_max;
+ unsigned long util_cfs = cpu_util_cfs(rq);
+ unsigned long util_dl = cpu_util_dl(rq);

- cfs_max = arch_scale_cpu_capacity(NULL, cpu);
+ *max = arch_scale_cpu_capacity(NULL, cpu);

- *util = min(rq->cfs.avg.util_avg, cfs_max);
- *max = cfs_max;
+ /*
+ * Ideally we would like to set util_dl as min/guaranteed freq and
+ * util_cfs + util_dl as requested freq. However, cpufreq is not yet
+ * ready for such an interface. So, we only do the latter for now.
+ */
+ *util = min(util_cfs + util_dl, *max);
}

static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
@@ -271,7 +276,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,

busy = sugov_cpu_is_busy(sg_cpu);

- if (flags & SCHED_CPUFREQ_RT_DL) {
+ if (flags & SCHED_CPUFREQ_RT) {
next_f = policy->cpuinfo.max_freq;
} else {
sugov_get_util(&util, &max, sg_cpu->cpu);
@@ -316,7 +321,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_sg_cpu->iowait_boost_pending = false;
continue;
}
- if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
+ if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
return policy->cpuinfo.max_freq;

j_util = j_sg_cpu->util;
@@ -352,7 +357,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
sg_cpu->last_update = time;

if (sugov_should_update_freq(sg_policy, time)) {
- if (flags & SCHED_CPUFREQ_RT_DL)
+ if (flags & SCHED_CPUFREQ_RT)
next_f = sg_policy->policy->cpuinfo.max_freq;
else
next_f = sugov_next_freq_shared(sg_cpu, time);
@@ -382,9 +387,9 @@ static void sugov_irq_work(struct irq_work *irq_work)
sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

/*
- * For RT and deadline tasks, the schedutil governor shoots the
- * frequency to maximum. Special care must be taken to ensure that this
- * kthread doesn't result in the same behavior.
+ * For RT tasks, the schedutil governor shoots the frequency to maximum.
+ * Special care must be taken to ensure that this kthread doesn't result
+ * in the same behavior.
*
* This is (mostly) guaranteed by the work_in_progress flag. The flag is
* updated only at the end of the sugov_work() function and before that
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 43f5d6e..136ab50 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2084,3 +2084,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#else /* arch_scale_freq_capacity */
#define arch_scale_freq_invariant() (false)
#endif
+
+static inline unsigned long cpu_util_dl(struct rq *rq)
+{
+ return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
+}
+
+static inline unsigned long cpu_util_cfs(struct rq *rq)
+{
+ return rq->cfs.avg.util_avg;
+}