[PATCH 2/4] sched: cpufreq: Fix apply_dvfs_headroom() escaping uclamp constraints

From: Qais Yousef
Date: Sun Aug 20 2023 - 17:08:34 EST


DVFS headroom is applied after we calculate the effective_cpu_util()
which is where we honour uclamp constraints. It makes more sense to
apply the headroom there once and let all users naturally get the right
thing without having to sprinkle the call around in various places.

Before this fix running

uclampset -M 800 cat /dev/zero > /dev/null

Will cause the test system to run at max freq of 2.8GHz. After the fix
it runs at 2.2GHz instead which is the correct value that matches the
capacity of 800.

Note that similar problem exist for uclamp_min. If util was 50, and
uclamp_min is 100. Since we apply_dvfs_headroom() after apply uclamp
constraints, we'll end up with util of 125 instead of 100. IOW, we get
boosted twice, first time by uclamp_min, and second time by dvfs
headroom.

Signed-off-by: Qais Yousef (Google) <qyousef@xxxxxxxxxxx>
---
include/linux/energy_model.h | 1 -
kernel/sched/core.c | 11 ++++++++---
kernel/sched/cpufreq_schedutil.c | 5 ++---
3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 6ebde4e69e98..adec808b371a 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -243,7 +243,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
scale_cpu = arch_scale_cpu_capacity(cpu);
ps = &pd->table[pd->nr_perf_states - 1];

- max_util = apply_dvfs_headroom(max_util);
max_util = min(max_util, allowed_cpu_cap);
freq = map_util_freq(max_util, ps->frequency, scale_cpu);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index efe3848978a0..441d433c83cd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7439,8 +7439,10 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
* frequency will be gracefully reduced with the utilization decay.
*/
util = util_cfs + cpu_util_rt(rq);
- if (type == FREQUENCY_UTIL)
+ if (type == FREQUENCY_UTIL) {
+ util = apply_dvfs_headroom(util);
util = uclamp_rq_util_with(rq, util, p);
+ }

dl_util = cpu_util_dl(rq);

@@ -7471,9 +7473,12 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
* max - irq
* U' = irq + --------- * U
* max
+ *
+ * We only need to apply dvfs headroom to irq part since the util part
+ * already had it applied.
*/
util = scale_irq_capacity(util, irq, max);
- util += irq;
+ util += type == FREQUENCY_UTIL ? apply_dvfs_headroom(irq) : irq;

/*
* Bandwidth required by DEADLINE must always be granted while, for
@@ -7486,7 +7491,7 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
* an interface. So, we only do the latter for now.
*/
if (type == FREQUENCY_UTIL)
- util += cpu_bw_dl(rq);
+ util += apply_dvfs_headroom(cpu_bw_dl(rq));

return min(max, util);
}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 916c4d3d6192..0c7565ac31fb 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -143,7 +143,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
unsigned int freq = arch_scale_freq_invariant() ?
policy->cpuinfo.max_freq : policy->cur;

- util = apply_dvfs_headroom(util);
freq = map_util_freq(util, freq, max);

if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
@@ -406,8 +405,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
sg_cpu->util = prev_util;

- cpufreq_driver_adjust_perf(sg_cpu->cpu, apply_dvfs_headroom(sg_cpu->bw_dl),
- apply_dvfs_headroom(sg_cpu->util), max_cap);
+ cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_dl,
+ sg_cpu->util, max_cap);

sg_cpu->sg_policy->last_freq_update_time = time;
}
--
2.34.1