Re: [PATCH 4/4] sched: Rename arch_update_thermal_pressure into arch_update_hw_pressure

From: Vincent Guittot
Date: Thu Dec 14 2023 - 03:36:17 EST


On Thu, 14 Dec 2023 at 09:30, Lukasz Luba <lukasz.luba@xxxxxxx> wrote:
>
>
> On 12/12/23 14:27, Vincent Guittot wrote:
> > Now that cpufreq provides a pressure value to the scheduler, rename
> > arch_update_thermal_pressure into hw pressure to reflect that it returns
> > a pressure applied by HW with a high frequency and which needs filtering.
>
> I would elaborte this meaning 'filtering' here. Something like:
> '... high frequency and which needs filtering to smooth the singal and
> get an average value. That reflects available capacity of the CPU in
> longer period'

Ok I will update the commit message to provide more details

>
> > This pressure is not always related to thermal mitigation but can also be
> > generated by max current limitation as an example.
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> > ---
> > arch/arm/include/asm/topology.h | 6 ++---
> > arch/arm64/include/asm/topology.h | 6 ++---
> > drivers/base/arch_topology.c | 26 +++++++++----------
> > drivers/cpufreq/qcom-cpufreq-hw.c | 4 +--
> > include/linux/arch_topology.h | 8 +++---
> > include/linux/sched/topology.h | 8 +++---
> > .../{thermal_pressure.h => hw_pressure.h} | 14 +++++-----
> > include/trace/events/sched.h | 2 +-
> > init/Kconfig | 12 ++++-----
> > kernel/sched/core.c | 8 +++---
> > kernel/sched/fair.c | 12 ++++-----
> > kernel/sched/pelt.c | 18 ++++++-------
> > kernel/sched/pelt.h | 16 ++++++------
> > kernel/sched/sched.h | 4 +--
> > 14 files changed, 72 insertions(+), 72 deletions(-)
> > rename include/trace/events/{thermal_pressure.h => hw_pressure.h} (55%)
> >
> > diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
> > index 853c4f81ba4a..e175e8596b5d 100644
> > --- a/arch/arm/include/asm/topology.h
> > +++ b/arch/arm/include/asm/topology.h
> > @@ -22,9 +22,9 @@
> > /* Enable topology flag updates */
> > #define arch_update_cpu_topology topology_update_cpu_topology
> >
> > -/* Replace task scheduler's default thermal pressure API */
> > -#define arch_scale_thermal_pressure topology_get_thermal_pressure
> > -#define arch_update_thermal_pressure topology_update_thermal_pressure
> > +/* Replace task scheduler's default hw pressure API */
> > +#define arch_scale_hw_pressure topology_get_hw_pressure
> > +#define arch_update_hw_pressure topology_update_hw_pressure
> >
> > #else
> >
> > diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> > index a323b109b9c4..a427650bdfba 100644
> > --- a/arch/arm64/include/asm/topology.h
> > +++ b/arch/arm64/include/asm/topology.h
> > @@ -35,9 +35,9 @@ void update_freq_counters_refs(void);
> > /* Enable topology flag updates */
> > #define arch_update_cpu_topology topology_update_cpu_topology
> >
> > -/* Replace task scheduler's default thermal pressure API */
> > -#define arch_scale_thermal_pressure topology_get_thermal_pressure
> > -#define arch_update_thermal_pressure topology_update_thermal_pressure
> > +/* Replace task scheduler's default hw pressure API */
>
> s/hw/HW/ ?
>
> > +#define arch_scale_hw_pressure topology_get_hw_pressure
> > +#define arch_update_hw_pressure topology_update_hw_pressure
> >
> > #include <asm-generic/topology.h>
> >
> > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> > index 0906114963ff..3d8dc9d5c3ad 100644
> > --- a/drivers/base/arch_topology.c
> > +++ b/drivers/base/arch_topology.c
> > @@ -22,7 +22,7 @@
> > #include <linux/units.h>
> >
> > #define CREATE_TRACE_POINTS
> > -#include <trace/events/thermal_pressure.h>
> > +#include <trace/events/hw_pressure.h>
> >
> > static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
> > static struct cpumask scale_freq_counters_mask;
> > @@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
> > per_cpu(cpu_scale, cpu) = capacity;
> > }
> >
> > -DEFINE_PER_CPU(unsigned long, thermal_pressure);
> > +DEFINE_PER_CPU(unsigned long, hw_pressure);
> >
> > /**
> > - * topology_update_thermal_pressure() - Update thermal pressure for CPUs
> > + * topology_update_hw_pressure() - Update hw pressure for CPUs
>
> same here: HW?
>
> > * @cpus : The related CPUs for which capacity has been reduced
> > * @capped_freq : The maximum allowed frequency that CPUs can run at
> > *
> > - * Update the value of thermal pressure for all @cpus in the mask. The
> > + * Update the value of hw pressure for all @cpus in the mask. The
>
> HW?
>
> > * cpumask should include all (online+offline) affected CPUs, to avoid
> > * operating on stale data when hot-plug is used for some CPUs. The
> > * @capped_freq reflects the currently allowed max CPUs frequency due to
> > - * thermal capping. It might be also a boost frequency value, which is bigger
> > + * hw capping. It might be also a boost frequency value, which is bigger
>
> HW?
>
> > * than the internal 'capacity_freq_ref' max frequency. In such case the
> > * pressure value should simply be removed, since this is an indication that
> > - * there is no thermal throttling. The @capped_freq must be provided in kHz.
> > + * there is no hw throttling. The @capped_freq must be provided in kHz.
>
> HW?
>
> > */
> > -void topology_update_thermal_pressure(const struct cpumask *cpus,
> > +void topology_update_hw_pressure(const struct cpumask *cpus,
> > unsigned long capped_freq)
> > {
> > - unsigned long max_capacity, capacity, th_pressure;
> > + unsigned long max_capacity, capacity, hw_pressure;
> > u32 max_freq;
> > int cpu;
> >
> > @@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
> >
> > /*
> > * Handle properly the boost frequencies, which should simply clean
> > - * the thermal pressure value.
> > + * the hw pressure value.
>
> HW?
>
> > */
> > if (max_freq <= capped_freq)
> > capacity = max_capacity;
> > else
> > capacity = mult_frac(max_capacity, capped_freq, max_freq);
> >
> > - th_pressure = max_capacity - capacity;
> > + hw_pressure = max_capacity - capacity;
> >
> > - trace_thermal_pressure_update(cpu, th_pressure);
> > + trace_hw_pressure_update(cpu, hw_pressure);
> >
> > for_each_cpu(cpu, cpus)
> > - WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
> > + WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
> > }
> > -EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
> > +EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
> >
> > static ssize_t cpu_capacity_show(struct device *dev,
> > struct device_attribute *attr,
> > diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
> > index 70b0f21968a0..a619202ba81d 100644
> > --- a/drivers/cpufreq/qcom-cpufreq-hw.c
> > +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
> > @@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
> >
> > throttled_freq = freq_hz / HZ_PER_KHZ;
> >
> > - /* Update thermal pressure (the boost frequencies are accepted) */
> > - arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
> > + /* Update hw pressure (the boost frequencies are accepted) */
>
> HW?
>
> > + arch_update_hw_pressure(policy->related_cpus, throttled_freq);
> >
>
> [snip]
>
> >
> > if (housekeeping_cpu(cpu, HK_TYPE_TICK))
> > @@ -5669,8 +5669,8 @@ void scheduler_tick(void)
> > rq_lock(rq, &rf);
> >
> > update_rq_clock(rq);
> > - thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
> > - update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
> > + hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
> > + update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
>
> We switch to task clock here, could you tell me why?
> Don't we have to maintain the boot command parameter for the shift?

This should have been part of the patch5 that I finally removed. IMO,
the additional time shift with rq_clock_thermal is no more needed now
that we have 2 separates signals

>
> > curr->sched_class->task_tick(rq, curr, 0);
> > if (sched_feat(LATENCY_WARN))
> > resched_latency = cpu_resched_latency(rq);
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 11d3be829302..07050955d6e0 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -4936,7 +4936,7 @@ static inline unsigned long get_actual_cpu_capacity(int cpu)
> > {
> > unsigned long capacity = arch_scale_cpu_capacity(cpu);
> >
> > - capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
> > + capacity -= max(hw_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
> >
> > return capacity;
> > }
> > @@ -4968,7 +4968,7 @@ static inline int util_fits_cpu(unsigned long util,
> > * Similarly if a task is capped to arch_scale_cpu_capacity(little_cpu), it
> > * should fit a little cpu even if there's some pressure.
> > *
> > - * Only exception is for thermal pressure since it has a direct impact
> > + * Only exception is for hw or cpufreq pressure since it has a direct impact
>
> HW?
>
> > * on available OPP of the system.
> > *
> > * We honour it for uclamp_min only as a drop in performance level
> > @@ -9224,7 +9224,7 @@ static inline bool others_have_blocked(struct rq *rq)
> > if (READ_ONCE(rq->avg_dl.util_avg))
> > return true;
> >
> > - if (thermal_load_avg(rq))
> > + if (hw_load_avg(rq))
> > return true;
> >
> > #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
> > @@ -9256,7 +9256,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
> > {
> > const struct sched_class *curr_class;
> > u64 now = rq_clock_pelt(rq);
> > - unsigned long thermal_pressure;
> > + unsigned long hw_pressure;
> > bool decayed;
> >
> > /*
> > @@ -9265,11 +9265,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
> > */
> > curr_class = rq->curr->sched_class;
> >
> > - thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
> > + hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
> >
> > decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
> > update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
> > - update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) |
> > + update_hw_load_avg(now, rq, hw_pressure) |
>
> Here also the rq_clock_thermal() is not used anymore. Shouldn't we
> remove the rq_clock_thermal() if not used anymore (I cannot se this in
> the patch)?
>
> > update_irq_load_avg(rq, 0);
> >
> > if (others_have_blocked(rq))
>
> [snip]
>
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index e58a54bda77d..7eaf186d071e 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -1078,8 +1078,8 @@ struct rq {
> > #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
> > struct sched_avg avg_irq;
> > #endif
> > -#ifdef CONFIG_SCHED_THERMAL_PRESSURE
> > - struct sched_avg avg_thermal;
> > +#ifdef CONFIG_SCHED_HW_PRESSURE
> > + struct sched_avg avg_hw;
> > #endif
> > u64 idle_stamp;
> > u64 avg_idle;
>
> I don't see that rq_clock_thermal() removal in this header.
> Maybe I miss some patch?
>
> BTW, I like the name 'HW pressure' for this information/signal.