Re: [Patch v4 5/6] thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping

From: Thara Gopinath
Date: Thu Oct 31 2019 - 12:46:18 EST


On 10/31/2019 12:29 PM, Dietmar Eggemann wrote:
> On 22.10.19 22:34, Thara Gopinath wrote:
>> Thermal governors can request that a cpu's maximum supported frequency
>> be capped in case of an overheat event. This in turn means that the
>> maximum capacity available for tasks to run on that cpu is
>> reduced. The delta between the original maximum capacity and the capped
>> maximum capacity is known as thermal pressure. Enable the cpufreq cooling
>> device to update the thermal pressure in the event of a capped
>> maximum frequency.
>>
>> Signed-off-by: Thara Gopinath <thara.gopinath@xxxxxxxxxx>
>> ---
>> drivers/thermal/cpu_cooling.c | 31 +++++++++++++++++++++++++++++--
>> 1 file changed, 29 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
>> index 391f397..2e6a979 100644
>> --- a/drivers/thermal/cpu_cooling.c
>> +++ b/drivers/thermal/cpu_cooling.c
>> @@ -218,6 +218,23 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
>> }
>>
>> /**
>> + * update_sched_max_capacity - update scheduler about change in cpu
>> + * max frequency.
>> + * @policy - cpufreq policy whose max frequency is capped.
>> + */
>> +static void update_sched_max_capacity(struct cpumask *cpus,
>> + unsigned int cur_max_freq,
>> + unsigned int max_freq)
>> +{
>> + int cpu;
>> + unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
>> + max_freq;
>> +
>> + for_each_cpu(cpu, cpus)
>> + update_thermal_pressure(cpu, capacity);
>> +}
>> +
>> +/**
>> * get_load() - get load for a cpu since last updated
>> * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
>> * @cpu: cpu number
>> @@ -320,6 +337,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
>> unsigned long state)
>> {
>> struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
>> + int ret;
>>
>> /* Request state should be less than max_level */
>> if (WARN_ON(state > cpufreq_cdev->max_level))
>> @@ -331,8 +349,17 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
>>
>> cpufreq_cdev->cpufreq_state = state;
>>
>> - return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
>> - cpufreq_cdev->freq_table[state].frequency);
>> + ret = dev_pm_qos_update_request
>> + (&cpufreq_cdev->qos_req,
>> + cpufreq_cdev->freq_table[state].frequency);
>> +
>> + if (ret > 0)
>> + update_sched_max_capacity
>> + (cpufreq_cdev->policy->cpus,
>> + cpufreq_cdev->freq_table[state].frequency,
>> + cpufreq_cdev->policy->cpuinfo.max_freq);
>> +
>> + return ret;
>> }
>>
>> /**
>>
>
> Why not get rid of update_sched_max_capacity() entirely and call
> update_thermal_pressure() in cpu_cooling.c directly? Saves one level in
> the call chain and would mean less code for this feature.

Hi Dietmar,
Thanks for the review.

I did not want the scheduler piece of code to loop through the cpus.
Do you feel strongly about this one?

Warm Regards
Thara
>
> Just compile tested on arm64:
>
> diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
> index 3211b4d3a899..bf36995013b0 100644
> --- a/drivers/thermal/cpu_cooling.c
> +++ b/drivers/thermal/cpu_cooling.c
> @@ -217,23 +217,6 @@ static u32 cpu_power_to_freq(struct
> cpufreq_cooling_device *cpufreq_cdev,
> return freq_table[i - 1].frequency;
> }
>
> -/**
> - * update_sched_max_capacity - update scheduler about change in cpu
> - * max frequency.
> - * @policy - cpufreq policy whose max frequency is capped.
> - */
> -static void update_sched_max_capacity(struct cpumask *cpus,
> - unsigned int cur_max_freq,
> - unsigned int max_freq)
> -{
> - int cpu;
> - unsigned long capacity = (cur_max_freq << SCHED_CAPACITY_SHIFT) /
> - max_freq;
> -
> - for_each_cpu(cpu, cpus)
> - update_thermal_pressure(cpu, capacity);
> -}
> -
> /**
> * get_load() - get load for a cpu since last updated
> * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
> @@ -353,7 +336,7 @@ static int cpufreq_set_cur_state(struct
> thermal_cooling_device *cdev,
> cpufreq_cdev->freq_table[state].frequency);
>
> if (ret > 0)
> - update_sched_max_capacity
> + update_thermal_pressure
> (cpufreq_cdev->policy->cpus,
> cpufreq_cdev->freq_table[state].frequency,
> cpufreq_cdev->policy->cpuinfo.max_freq);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 55dfe9634f67..5707813c7621 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1985,9 +1985,9 @@ static inline void rseq_syscall(struct pt_regs *regs)
> #endif
>
> #ifdef CONFIG_SMP
> -void update_thermal_pressure(int cpu, u64 capacity);
> +void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
> unsigned int max);
> #else
> -static inline void update_thermal_pressure(int cpu, u64 capacity)
> +static inline void update_thermal_pressure(struct cpumask *cpus,
> unsigned int cur, unsigned int max);
> {
> }
> #endif
> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
> index 0da31e12a5ff..691bdd79597a 100644
> --- a/kernel/sched/thermal.c
> +++ b/kernel/sched/thermal.c
> @@ -43,17 +43,16 @@ static DEFINE_PER_CPU(unsigned long, delta_capacity);
> * the arch_scale_cpu_capacity and capped capacity is stored in per cpu
> * delta_capacity.
> */
> -void update_thermal_pressure(int cpu, u64 capped_freq_ratio)
> +void update_thermal_pressure(struct cpumask *cpus, unsigned int cur,
> unsigned int max)
> {
> - unsigned long __capacity, delta;
> + int cpu;
>
> - /* Normalize the capped freq ratio */
> - __capacity = (capped_freq_ratio * arch_scale_cpu_capacity(cpu)) >>
> -
> SCHED_CAPACITY_SHIFT;
> - delta = arch_scale_cpu_capacity(cpu) - __capacity;
> - pr_debug("updating cpu%d thermal pressure to %lu\n", cpu, delta);
> + for_each_cpu(cpu, cpus) {
> + unsigned long scale_cap = arch_scale_cpu_capacity(cpu);
> + unsigned long cur_cap = cur * scale_cap / max;
>
> - per_cpu(delta_capacity, cpu) = delta;
> + per_cpu(delta_capacity, cpu) = scale_cap - cur_cap;
> + }
> }
>


--
Warm Regards
Thara