Re: [Patch v5 2/2] ACPI: processor: reduce CPUFREQ thermal reduction pctg for Tegra241

From: Rafael J. Wysocki
Date: Wed Oct 18 2023 - 09:05:01 EST


On Sat, Oct 14, 2023 at 12:55 PM Sumit Gupta <sumitg@xxxxxxxxxx> wrote:
>
> From: Srikar Srimath Tirumala <srikars@xxxxxxxxxx>
>
> Current implementation of processor_thermal performs software throttling
> in fixed steps of "20%" which can be too coarse for some platforms.
> We observed some performance gain after reducing the throttle percentage.
> Change the CPUFREQ thermal reduction percentage and maximum thermal steps
> to be configurable. Also, update the default values of both for Nvidia
> Tegra241 (Grace) SoC. The thermal reduction percentage is reduced to "5%"
> and accordingly the maximum number of thermal steps are increased as they
> are derived from the reduction percentage.
>
> Signed-off-by: Srikar Srimath Tirumala <srikars@xxxxxxxxxx>
> Co-developed-by: Sumit Gupta <sumitg@xxxxxxxxxx>
> Signed-off-by: Sumit Gupta <sumitg@xxxxxxxxxx>
> ---
> drivers/acpi/arm64/Makefile | 1 +
> drivers/acpi/arm64/thermal_cpufreq.c | 20 ++++++++++++++++
> drivers/acpi/processor_thermal.c | 35 +++++++++++++++++++++++++---
> include/linux/acpi.h | 9 +++++++
> 4 files changed, 62 insertions(+), 3 deletions(-)
> create mode 100644 drivers/acpi/arm64/thermal_cpufreq.c
>
> diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
> index 143debc1ba4a..3f181d8156cc 100644
> --- a/drivers/acpi/arm64/Makefile
> +++ b/drivers/acpi/arm64/Makefile
> @@ -5,3 +5,4 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o
> obj-$(CONFIG_ACPI_APMT) += apmt.o
> obj-$(CONFIG_ARM_AMBA) += amba.o
> obj-y += dma.o init.o
> +obj-$(CONFIG_ACPI) += thermal_cpufreq.o
> diff --git a/drivers/acpi/arm64/thermal_cpufreq.c b/drivers/acpi/arm64/thermal_cpufreq.c
> new file mode 100644
> index 000000000000..de834fb013e7
> --- /dev/null
> +++ b/drivers/acpi/arm64/thermal_cpufreq.c
> @@ -0,0 +1,20 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/acpi.h>
> +
> +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY
> +#define SMCCC_SOC_ID_T241 0x036b0241
> +
> +int acpi_thermal_cpufreq_pctg(void)
> +{
> + s32 soc_id = arm_smccc_get_soc_id_version();
> +
> + /*
> + * Check JEP106 code for NVIDIA Tegra241 chip (036b:0241) and
> + * reduce the CPUFREQ Thermal reduction percentage to 5%.
> + */
> + if (soc_id == SMCCC_SOC_ID_T241)
> + return 5;
> +
> + return 0;
> +}
> +#endif

This part needs an ACK from the ARM folks.

> diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
> index b7c6287eccca..52f316e4e260 100644
> --- a/drivers/acpi/processor_thermal.c
> +++ b/drivers/acpi/processor_thermal.c
> @@ -26,7 +26,16 @@
> */
>
> #define CPUFREQ_THERMAL_MIN_STEP 0
> -#define CPUFREQ_THERMAL_MAX_STEP 3
> +
> +static int cpufreq_thermal_max_step __read_mostly = 3;
> +
> +/*
> + * Minimum throttle percentage for processor_thermal cooling device.
> + * The processor_thermal driver uses it to calculate the percentage amount by
> + * which cpu frequency must be reduced for each cooling state. This is also used
> + * to calculate the maximum number of throttling steps or cooling states.
> + */
> +static int cpufreq_thermal_pctg __read_mostly = 20;

I'd call this cpufreq_thermal_reduction_step, because the value
multiplied by it already is in percent.

>
> static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg);
>
> @@ -71,7 +80,7 @@ static int cpufreq_get_max_state(unsigned int cpu)
> if (!cpu_has_cpufreq(cpu))
> return 0;
>
> - return CPUFREQ_THERMAL_MAX_STEP;
> + return cpufreq_thermal_max_step;
> }
>
> static int cpufreq_get_cur_state(unsigned int cpu)
> @@ -113,7 +122,8 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
> if (!policy)
> return -EINVAL;
>
> - max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100;
> + max_freq = (policy->cpuinfo.max_freq *
> + (100 - reduction_pctg(i) * cpufreq_thermal_pctg)) / 100;
>
> cpufreq_cpu_put(policy);
>
> @@ -126,10 +136,29 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
> return 0;
> }
>
> +static void acpi_thermal_cpufreq_config(void)
> +{
> + int cpufreq_pctg = acpi_thermal_cpufreq_pctg();
> +
> + if (!cpufreq_pctg)
> + return;
> +
> + cpufreq_thermal_pctg = cpufreq_pctg;
> +
> + /*
> + * Derive the MAX_STEP from minimum throttle percentage so that the reduction
> + * percentage doesn't end up becoming negative. Also, cap the MAX_STEP so that
> + * the CPU performance doesn't become 0.
> + */
> + cpufreq_thermal_max_step = (100 / cpufreq_thermal_pctg) - 1;

Why don't you use the local variable in the expression on the right-hand side?

Also please note that the formula doesn't allow the default
combination of reduction_step and max_step to be produced which is a
bit odd.

What would be wrong with max_step = 60 / reduction_step?

> +}
> +
> void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
> {
> unsigned int cpu;
>
> + acpi_thermal_cpufreq_config();
> +
> for_each_cpu(cpu, policy->related_cpus) {
> struct acpi_processor *pr = per_cpu(processors, cpu);
> int ret;
> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
> index ba3f601b6e3d..407617670221 100644
> --- a/include/linux/acpi.h
> +++ b/include/linux/acpi.h
> @@ -1541,4 +1541,13 @@ static inline void acpi_device_notify(struct device *dev) { }
> static inline void acpi_device_notify_remove(struct device *dev) { }
> #endif
>
> +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY
> +int acpi_thermal_cpufreq_pctg(void);
> +#else
> +static inline int acpi_thermal_cpufreq_pctg(void)
> +{
> + return 0;
> +}
> +#endif
> +

This can go into drivers/acpi/internal.h as far as I'm concerned.

> #endif /*_LINUX_ACPI_H*/
> --