Re: [PATCH v2 3/5] cpufreq: intel_pstate: Add ->offline and ->online callbacks

From: Jinjie Ruan
Date: Fri Nov 03 2023 - 06:56:29 EST




On 2020/8/25 1:43, Rafael J. Wysocki wrote:
> From: "Rafael J. Wysocki" <rafael.j.wysocki@xxxxxxxxx>
>
> Add ->offline and ->online driver callbacks to prepare for taking a
> CPU offline and to restore its working configuration when it goes
> back online, respectively, to avoid invoking the ->init callback on
> every CPU online which is quite a bit of unnecessary overhead.
>
> Define ->offline and ->online so that they can be used in the
> passive mode as well as in the active mode and because ->offline
> will do the majority of ->stop_cpu work, the passive mode does
> not need that callback any more, so drop it.
>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> ---
>
> -> v2: Typo fixes and changelog edits (Doug).
>
> ---
> drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
> 1 file changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 3d18934fa975..98836ac299db 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
> return 0;
> }
>
> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
> {
> + pr_debug("CPU %d going offline\n", policy->cpu);
> +
> + intel_pstate_exit_perf_limits(policy);
> +
> + /*
> + * If the CPU is an SMT thread and it goes offline with the performance
> + * settings different from the minimum, it will prevent its sibling
> + * from getting to lower performance levels, so force the minimum
> + * performance on CPU offline to prevent that from happening.
> + */
> if (hwp_active)
> intel_pstate_hwp_force_min_perf(policy->cpu);
> else
> intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
> +
> + return 0;
> +}
> +
> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> +{
> + pr_debug("CPU %d going online\n", policy->cpu);
> +
> + intel_pstate_init_acpi_perf_limits(policy);
> +
> + if (hwp_active)
> + wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
> + all_cpu_data[policy->cpu]->hwp_req_cached);
> +
> + return 0;
> }

On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
not configure intel_pstate=xxx in command line.

Although the Performance tuner is used, the CPU have the lowest
frequency in scaling_cur_freq after the CPU goes offline and then goes
online, running the same infinite loop load.

How to produce:

echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor

cat while_true.c
#include <stdio.h>
void main(void)
{
while(1);
}


[root@localhost freq_test]# cat test.sh
#!/bin/bash

cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
taskset -c ${1} ./while_true &
sleep 1s

cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

echo 0 > /sys/devices/system/cpu/cpu${1}/online

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

sleep 1s

echo 1 > /sys/devices/system/cpu/cpu${1}/online
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

taskset -c ${1} ./while_true &

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq


[root@localhost freq_test]# sh test.sh 40
2300000
performance
2299977
cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
resource busy
2300000
2300022
2300000
2299953
[root@localhost freq_test]# sh test.sh 50
2300000
performance
2300000
cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
resource busy
2300000
2299977
2300022
2299977
[root@localhost freq_test]# sh test.sh 20
2300000
performance
2299977
cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
resource busy
800000
800000
800000
799992
[root@localhost freq_test]# sh test.sh 21
2300000
performance
2300000
cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
resource busy
800000
800000
800000
800000

[root@localhost freq_test]# cat
/sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
2300000
[root@localhost freq_test]# cat
/sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
800000

>
> static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
> {
> - pr_debug("CPU %d exiting\n", policy->cpu);
> + pr_debug("CPU %d stopping\n", policy->cpu);
>
> intel_pstate_clear_update_util_hook(policy->cpu);
> if (hwp_active)
> intel_pstate_hwp_save_state(policy);
> -
> - intel_cpufreq_stop_cpu(policy);
> }
>
> static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> {
> - intel_pstate_exit_perf_limits(policy);
> + pr_debug("CPU %d exiting\n", policy->cpu);
>
> policy->fast_switch_possible = false;
>
> @@ -2398,6 +2421,8 @@ static struct cpufreq_driver intel_pstate = {
> .init = intel_pstate_cpu_init,
> .exit = intel_pstate_cpu_exit,
> .stop_cpu = intel_pstate_stop_cpu,
> + .offline = intel_pstate_cpu_offline,
> + .online = intel_pstate_cpu_online,
> .update_limits = intel_pstate_update_limits,
> .name = "intel_pstate",
> };
> @@ -2652,7 +2677,8 @@ static struct cpufreq_driver intel_cpufreq = {
> .fast_switch = intel_cpufreq_fast_switch,
> .init = intel_cpufreq_cpu_init,
> .exit = intel_cpufreq_cpu_exit,
> - .stop_cpu = intel_cpufreq_stop_cpu,
> + .offline = intel_pstate_cpu_offline,
> + .online = intel_pstate_cpu_online,
> .update_limits = intel_pstate_update_limits,
> .name = "intel_cpufreq",
> };