Re: [PATCH v2] cpufreq: intel_pstate: Avoid initializing variables prematurely

From: Rafael J. Wysocki
Date: Tue May 23 2023 - 07:08:40 EST


On Tue, May 23, 2023 at 10:51 AM Fieah Lim <kweifat@xxxxxxxxx> wrote:
>
> We should avoid initializing some rather expensive data
> when the function has a chance to bail out earlier
> before actually using it.
> This applies to the following initializations:
>
> - cpudata *cpu = all_cpu_data; (in everywhere)
> - this_cpu = smp_processor_id(); (in notify_hwp_interrupt)
> - hwp_cap = READ_ONCE(cpu->hwp_cap_cached); (in intel_pstate_hwp_boost_up)
>
> These initializations are premature because there is a chance
> that the function will bail out before actually using the data.
> I think this qualifies as a micro-optimization,
> especially in such a hot path.
>
> While at it, tidy up how and when we initialize
> all of the cpu_data pointers, for the sake of consistency.
>
> A side note on the intel_pstate_cpu_online change:
> we simply don't have to initialize cpudata just
> for the pr_debug, while policy->cpu is being there.
>
> Signed-off-by: Fieah Lim <kweifat@xxxxxxxxx>
> ---
> V1 -> V2: Rewrite changelog for better explanation.
> ---
> drivers/cpufreq/intel_pstate.c | 35 +++++++++++++++++++---------------
> 1 file changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 2548ec92faa2..b85e340520d9 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -464,9 +464,8 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
>
> static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
> {
> - struct cpudata *cpu;
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
>
> - cpu = all_cpu_data[policy->cpu];

This particular change has nothing to do with any optimization. It is
a coding style change, nothing more, and I'm not actually sure that it
is useful.

> if (!cpu->valid_pss_table)
> return;
>
> @@ -539,9 +538,8 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
> static inline void update_turbo_state(void)
> {
> u64 misc_en;
> - struct cpudata *cpu;
> + struct cpudata *cpu = all_cpu_data[0];
>
> - cpu = all_cpu_data[0];
> rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
> global.turbo_disabled =
> (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
> @@ -769,7 +767,7 @@ static struct cpufreq_driver intel_pstate;
> static ssize_t store_energy_performance_preference(
> struct cpufreq_policy *policy, const char *buf, size_t count)
> {
> - struct cpudata *cpu = all_cpu_data[policy->cpu];
> + struct cpudata *cpu;
> char str_preference[21];
> bool raw = false;
> ssize_t ret;
> @@ -802,6 +800,8 @@ static ssize_t store_energy_performance_preference(
> if (!intel_pstate_driver)
> return -EAGAIN;
>
> + cpu = all_cpu_data[policy->cpu];
> +

This is sysfs attribute handling, not any hot path at all.

> mutex_lock(&intel_pstate_limits_lock);
>
> if (intel_pstate_driver == &intel_pstate) {
> @@ -1297,7 +1297,7 @@ static void update_qos_request(enum freq_qos_req_type type)
> int i;
>
> for_each_possible_cpu(i) {
> - struct cpudata *cpu = all_cpu_data[i];
> + struct cpudata *cpu;
> unsigned int freq, perf_pct;
>
> policy = cpufreq_cpu_get(i);
> @@ -1310,6 +1310,8 @@ static void update_qos_request(enum freq_qos_req_type type)
> if (!req)
> continue;
>
> + cpu = all_cpu_data[i];
> +
> if (hwp_active)
> intel_pstate_get_hwp_cap(cpu);

This one kind of makes sense, even though this isn't any hot path at
all too (again, it is only used in sysfs attribute handling), but it
may qualify as a code cleanup.

>
> @@ -1579,7 +1581,7 @@ static cpumask_t hwp_intr_enable_mask;
>
> void notify_hwp_interrupt(void)
> {
> - unsigned int this_cpu = smp_processor_id();
> + unsigned int this_cpu;
> struct cpudata *cpudata;
> unsigned long flags;
> u64 value;
> @@ -1591,6 +1593,8 @@ void notify_hwp_interrupt(void)
> if (!(value & 0x01))
> return;
>
> + this_cpu = smp_processor_id();
> +
> spin_lock_irqsave(&hwp_notify_lock, flags);
>
> if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))

This is a place where it may really matter for performance, but how
much? Can you actually estimate this?

> @@ -2024,8 +2028,8 @@ static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
>
> static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
> {
> + u64 hwp_cap;
> u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
> - u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
> u32 max_limit = (hwp_req & 0xff00) >> 8;
> u32 min_limit = (hwp_req & 0xff);
> u32 boost_level1;
> @@ -2052,6 +2056,7 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
> cpu->hwp_boost_min = min_limit;
>
> /* level at half way mark between min and guranteed */
> + hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
> boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1;
>
> if (cpu->hwp_boost_min < boost_level1)

For clarity, the original code is much better than the new one and the
only case where hwp_cap is not used is when that single read doesn't
matter. Moreover, the compiler is free to optimize it too.

> @@ -2389,9 +2394,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
>
> static int intel_pstate_init_cpu(unsigned int cpunum)
> {
> - struct cpudata *cpu;
> -
> - cpu = all_cpu_data[cpunum];
> + struct cpudata *cpu = all_cpu_data[cpunum];
>
> if (!cpu) {
> cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
> @@ -2431,11 +2434,13 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
>
> static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
> {
> - struct cpudata *cpu = all_cpu_data[cpu_num];
> + struct cpudata *cpu;
>
> if (hwp_active && !hwp_boost)
> return;
>
> + cpu = all_cpu_data[cpu_num];
> +
> if (cpu->update_util_set)
> return;
>

This isn't a hot path.

> @@ -2638,9 +2643,7 @@ static int intel_cpufreq_cpu_offline(struct cpufreq_policy *policy)
>
> static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> {
> - struct cpudata *cpu = all_cpu_data[policy->cpu];
> -
> - pr_debug("CPU %d going online\n", cpu->cpu);
> + pr_debug("CPU %d going online\n", policy->cpu);
>
> intel_pstate_init_acpi_perf_limits(policy);
>
> @@ -2649,6 +2652,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> * Re-enable HWP and clear the "suspended" flag to let "resume"
> * know that it need not do that.
> */
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> intel_pstate_hwp_reenable(cpu);
> cpu->suspended = false;

Same here and I don't see why the change matters.

> }
> --

There is one piece in this patch that may be regarded as useful.
Please send it separately.