Re: [PATCH v5 14/23] PM: EM: Support late CPUs booting and capacity adjustment

From: Qais Yousef
Date: Sun Dec 17 2023 - 13:00:53 EST


On 11/29/23 11:08, Lukasz Luba wrote:
> The patch adds needed infrastructure to handle the late CPUs boot, which
> might change the previous CPUs capacity values. With this changes the new
> CPUs which try to register EM will trigger the needed re-calculations for
> other CPUs EMs. Thanks to that the em_per_state::performance values will
> be aligned with the CPU capacity information after all CPUs finish the
> boot and EM registrations.
>
> Signed-off-by: Lukasz Luba <lukasz.luba@xxxxxxx>
> ---
> kernel/power/energy_model.c | 121 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 121 insertions(+)
>
> diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
> index b5016afe6a19..d3fa5a77de80 100644
> --- a/kernel/power/energy_model.c
> +++ b/kernel/power/energy_model.c
> @@ -25,6 +25,9 @@ static DEFINE_MUTEX(em_pd_mutex);
>
> static void em_cpufreq_update_efficiencies(struct device *dev,
> struct em_perf_state *table);
> +static void em_check_capacity_update(void);
> +static void em_update_workfn(struct work_struct *work);
> +static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);
>
> static bool _is_cpu_device(struct device *dev)
> {
> @@ -596,6 +599,10 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
>
> unlock:
> mutex_unlock(&em_pd_mutex);
> +
> + if (_is_cpu_device(dev))
> + em_check_capacity_update();
> +
> return ret;
> }
> EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
> @@ -631,3 +638,117 @@ void em_dev_unregister_perf_domain(struct device *dev)
> mutex_unlock(&em_pd_mutex);
> }
> EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
> +
> +/*
> + * Adjustment of CPU performance values after boot, when all CPUs capacites
> + * are correctly calculated.
> + */
> +static void em_adjust_new_capacity(struct device *dev,
> + struct em_perf_domain *pd,
> + u64 max_cap)
> +{
> + struct em_perf_table __rcu *runtime_table;
> + struct em_perf_state *table, *new_table;
> + int ret, table_size;
> +
> + runtime_table = em_allocate_table(pd);
> + if (!runtime_table) {
> + dev_warn(dev, "EM: allocation failed\n");
> + return;
> + }
> +
> + new_table = runtime_table->state;
> +
> + table = em_get_table(pd);
> + /* Initialize data based on older runtime table */
> + table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
> + memcpy(new_table, table, table_size);
> +
> + em_put_table();
> +
> + em_init_performance(dev, pd, new_table, pd->nr_perf_states);
> + ret = em_compute_costs(dev, new_table, NULL, pd->nr_perf_states,
> + pd->flags);
> + if (ret) {
> + em_free_table(runtime_table);
> + return;
> + }
> +
> + ret = em_dev_update_perf_domain(dev, runtime_table);
> + if (ret)
> + dev_warn(dev, "EM: update failed %d\n", ret);
> +
> + /*
> + * This is one-time-update, so give up the ownership in this updater.
> + * The EM fwk will keep the reference and free the memory when needed.
> + */
> + em_free_table(runtime_table);
> +}
> +
> +static void em_check_capacity_update(void)
> +{
> + cpumask_var_t cpu_done_mask;
> + struct em_perf_state *table;
> + struct em_perf_domain *pd;
> + unsigned long cpu_capacity;
> + int cpu;
> +
> + if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) {
> + pr_warn("no free memory\n");
> + return;
> + }
> +
> + /* Check if CPUs capacity has changed than update EM */
> + for_each_possible_cpu(cpu) {

Can't we instead hook into cpufreq_online/offline() to check if we need to
do any em related update for this policy?


Cheers

--
Qais Yousef

> + struct cpufreq_policy *policy;
> + unsigned long em_max_perf;
> + struct device *dev;
> + int nr_states;
> +
> + if (cpumask_test_cpu(cpu, cpu_done_mask))
> + continue;
> +
> + policy = cpufreq_cpu_get(cpu);
> + if (!policy) {
> + pr_debug("Accessing cpu%d policy failed\n", cpu);
> + schedule_delayed_work(&em_update_work,
> + msecs_to_jiffies(1000));
> + break;
> + }
> + cpufreq_cpu_put(policy);
> +
> + pd = em_cpu_get(cpu);
> + if (!pd || em_is_artificial(pd))
> + continue;
> +
> + cpumask_or(cpu_done_mask, cpu_done_mask,
> + em_span_cpus(pd));
> +
> + nr_states = pd->nr_perf_states;
> + cpu_capacity = arch_scale_cpu_capacity(cpu);
> +
> + table = em_get_table(pd);
> + em_max_perf = table[pd->nr_perf_states - 1].performance;
> + em_put_table();
> +
> + /*
> + * Check if the CPU capacity has been adjusted during boot
> + * and trigger the update for new performance values.
> + */
> + if (em_max_perf == cpu_capacity)
> + continue;
> +
> + pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
> + cpu, cpu_capacity, em_max_perf);
> +
> + dev = get_cpu_device(cpu);
> + em_adjust_new_capacity(dev, pd, cpu_capacity);
> + }
> +
> + free_cpumask_var(cpu_done_mask);
> +}
> +
> +static void em_update_workfn(struct work_struct *work)
> +{
> + em_check_capacity_update();
> +}
> --
> 2.25.1
>