Re: [PATCH v2] cpufreq: Rebuild sched-domains when removing cpufreq driver

From: Viresh Kumar
Date: Fri Oct 06 2023 - 03:07:19 EST


On 05-10-23, 15:41, Pierre Gondois wrote:
> The Energy Aware Scheduler (EAS) relies on the schedutil governor.
> When moving to/from the schedutil governor, sched domains must be
> rebuilt to allow re-evaluating the enablement conditions of EAS.
> This is done through sched_cpufreq_governor_change().
>
> Having a cpufreq governor assumes a cpufreq driver is running.
> Inserting/removing a cpufreq driver should trigger a re-evaluation
> of EAS enablement conditions, so that EAS does not remain enabled
> after a running cpufreq driver has been removed.
>
> Rebuild the sched domains in schedutil's sugov_init()/sugov_exit(),
> so that EAS's enablement conditions are re-checked whenever the
> schedutil governor is initialized or exited.
> Move relevant code up in schedutil.c to avoid a split and conditional
> function declaration.
> Rename sched_cpufreq_governor_change() to sugov_eas_rebuild_sd().
>
> Signed-off-by: Pierre Gondois <pierre.gondois@xxxxxxx>
> ---
> drivers/cpufreq/cpufreq.c | 3 +-
> include/linux/cpufreq.h | 8 -----
> kernel/sched/cpufreq_schedutil.c | 55 +++++++++++++++++---------------
> 3 files changed, 30 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 60ed89000e82..4bc15634d49c 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1544,7 +1544,7 @@ static int cpufreq_online(unsigned int cpu)
>
> /*
> * Register with the energy model before
> - * sched_cpufreq_governor_change() is called, which will result
> + * sugov_eas_rebuild_sd() is called, which will result
> * in rebuilding of the sched domains, which should only be done
> * once the energy model is properly initialized for the policy
> * first.
> @@ -2652,7 +2652,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
> ret = cpufreq_start_governor(policy);
> if (!ret) {
> pr_debug("governor change\n");
> - sched_cpufreq_governor_change(policy, old_gov);
> return 0;
> }
> cpufreq_exit_governor(policy);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 71d186d6933a..1c5ca92a0555 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -1193,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
> }
> #endif
>
> -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> -void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> - struct cpufreq_governor *old_gov);
> -#else
> -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> - struct cpufreq_governor *old_gov) { }
> -#endif
> -
> extern unsigned int arch_freq_get_on_cpu(int cpu);
>
> #ifndef arch_set_freq_scale
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 4492608b7d7f..901cada51ba7 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -555,6 +555,31 @@ static const struct kobj_type sugov_tunables_ktype = {
>
> /********************** cpufreq governor interface *********************/
>
> +#ifdef CONFIG_ENERGY_MODEL
> +static void rebuild_sd_workfn(struct work_struct *work)
> +{
> + rebuild_sched_domains_energy();
> +}
> +
> +static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
> +
> +/*
> + * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
> + * on governor changes to make sure the scheduler knows about it.
> + */
> +static void sugov_eas_rebuild_sd(void)
> +{
> + /*
> + * When called from the cpufreq_register_driver() path, the
> + * cpu_hotplug_lock is already held, so use a work item to
> + * avoid nested locking in rebuild_sched_domains().
> + */
> + schedule_work(&rebuild_sd_work);
> +}
> +#else
> +static inline void sugov_eas_rebuild_sd(void) { };
> +#endif
> +
> struct cpufreq_governor schedutil_gov;
>
> static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
> @@ -709,6 +734,8 @@ static int sugov_init(struct cpufreq_policy *policy)
> if (ret)
> goto fail;
>
> + sugov_eas_rebuild_sd();
> +
> out:
> mutex_unlock(&global_tunables_lock);
> return 0;
> @@ -750,6 +777,8 @@ static void sugov_exit(struct cpufreq_policy *policy)
> sugov_kthread_stop(sg_policy);
> sugov_policy_free(sg_policy);
> cpufreq_disable_fast_switch(policy);
> +
> + sugov_eas_rebuild_sd();
> }
>
> static int sugov_start(struct cpufreq_policy *policy)
> @@ -838,29 +867,3 @@ struct cpufreq_governor *cpufreq_default_governor(void)
> #endif
>
> cpufreq_governor_init(schedutil_gov);
> -
> -#ifdef CONFIG_ENERGY_MODEL
> -static void rebuild_sd_workfn(struct work_struct *work)
> -{
> - rebuild_sched_domains_energy();
> -}
> -static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
> -
> -/*
> - * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
> - * on governor changes to make sure the scheduler knows about it.
> - */
> -void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> - struct cpufreq_governor *old_gov)
> -{
> - if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
> - /*
> - * When called from the cpufreq_register_driver() path, the
> - * cpu_hotplug_lock is already held, so use a work item to
> - * avoid nested locking in rebuild_sched_domains().
> - */
> - schedule_work(&rebuild_sd_work);
> - }
> -
> -}
> -#endif

Acked-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>

--
viresh