Re: [PATCH v6 10/23] PM: EM: Add API for memory allocations for new tables

From: Rafael J. Wysocki
Date: Thu Jan 04 2024 - 14:36:52 EST


On Thu, Jan 4, 2024 at 6:15 PM Lukasz Luba <lukasz.luba@xxxxxxx> wrote:
>
> The runtime modified EM table can be provided from drivers. Create
> mechanism which allows safely allocate and free the table for device
> drivers. The same table can be used by the EAS in task scheduler code
> paths, so make sure the memory is not freed when the device driver module
> is unloaded.
>
> Signed-off-by: Lukasz Luba <lukasz.luba@xxxxxxx>
> ---
> include/linux/energy_model.h | 11 +++++++++
> kernel/power/energy_model.c | 43 ++++++++++++++++++++++++++++++++++--
> 2 files changed, 52 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
> index 5f842da3bb0c..753d70d0ce7e 100644
> --- a/include/linux/energy_model.h
> +++ b/include/linux/energy_model.h
> @@ -5,6 +5,7 @@
> #include <linux/device.h>
> #include <linux/jump_label.h>
> #include <linux/kobject.h>
> +#include <linux/kref.h>
> #include <linux/rcupdate.h>
> #include <linux/sched/cpufreq.h>
> #include <linux/sched/topology.h>
> @@ -39,10 +40,12 @@ struct em_perf_state {
> /**
> * struct em_perf_table - Performance states table
> * @rcu: RCU used for safe access and destruction
> + * @refcount: Reference count to track the owners

"Reference counter to track the users"

Also it is not really just a counter - it provides the memory release
mechanism too.

> * @state: List of performance states, in ascending order
> */
> struct em_perf_table {
> struct rcu_head rcu;
> + struct kref refcount;

So I would just call it kref.

> struct em_perf_state state[];
> };
>
> @@ -184,6 +187,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
> struct em_data_callback *cb, cpumask_t *span,
> bool microwatts);
> void em_dev_unregister_perf_domain(struct device *dev);
> +struct em_perf_table __rcu *em_allocate_table(struct em_perf_domain *pd);
> +void em_free_table(struct em_perf_table __rcu *table);
>
> /**
> * em_pd_get_efficient_state() - Get an efficient performance state from the EM
> @@ -365,6 +370,12 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
> {
> return 0;
> }
> +static inline
> +struct em_perf_table __rcu *em_allocate_table(struct em_perf_domain *pd)
> +{
> + return NULL;
> +}
> +static inline void em_free_table(struct em_perf_table __rcu *table) {}
> #endif
>
> #endif
> diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
> index c03010084208..bbc406db0be1 100644
> --- a/kernel/power/energy_model.c
> +++ b/kernel/power/energy_model.c
> @@ -114,12 +114,46 @@ static void em_destroy_table_rcu(struct rcu_head *rp)
> kfree(table);
> }
>
> -static void em_free_table(struct em_perf_table __rcu *table)
> +static void em_release_table_kref(struct kref *kref)
> {
> + struct em_perf_table __rcu *table;
> +
> + /* It was the last owner of this table so we can free */
> + table = container_of(kref, struct em_perf_table, refcount);
> +
> call_rcu(&table->rcu, em_destroy_table_rcu);
> }
>
> -static struct em_perf_table __rcu *
> +static inline void em_table_inc(struct em_perf_table __rcu *table)

Why not em_table_get()?

> +{
> + kref_get(&table->refcount);
> +}
> +
> +static void em_table_dec(struct em_perf_table __rcu *table)

And em_table_put() here?

Note that I do realize that the "put" and "get" terminology is used in
one of the subsequent patches - I'll get to it later.

> +{
> + kref_put(&table->refcount, em_release_table_kref);
> +}
> +
> +/**
> + * em_free_table() - Handles safe free of the EM table when needed
> + * @table : EM memory which is going to be freed
> + *
> + * No return values.
> + */
> +void em_free_table(struct em_perf_table __rcu *table)
> +{
> + em_table_dec(table);
> +}

Why is this necessary? The function called by it could be extern
instead and wrapped into a static inline wrapper in a header (if you
really need the alias).

> +
> +/**
> + * em_allocate_table() - Handles safe allocation of the new EM table

" - Allocate a new EM table"

And I would call this em_table_alloc() and (maybe) add an
em_table_free() alias for em_table_put().

> + * @table : EM memory which is going to be freed

So the argument is called "pd" and it is not a table. It is also used
for computing the size of the new table AFAICS.

> + *
> + * Increments the reference counter to mark that there is an owner of that

"Allocate a new EM table and initialize its kref to indicate that it
has a user."

> + * EM table. That might be a device driver module or EAS.
> + * Returns allocated table or error.
> + */
> +struct em_perf_table __rcu *
> em_allocate_table(struct em_perf_domain *pd)
> {
> struct em_perf_table __rcu *table;
> @@ -128,6 +162,11 @@ em_allocate_table(struct em_perf_domain *pd)
> table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
>
> table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL);
> + if (!table)
> + return table;

I would return NULL from here explicitly.

> +
> + kref_init(&table->refcount);
> +
> return table;
> }
>
> --