Re: [PATCH v14 2/5] riscv: Add static key for misaligned accesses

From: Evan Green
Date: Mon Jan 08 2024 - 14:23:33 EST


On Wed, Dec 27, 2023 at 9:38 AM Charlie Jenkins <charlie@xxxxxxxxxxxx> wrote:
>
> Support static branches depending on the value of misaligned accesses.
> This will be used by a later patch in the series. All online cpus must
> be considered "fast" for this static branch to be flipped.
>
> Signed-off-by: Charlie Jenkins <charlie@xxxxxxxxxxxx>

This is fancier than I would have gone for, I probably would have
punted on heterogeneous hotplug out of laziness for now. However, what
you've done looks smart, in that we'll basically flip the branch if at
any moment all the online CPUs are fast. I've got some nits below, but
won't withhold my review for them (making them optional I suppose :)).

Reviewed-by: Evan Green <evan@xxxxxxxxxxxx>

> ---
> arch/riscv/include/asm/cpufeature.h | 2 +
> arch/riscv/kernel/cpufeature.c | 89 +++++++++++++++++++++++++++++++++++--
> 2 files changed, 87 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index a418c3112cd6..7b129e5e2f07 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -133,4 +133,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> }
>
> +DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> +
> #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index b3785ffc1570..dfd716b93565 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -8,8 +8,10 @@
>
> #include <linux/acpi.h>
> #include <linux/bitmap.h>
> +#include <linux/cpu.h>
> #include <linux/cpuhotplug.h>
> #include <linux/ctype.h>
> +#include <linux/jump_label.h>
> #include <linux/log2.h>
> #include <linux/memory.h>
> #include <linux/module.h>
> @@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
> /* Performance information */
> DEFINE_PER_CPU(long, misaligned_access_speed);
>
> +static cpumask_t fast_misaligned_access;
> +
> /**
> * riscv_isa_extension_base() - Get base extension word
> *
> @@ -643,6 +647,16 @@ static int check_unaligned_access(void *param)
> (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
>
> per_cpu(misaligned_access_speed, cpu) = speed;
> +
> + /*
> + * Set the value of fast_misaligned_access of a CPU. These operations
> + * are atomic to avoid race conditions.
> + */
> + if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
> + cpumask_set_cpu(cpu, &fast_misaligned_access);
> + else
> + cpumask_clear_cpu(cpu, &fast_misaligned_access);
> +
> return 0;
> }
>
> @@ -655,13 +669,70 @@ static void check_unaligned_access_nonboot_cpu(void *param)
> check_unaligned_access(pages[cpu]);
> }
>
> +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> +
> +static int exclude_set_unaligned_access_static_branches(int cpu)
> +{
> + /*
> + * Same as set_unaligned_access_static_branches, except excludes the
> + * given CPU from the result. When a CPU is hotplugged into an offline
> + * state, this function is called before the CPU is set to offline in
> + * the cpumask, and thus the CPU needs to be explicitly excluded.
> + */
> +
> + cpumask_t online_fast_misaligned_access;
> +
> + cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> + cpumask_clear_cpu(cpu, &online_fast_misaligned_access);
> +
> + if (cpumask_weight(&online_fast_misaligned_access) == (num_online_cpus() - 1))
> + static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> + else
> + static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> +
> + return 0;
> +}

A minor nit: the function above and below are looking a little
copy/pasty, and lead to multiple spots where the static branch gets
changed. You could make a third function that actually does the
setting with parameters, then these two could call it in different
ways. The return types also don't need to be int, since you always
return 0. Something like:

static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
if (cpumask_weight(mask) == weight) {
static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
} else {
static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
}
}

static void set_unaligned_access_branches(void)
{
cpumask_t fast_and_online;

cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}

static void set_unaligned_access_branches_except_cpu(unsigned int cpu)
{
cpumask_t fast_except_me;

cpumask_and(&online_fast_misaligned_access,
&fast_misaligned_access, cpu_online_mask);
cpumask_clear_cpu(cpu, &fast_except_me);
modify_unaligned_access_branches(&fast_except_me,
num_online_cpus() - 1);
}

> +
> +static int set_unaligned_access_static_branches(void)
> +{
> + /*
> + * This will be called after check_unaligned_access_all_cpus so the
> + * result of unaligned access speed for all CPUs will be available.
> + *
> + * To avoid the number of online cpus changing between reading
> + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
> + * held before calling this function.
> + */
> + cpumask_t online_fast_misaligned_access;
> +
> + cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> +
> + if (cpumask_weight(&online_fast_misaligned_access) == num_online_cpus())
> + static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> + else
> + static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> +
> + return 0;
> +}
> +
> +static int lock_and_set_unaligned_access_static_branch(void)
> +{
> + cpus_read_lock();
> + set_unaligned_access_static_branches();
> + cpus_read_unlock();
> +
> + return 0;
> +}
> +
> +arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
> +
> static int riscv_online_cpu(unsigned int cpu)
> {
> static struct page *buf;
>
> /* We are already set since the last check */
> if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
> - return 0;
> + goto exit;
>
> buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
> if (!buf) {
> @@ -671,7 +742,14 @@ static int riscv_online_cpu(unsigned int cpu)
>
> check_unaligned_access(buf);
> __free_pages(buf, MISALIGNED_BUFFER_ORDER);
> - return 0;
> +
> +exit:
> + return set_unaligned_access_static_branches();
> +}
> +
> +static int riscv_offline_cpu(unsigned int cpu)
> +{
> + return exclude_set_unaligned_access_static_branches(cpu);
> }
>
> /* Measure unaligned access on all CPUs present at boot in parallel. */
> @@ -705,9 +783,12 @@ static int check_unaligned_access_all_cpus(void)
> /* Check core 0. */
> smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
>
> - /* Setup hotplug callback for any new CPUs that come online. */
> + /*
> + * Setup hotplug callbacks for any new CPUs that come online or go
> + * offline.
> + */
> cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> - riscv_online_cpu, NULL);
> + riscv_online_cpu, riscv_offline_cpu);
>
> out:
> unaligned_emulation_finish();
>
> --
> 2.43.0
>