Re: [PATCH 6/7] riscv: report misaligned accesses emulation to hwprobe

From: Clément Léger
Date: Thu Sep 28 2023 - 03:46:35 EST




On 26/09/2023 23:57, Evan Green wrote:
> On Tue, Sep 26, 2023 at 8:03 AM Clément Léger <cleger@xxxxxxxxxxxx> wrote:
>>
>> hwprobe provides a way to report if misaligned access are emulated. In
>> order to correctly populate that feature, we can check if it actually
>> traps when doing a misaligned access. This can be checked using an
>> exception table entry which will actually be used when a misaligned
>> access is done from kernel mode.
>>
>> Signed-off-by: Clément Léger <cleger@xxxxxxxxxxxx>
>> ---
>> arch/riscv/include/asm/cpufeature.h | 6 +++
>> arch/riscv/kernel/cpufeature.c | 6 ++-
>> arch/riscv/kernel/setup.c | 1 +
>> arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
>> 4 files changed, 74 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>> index d0345bd659c9..c1f0ef02cd7d 100644
>> --- a/arch/riscv/include/asm/cpufeature.h
>> +++ b/arch/riscv/include/asm/cpufeature.h
>> @@ -8,6 +8,7 @@
>>
>> #include <linux/bitmap.h>
>> #include <asm/hwcap.h>
>> +#include <asm/hwprobe.h>
>>
>> /*
>> * These are probed via a device_initcall(), via either the SBI or directly
>> @@ -32,4 +33,9 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
>>
>> void check_unaligned_access(int cpu);
>>
>> +bool unaligned_ctl_available(void);
>> +
>> +bool check_unaligned_access_emulated(int cpu);
>> +void unaligned_emulation_finish(void);
>> +
>> #endif
>> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
>> index 1cfbba65d11a..fbbde800bc21 100644
>> --- a/arch/riscv/kernel/cpufeature.c
>> +++ b/arch/riscv/kernel/cpufeature.c
>> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
>> void *src;
>> long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
>>
>> + if (check_unaligned_access_emulated(cpu))
>
> This spot (referenced below).
>
>> + return;
>> +
>> page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
>> if (!page) {
>> pr_warn("Can't alloc pages to measure memcpy performance");
>> @@ -645,9 +648,10 @@ void check_unaligned_access(int cpu)
>> __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
>> }
>>
>> -static int check_unaligned_access_boot_cpu(void)
>> +static int __init check_unaligned_access_boot_cpu(void)
>> {
>> check_unaligned_access(0);
>> + unaligned_emulation_finish();
>> return 0;
>> }
>>
>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>> index e600aab116a4..3af6ad4df7cf 100644
>> --- a/arch/riscv/kernel/setup.c
>> +++ b/arch/riscv/kernel/setup.c
>> @@ -26,6 +26,7 @@
>> #include <asm/acpi.h>
>> #include <asm/alternative.h>
>> #include <asm/cacheflush.h>
>> +#include <asm/cpufeature.h>
>> #include <asm/cpu_ops.h>
>> #include <asm/early_ioremap.h>
>> #include <asm/pgtable.h>
>> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
>> index b5fb1ff078e3..fa81f6952fa4 100644
>> --- a/arch/riscv/kernel/traps_misaligned.c
>> +++ b/arch/riscv/kernel/traps_misaligned.c
>> @@ -9,11 +9,14 @@
>> #include <linux/perf_event.h>
>> #include <linux/irq.h>
>> #include <linux/stringify.h>
>> +#include <linux/prctl.h>
>>
>> #include <asm/processor.h>
>> #include <asm/ptrace.h>
>> #include <asm/csr.h>
>> #include <asm/entry-common.h>
>> +#include <asm/hwprobe.h>
>> +#include <asm/cpufeature.h>
>>
>> #define INSN_MATCH_LB 0x3
>> #define INSN_MASK_LB 0x707f
>> @@ -396,8 +399,10 @@ union reg_data {
>> u64 data_u64;
>> };
>>
>> +static bool unaligned_ctl __read_mostly;
>> +
>> /* sysctl hooks */
>> -int unaligned_enabled __read_mostly = 1; /* Enabled by default */
>> +int unaligned_enabled __read_mostly;
>>
>> int handle_misaligned_load(struct pt_regs *regs)
>> {
>> @@ -412,6 +417,9 @@ int handle_misaligned_load(struct pt_regs *regs)
>> if (!unaligned_enabled)
>> return -1;
>>
>> + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
>> + return -1;
>> +
>> if (get_insn(regs, epc, &insn))
>> return -1;
>>
>> @@ -511,6 +519,9 @@ int handle_misaligned_store(struct pt_regs *regs)
>> if (!unaligned_enabled)
>> return -1;
>>
>> + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
>> + return -1;
>> +
>> if (get_insn(regs, epc, &insn))
>> return -1;
>>
>> @@ -585,3 +596,53 @@ int handle_misaligned_store(struct pt_regs *regs)
>>
>> return 0;
>> }
>> +
>> +bool check_unaligned_access_emulated(int cpu)
>> +{
>> + unsigned long emulated = 1, tmp_var;
>> +
>> + /* Use a fixup to detect if misaligned access triggered an exception */
>> + __asm__ __volatile__ (
>> + "1:\n"
>> + " "REG_L" %[tmp], 1(%[ptr])\n"
>> + " li %[emulated], 0\n"
>> + "2:\n"
>> + _ASM_EXTABLE(1b, 2b)
>> + : [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
>> + : [ptr] "r" (&tmp_var)
>> + : "memory");
>> +
>> + if (!emulated)
>> + return false;
>> +
>> + per_cpu(misaligned_access_speed, cpu) =
>> + RISCV_HWPROBE_MISALIGNED_EMULATED;
>
> For tidiness, can we move the assignment of this per-cpu variable into
> check_unaligned_access(), at the spot I referenced above. That way
> people looking to see how this variable is set don't have to hunt
> through multiple locations.

Agreed, that seems better.

>
>> +
>> + return true;
>> +}
>> +
>> +void __init unaligned_emulation_finish(void)
>> +{
>> + int cpu;
>> +
>> + /*
>> + * We can only support PR_UNALIGN controls if all CPUs have misaligned
>> + * accesses emulated since tasks requesting such control can run on any
>> + * CPU.
>> + */
>> + for_each_possible_cpu(cpu) {
>> + if (per_cpu(misaligned_access_speed, cpu) !=
>> + RISCV_HWPROBE_MISALIGNED_EMULATED) {
>> + goto out;
>> + }
>> + }
>> + unaligned_ctl = true;
>
> This doesn't handle the case where a CPU is hotplugged later that
> doesn't match with the others. You may want to add a patch that fails
> the onlining of that new CPU if unaligned_ctl is true and
> new_cpu.misaligned_access_speed != EMULATED.

So actually, this will require a bit more plumbing as I realize the
switch to disable misalignment support is global. This switch should
only be disabled at boot which means I won't be able to disable it at
runtime (while hiotplugging a CPU) for CPU detection. There are multiple
ways to handle that:

1- Have a per-cpu switch for misalignment handling which would be
disabled only when detection is needed.

2- Assume that once detected at boot-time, emulation will not change.

Not sure which one is better though. Advice are welcomed.

Clément

>
> -Evan