Re: [PATCH v3] arm64: sdei: abort running SDEI handlers during crash

From: D Scott Phillips
Date: Thu Jun 22 2023 - 16:35:26 EST


D Scott Phillips <scott@xxxxxxxxxxxxxxxxxxxxxx> writes:

> Interrupts are blocked in SDEI context, per the SDEI spec: "The client
> interrupts cannot preempt the event handler." If we crashed in the SDEI
> handler-running context (as with ACPI's AGDI) then we need to clean up the
> SDEI state before proceeding to the crash kernel so that the crash kernel
> can have working interrupts.
>
> Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME
> the handler, discarding the interrupted context.
>
> Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking")
> Signed-off-by: D Scott Phillips <scott@xxxxxxxxxxxxxxxxxxxxxx>

Ping on this one.

> ---
> Changes since v2:
> - Dropped the patch fiddling with the sdei conduit.
> v2 Link: https://lore.kernel.org/linux-arm-kernel/20230329202519.6110-1-scott@xxxxxxxxxxxxxxxxxxxxxx/
>
> Changes since v1:
> - Store the active SDEI event being handled per-cpu, use the per-cpu active
> handler information to know when to abort.
> - Add prints before attempting to abort sdei handlers.
> v1 Link: https://lore.kernel.org/linux-arm-kernel/20230204000851.3871-1-scott@xxxxxxxxxxxxxxxxxxxxxx/
>
> arch/arm64/include/asm/sdei.h | 8 ++++++++
> arch/arm64/kernel/entry.S | 25 +++++++++++++++++++++++++
> arch/arm64/kernel/sdei.c | 3 +++
> arch/arm64/kernel/smp.c | 27 +++++++++++++++++++++++----
> 4 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
> index 4292d9bafb9d..dc2f038a61ef 100644
> --- a/arch/arm64/include/asm/sdei.h
> +++ b/arch/arm64/include/asm/sdei.h
> @@ -17,6 +17,11 @@
>
> #include <asm/virt.h>
>
> +#ifdef CONFIG_ARM_SDE_INTERFACE
> +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
> +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
> +#endif
> +
> extern unsigned long sdei_exit_mode;
>
> /* Software Delegated Exception entry point from firmware*/
> @@ -29,6 +34,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
> unsigned long pc,
> unsigned long pstate);
>
> +/* Abort a running handler. Context is discarded. */
> +void sdei_handler_abort(void);
> +
> /*
> * The above entry point does the minimum to call C code. This function does
> * anything else, before calling the driver.
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index ab2a6e33c052..e49f72b5fb63 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -1007,6 +1007,12 @@ SYM_CODE_START(__sdei_asm_handler)
> ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
> #endif
>
> + cbnz w4, 1f
> + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
> + b 2f
> +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
> +2: str x19, [x5]
> +
> #ifdef CONFIG_VMAP_STACK
> /*
> * entry.S may have been using sp as a scratch register, find whether
> @@ -1072,6 +1078,13 @@ SYM_CODE_START(__sdei_asm_handler)
>
> ldr_l x2, sdei_exit_mode
>
> + ldrb w3, [x4, #SDEI_EVENT_PRIORITY]
> + cbnz w3, 1f
> + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
> + b 2f
> +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
> +2: str xzr, [x5]
> +
> alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
> sdei_handler_exit exit_mode=x2
> alternative_else_nop_endif
> @@ -1082,4 +1095,16 @@ alternative_else_nop_endif
> #endif
> SYM_CODE_END(__sdei_asm_handler)
> NOKPROBE(__sdei_asm_handler)
> +
> +SYM_CODE_START(sdei_handler_abort)
> + mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
> + adr x1, 1f
> + ldr_l x2, sdei_exit_mode
> + sdei_handler_exit exit_mode=x2
> + // exit the handler and jump to the next instruction.
> + // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
> +1: ret
> +SYM_CODE_END(sdei_handler_abort)
> +NOKPROBE(sdei_handler_abort)
> +
> #endif /* CONFIG_ARM_SDE_INTERFACE */
> diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
> index 830be01af32d..255d12f881c2 100644
> --- a/arch/arm64/kernel/sdei.c
> +++ b/arch/arm64/kernel/sdei.c
> @@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
> DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
> #endif
>
> +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
> +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
> +
> static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
> {
> unsigned long *p;
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 4e8327264255..ea1595b51590 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -1047,10 +1047,8 @@ void crash_smp_send_stop(void)
> * If this cpu is the only one alive at this point in time, online or
> * not, there are no stop messages to be sent around, so just back out.
> */
> - if (num_other_online_cpus() == 0) {
> - sdei_mask_local_cpu();
> - return;
> - }
> + if (num_other_online_cpus() == 0)
> + goto skip_ipi;
>
> cpumask_copy(&mask, cpu_online_mask);
> cpumask_clear_cpu(smp_processor_id(), &mask);
> @@ -1069,7 +1067,28 @@ void crash_smp_send_stop(void)
> pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
> cpumask_pr_args(&mask));
>
> +skip_ipi:
> sdei_mask_local_cpu();
> +
> +#ifdef CONFIG_ARM_SDE_INTERFACE
> + /*
> + * If the crash happened in an SDEI event handler then we need to
> + * finish the handler with the firmware so that we can have working
> + * interrupts in the crash kernel.
> + */
> + if (__this_cpu_read(sdei_active_critical_event)) {
> + pr_warn("SMP: stopped CPUs from SDEI critical event handler "
> + "context, attempting to finish handler.\n");
> + sdei_handler_abort();
> + __this_cpu_write(sdei_active_critical_event, NULL);
> + }
> + if (__this_cpu_read(sdei_active_normal_event)) {
> + pr_warn("SMP: stopped CPUs from SDEI normal event handler "
> + "context, attempting to finish handler.\n");
> + sdei_handler_abort();
> + __this_cpu_write(sdei_active_normal_event, NULL);
> + }
> +#endif
> }
>
> bool smp_crash_stop_failed(void)
> --
> 2.40.1
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel