Re: [RFC PATCH v1 07/28] riscv: kernel handling on trap entry/exit for user cfi

From: Stefan O'Rear
Date: Thu Jan 25 2024 - 14:48:26 EST


On Thu, Jan 25, 2024, at 12:30 PM, Deepak Gupta wrote:
> On Thu, Jan 25, 2024 at 02:29:01AM -0500, Stefan O'Rear wrote:
>>On Thu, Jan 25, 2024, at 1:21 AM, debug@xxxxxxxxxxxx wrote:
>>> From: Deepak Gupta <debug@xxxxxxxxxxxx>
>>>
>>> Carves out space in arch specific thread struct for cfi status and shadow stack
>>> in usermode on riscv.
>>>
>>> This patch does following
>>> - defines a new structure cfi_status with status bit for cfi feature
>>> - defines shadow stack pointer, base and size in cfi_status structure
>>> - defines offsets to new member fields in thread in asm-offsets.c
>>> - Saves and restore shadow stack pointer on trap entry (U --> S) and exit
>>> (S --> U)
>>>
>>> Signed-off-by: Deepak Gupta <debug@xxxxxxxxxxxx>
>>> ---
>>> arch/riscv/include/asm/processor.h | 1 +
>>> arch/riscv/include/asm/thread_info.h | 3 +++
>>> arch/riscv/include/asm/usercfi.h | 24 ++++++++++++++++++++++++
>>> arch/riscv/kernel/asm-offsets.c | 5 ++++-
>>> arch/riscv/kernel/entry.S | 25 +++++++++++++++++++++++++
>>> 5 files changed, 57 insertions(+), 1 deletion(-)
>>> create mode 100644 arch/riscv/include/asm/usercfi.h
>>>
>>> diff --git a/arch/riscv/include/asm/processor.h
>>> b/arch/riscv/include/asm/processor.h
>>> index ee2f51787ff8..d4dc298880fc 100644
>>> --- a/arch/riscv/include/asm/processor.h
>>> +++ b/arch/riscv/include/asm/processor.h
>>> @@ -14,6 +14,7 @@
>>>
>>> #include <asm/ptrace.h>
>>> #include <asm/hwcap.h>
>>> +#include <asm/usercfi.h>
>>>
>>> #ifdef CONFIG_64BIT
>>> #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
>>> diff --git a/arch/riscv/include/asm/thread_info.h
>>> b/arch/riscv/include/asm/thread_info.h
>>> index 320bc899a63b..6a2acecec546 100644
>>> --- a/arch/riscv/include/asm/thread_info.h
>>> +++ b/arch/riscv/include/asm/thread_info.h
>>> @@ -58,6 +58,9 @@ struct thread_info {
>>> int cpu;
>>> unsigned long syscall_work; /* SYSCALL_WORK_ flags */
>>> unsigned long envcfg;
>>> +#ifdef CONFIG_RISCV_USER_CFI
>>> + struct cfi_status user_cfi_state;
>>> +#endif
>>> #ifdef CONFIG_SHADOW_CALL_STACK
>>> void *scs_base;
>>> void *scs_sp;
>>> diff --git a/arch/riscv/include/asm/usercfi.h
>>> b/arch/riscv/include/asm/usercfi.h
>>> new file mode 100644
>>> index 000000000000..080d7077d12c
>>> --- /dev/null
>>> +++ b/arch/riscv/include/asm/usercfi.h
>>> @@ -0,0 +1,24 @@
>>> +/* SPDX-License-Identifier: GPL-2.0
>>> + * Copyright (C) 2023 Rivos, Inc.
>>> + * Deepak Gupta <debug@xxxxxxxxxxxx>
>>> + */
>>> +#ifndef _ASM_RISCV_USERCFI_H
>>> +#define _ASM_RISCV_USERCFI_H
>>> +
>>> +#ifndef __ASSEMBLY__
>>> +#include <linux/types.h>
>>> +
>>> +#ifdef CONFIG_RISCV_USER_CFI
>>> +struct cfi_status {
>>> + unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
>>> + unsigned long rsvd : ((sizeof(unsigned long)*8) - 1);
>>> + unsigned long user_shdw_stk; /* Current user shadow stack pointer */
>>> + unsigned long shdw_stk_base; /* Base address of shadow stack */
>>> + unsigned long shdw_stk_size; /* size of shadow stack */
>>> +};
>>> +
>>> +#endif /* CONFIG_RISCV_USER_CFI */
>>> +
>>> +#endif /* __ASSEMBLY__ */
>>> +
>>> +#endif /* _ASM_RISCV_USERCFI_H */
>>> diff --git a/arch/riscv/kernel/asm-offsets.c
>>> b/arch/riscv/kernel/asm-offsets.c
>>> index cdd8f095c30c..5e1f412e96ba 100644
>>> --- a/arch/riscv/kernel/asm-offsets.c
>>> +++ b/arch/riscv/kernel/asm-offsets.c
>>> @@ -43,8 +43,11 @@ void asm_offsets(void)
>>> #ifdef CONFIG_SHADOW_CALL_STACK
>>> OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
>>> #endif
>>> -
>>> OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
>>> +#ifdef CONFIG_RISCV_USER_CFI
>>> + OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state);
>>> + OFFSET(TASK_TI_USER_SSP, task_struct,
>>> thread_info.user_cfi_state.user_shdw_stk);
>>> +#endif
>>> OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
>>> OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
>>> OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]);
>>> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>>> index 63c3855ba80d..410659e2eadb 100644
>>> --- a/arch/riscv/kernel/entry.S
>>> +++ b/arch/riscv/kernel/entry.S
>>> @@ -49,6 +49,21 @@ SYM_CODE_START(handle_exception)
>>> REG_S x5, PT_T0(sp)
>>> save_from_x6_to_x31
>>>
>>> +#ifdef CONFIG_RISCV_USER_CFI
>>> + /*
>>> + * we need to save cfi status only when previous mode was U
>>> + */
>>> + csrr s2, CSR_STATUS
>>> + andi s2, s2, SR_SPP
>>> + bnez s2, skip_bcfi_save
>>> + /* load cfi status word */
>>> + lw s3, TASK_TI_CFI_STATUS(tp)
>>> + andi s3, s3, 1
>>> + beqz s3, skip_bcfi_save
>>> + csrr s3, CSR_SSP
>>> + REG_S s3, TASK_TI_USER_SSP(tp) /* save user ssp in thread_info */
>>> +skip_bcfi_save:
>>> +#endif
>>> /*
>>> * Disable user-mode memory access as it should only be set in the
>>> * actual user copy routines.
>>> @@ -141,6 +156,16 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
>>> * structures again.
>>> */
>>> csrw CSR_SCRATCH, tp
>>> +
>>> +#ifdef CONFIG_RISCV_USER_CFI
>>> + lw s3, TASK_TI_CFI_STATUS(tp)
>>> + andi s3, s3, 1
>>> + beqz s3, skip_bcfi_resume
>>> + REG_L s3, TASK_TI_USER_SSP(tp) /* restore user ssp from thread struct */
>>> + csrw CSR_SSP, s3
>>> +skip_bcfi_resume:
>>> +#endif
>>> +
>>
>>We shouldn't need any of this in the entry/exit code, at least as long as
>>the kernel itself is not using Zicfiss. ssp can keep its value in the
>>kernel and swap it on task switches. Our entry/exit code is rather short
>>and I'd like to keep it that way.
>
> I kept it here because sooner or later we will need to establish kernel
> shadow
> stack. Kernel shadow stack on riscv (compared to other arches) kernel
> actually will
> be easier to support and adopt because there is already support for
> shadow call stack
> (SCS, [1]). Difference between existing shadow call stack (SCS) and
> `zicfiss` based
> kernel shadow stack would be
>
> - In prolog instead of using `sd`, we will be inserting `sspush` to
> save ret addr
> - In epilog instead of using `ld` and compare, we will be inserting
> `sspopchk`
>
> So a lot underlying work and functional testing for shadow kernel stack
> is already carried
> out with SCS patches. It would be easier and faster to re-use SCS
> patches to support
> `zicfiss` based shadow stack.

Do you think that realistically, after all the patches are merged, almost all
kernel configurations that enable kernel Zicfiss will also enable userspace
Zicfiss and vice versa?

If not - if Zicfiss exclusively in user mode is likely to be a common
configuration - then the kernel should handle that case in task switch.

If kernel Zicfiss and user Zicfiss are overwhelmingly likely to be supported
together, then I agree it makes sense to handle it in the same place in
entry/exit, but I think what you have is more complicated than necessary.
I'm picturing something like this:

--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -32,6 +32,13 @@ SYM_CODE_START(handle_exception)
csrr tp, CSR_SCRATCH
REG_S sp, TASK_TI_KERNEL_SP(tp)

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ALTERNATIVE("scs_save_current\n\tnop\n\tnop",
+ "csrr sp, ssp\n\t"
+ "REG_S sp, TASK_TI_SCS_SP(tp)\n\t"
+ "REG_L sp, TASK_TI_KERNEL_SP(tp)")
+#endif
+
#ifdef CONFIG_VMAP_STACK
addi sp, sp, -(PT_SIZE_ON_STACK)
srli sp, sp, THREAD_SHIFT
@@ -80,8 +87,13 @@ SYM_CODE_START(handle_exception)
/* Load the global pointer */
load_global_pointer

- /* Load the kernel shadow call stack pointer if coming from userspace */
- scs_load_current_if_task_changed s5
+ /* Load the kernel shadow call stack pointer (harmless if from kernel) */
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ALTERNATIVE("scs_load_current\n\tnop\n\tnop",
+ "REG_L s0, TASK_TI_SCS_SP(tp)\n\t"
+ "csrrw s0, ssp, s0\n\t"
+ "REG_S s0, PT_SSP(sp)")
+#endif

move a0, sp /* pt_regs */
la ra, ret_from_exception
@@ -130,7 +142,12 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
REG_S s0, TASK_TI_KERNEL_SP(tp)

/* Save the kernel shadow call stack pointer */
- scs_save_current
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ALTERNATIVE("scs_save_current\n\tnop\n\tnop",
+ "REG_L s0, PT_SSP(sp)\n\t"
+ "csrrw s0, ssp, s0\n\t"
+ "REG_S s0, TASK_TI_SCS_SP(tp)")
+#endif

/*
* Save TP into the scratch register , so we can find the kernel data


I moved the shadow stack pointer into pt_regs because it's nearly a GPR and has a
meaningfully different value on every trap; this allows us to talk about the ssp
at the time of a trap in kernel mode.

Saving both the sp and ssp in Lrestore_kernel_tpsp avoids adding conditional logic
to Lsave_context. I believe the current code also has a bug: if the U-mode tp is,
by chance or intentional exploit, equal to the thread_info address, kernel code
will be executed with whatever value U-mode left in gp.

I also notice that there is no check for overflow of the shadow stack. This may be
intentional, since as long as the shadow stack is at least half the size of the
main kernel stack the latter will always overflow first, barring deeper corruption
of the control structures or assembly code issues. I expect that the result in that
case would be an infinite loop of shadow stack overflows in handle_bad_stack and
do_trap_software_check with occasional visits to handle_kernel_stack_overflow.

I believe that "Save unwound kernel stack pointer in thread_info" and "Save the
kernel shadow call stack pointer" are both no-ops in all cases other than ret_from_fork,
since the ABI requires the C trap handler to return with the same sp and ssp it
was entered with. Optimizing that would be a separate issue.

-s

>
> I don't have favorites here, if overwhelving opinion of community here
> is to take this
> logic into task switching and re-work this logic back into entry.S
> whenever shadow stack for
> kernel patches are posted, I can do that as well.
>
> [1] -
> https://lore.kernel.org/all/20230828195833.756747-8-samitolvanen@xxxxxxxxxx/
>
>>
>>-s
>>
>>> 1:
>>> REG_L a0, PT_STATUS(sp)
>>> /*
>>> --
>>> 2.43.0
>>>
>>>
>>> _______________________________________________
>>> linux-riscv mailing list
>>> linux-riscv@xxxxxxxxxxxxxxxxxxx
>>> http://lists.infradead.org/mailman/listinfo/linux-riscv
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@xxxxxxxxxxxxxxxxxxx
> http://lists.infradead.org/mailman/listinfo/linux-riscv