Re: [RFC PATCH v1 07/28] riscv: kernel handling on trap entry/exit for user cfi

From: Deepak Gupta
Date: Thu Jan 25 2024 - 19:25:31 EST


On Thu, Jan 25, 2024 at 02:47:49PM -0500, Stefan O'Rear wrote:
On Thu, Jan 25, 2024, at 12:30 PM, Deepak Gupta wrote:
On Thu, Jan 25, 2024 at 02:29:01AM -0500, Stefan O'Rear wrote:
On Thu, Jan 25, 2024, at 1:21 AM, debug@xxxxxxxxxxxx wrote:
From: Deepak Gupta <debug@xxxxxxxxxxxx>

Carves out space in arch specific thread struct for cfi status and shadow stack
in usermode on riscv.

This patch does following
- defines a new structure cfi_status with status bit for cfi feature
- defines shadow stack pointer, base and size in cfi_status structure
- defines offsets to new member fields in thread in asm-offsets.c
- Saves and restore shadow stack pointer on trap entry (U --> S) and exit
(S --> U)

Signed-off-by: Deepak Gupta <debug@xxxxxxxxxxxx>
---
arch/riscv/include/asm/processor.h | 1 +
arch/riscv/include/asm/thread_info.h | 3 +++
arch/riscv/include/asm/usercfi.h | 24 ++++++++++++++++++++++++
arch/riscv/kernel/asm-offsets.c | 5 ++++-
arch/riscv/kernel/entry.S | 25 +++++++++++++++++++++++++
5 files changed, 57 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/include/asm/usercfi.h

diff --git a/arch/riscv/include/asm/processor.h
b/arch/riscv/include/asm/processor.h
index ee2f51787ff8..d4dc298880fc 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -14,6 +14,7 @@

#include <asm/ptrace.h>
#include <asm/hwcap.h>
+#include <asm/usercfi.h>

#ifdef CONFIG_64BIT
#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
diff --git a/arch/riscv/include/asm/thread_info.h
b/arch/riscv/include/asm/thread_info.h
index 320bc899a63b..6a2acecec546 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -58,6 +58,9 @@ struct thread_info {
int cpu;
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
unsigned long envcfg;
+#ifdef CONFIG_RISCV_USER_CFI
+ struct cfi_status user_cfi_state;
+#endif
#ifdef CONFIG_SHADOW_CALL_STACK
void *scs_base;
void *scs_sp;
diff --git a/arch/riscv/include/asm/usercfi.h
b/arch/riscv/include/asm/usercfi.h
new file mode 100644
index 000000000000..080d7077d12c
--- /dev/null
+++ b/arch/riscv/include/asm/usercfi.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (C) 2023 Rivos, Inc.
+ * Deepak Gupta <debug@xxxxxxxxxxxx>
+ */
+#ifndef _ASM_RISCV_USERCFI_H
+#define _ASM_RISCV_USERCFI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+#ifdef CONFIG_RISCV_USER_CFI
+struct cfi_status {
+ unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
+ unsigned long rsvd : ((sizeof(unsigned long)*8) - 1);
+ unsigned long user_shdw_stk; /* Current user shadow stack pointer */
+ unsigned long shdw_stk_base; /* Base address of shadow stack */
+ unsigned long shdw_stk_size; /* size of shadow stack */
+};
+
+#endif /* CONFIG_RISCV_USER_CFI */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_RISCV_USERCFI_H */
diff --git a/arch/riscv/kernel/asm-offsets.c
b/arch/riscv/kernel/asm-offsets.c
index cdd8f095c30c..5e1f412e96ba 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -43,8 +43,11 @@ void asm_offsets(void)
#ifdef CONFIG_SHADOW_CALL_STACK
OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
#endif
-
OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
+#ifdef CONFIG_RISCV_USER_CFI
+ OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state);
+ OFFSET(TASK_TI_USER_SSP, task_struct,
thread_info.user_cfi_state.user_shdw_stk);
+#endif
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 63c3855ba80d..410659e2eadb 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -49,6 +49,21 @@ SYM_CODE_START(handle_exception)
REG_S x5, PT_T0(sp)
save_from_x6_to_x31

+#ifdef CONFIG_RISCV_USER_CFI
+ /*
+ * we need to save cfi status only when previous mode was U
+ */
+ csrr s2, CSR_STATUS
+ andi s2, s2, SR_SPP
+ bnez s2, skip_bcfi_save
+ /* load cfi status word */
+ lw s3, TASK_TI_CFI_STATUS(tp)
+ andi s3, s3, 1
+ beqz s3, skip_bcfi_save
+ csrr s3, CSR_SSP
+ REG_S s3, TASK_TI_USER_SSP(tp) /* save user ssp in thread_info */
+skip_bcfi_save:
+#endif
/*
* Disable user-mode memory access as it should only be set in the
* actual user copy routines.
@@ -141,6 +156,16 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
* structures again.
*/
csrw CSR_SCRATCH, tp
+
+#ifdef CONFIG_RISCV_USER_CFI
+ lw s3, TASK_TI_CFI_STATUS(tp)
+ andi s3, s3, 1
+ beqz s3, skip_bcfi_resume
+ REG_L s3, TASK_TI_USER_SSP(tp) /* restore user ssp from thread struct */
+ csrw CSR_SSP, s3
+skip_bcfi_resume:
+#endif
+

We shouldn't need any of this in the entry/exit code, at least as long as
the kernel itself is not using Zicfiss. ssp can keep its value in the
kernel and swap it on task switches. Our entry/exit code is rather short
and I'd like to keep it that way.

I kept it here because sooner or later we will need to establish kernel
shadow
stack. Kernel shadow stack on riscv (compared to other arches) kernel
actually will
be easier to support and adopt because there is already support for
shadow call stack
(SCS, [1]). Difference between existing shadow call stack (SCS) and
`zicfiss` based
kernel shadow stack would be

- In prolog instead of using `sd`, we will be inserting `sspush` to
save ret addr
- In epilog instead of using `ld` and compare, we will be inserting
`sspopchk`

So a lot underlying work and functional testing for shadow kernel stack
is already carried
out with SCS patches. It would be easier and faster to re-use SCS
patches to support
`zicfiss` based shadow stack.

Do you think that realistically, after all the patches are merged, almost all
kernel configurations that enable kernel Zicfiss will also enable userspace
Zicfiss and vice versa?

If not - if Zicfiss exclusively in user mode is likely to be a common
configuration - then the kernel should handle that case in task switch.

If kernel Zicfiss and user Zicfiss are overwhelmingly likely to be supported
together, then I agree it makes sense to handle it in the same place in
entry/exit, but I think what you have is more complicated than necessary.
I'm picturing something like this:

I expect user mode would be the first target and even if kernel shadow stacks
are enabled, it may not be as pervasive in adoption as I expect for user mode.

I do expect it to be used in settings where both are enabled (if not overwhelmingly)

Since that has to be supported, it's better to have it organized where we are saving/
restoring in a way which serves both needs rather than have two ways to save/restore
depending on how user shadow stacks and kernel shadow stacks are configured.


--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -32,6 +32,13 @@ SYM_CODE_START(handle_exception)
csrr tp, CSR_SCRATCH
REG_S sp, TASK_TI_KERNEL_SP(tp)

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ALTERNATIVE("scs_save_current\n\tnop\n\tnop",
+ "csrr sp, ssp\n\t"
+ "REG_S sp, TASK_TI_SCS_SP(tp)\n\t"
+ "REG_L sp, TASK_TI_KERNEL_SP(tp)")
+#endif
+
#ifdef CONFIG_VMAP_STACK
addi sp, sp, -(PT_SIZE_ON_STACK)
srli sp, sp, THREAD_SHIFT
@@ -80,8 +87,13 @@ SYM_CODE_START(handle_exception)
/* Load the global pointer */
load_global_pointer

- /* Load the kernel shadow call stack pointer if coming from userspace */
- scs_load_current_if_task_changed s5
+ /* Load the kernel shadow call stack pointer (harmless if from kernel) */
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ALTERNATIVE("scs_load_current\n\tnop\n\tnop",
+ "REG_L s0, TASK_TI_SCS_SP(tp)\n\t"
+ "csrrw s0, ssp, s0\n\t"
+ "REG_S s0, PT_SSP(sp)")
+#endif

move a0, sp /* pt_regs */
la ra, ret_from_exception
@@ -130,7 +142,12 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
REG_S s0, TASK_TI_KERNEL_SP(tp)

/* Save the kernel shadow call stack pointer */
- scs_save_current
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ALTERNATIVE("scs_save_current\n\tnop\n\tnop",
+ "REG_L s0, PT_SSP(sp)\n\t"
+ "csrrw s0, ssp, s0\n\t"
+ "REG_S s0, TASK_TI_SCS_SP(tp)")
+#endif

I've not used alternatives earlier. But yes along with kernel shadow stack
this is much more appealing. Let me munch on it a bit.

/*
* Save TP into the scratch register , so we can find the kernel data


I moved the shadow stack pointer into pt_regs because it's nearly a GPR and has a
meaningfully different value on every trap; this allows us to talk about the ssp
at the time of a trap in kernel mode.

I had been under the impression that changing `pt_regs` is something that we don't
do usually. If it doesn't require a high bar, I'll do that. Infact, one my earlier
implementation had ssp in pt_regs.


Saving both the sp and ssp in Lrestore_kernel_tpsp avoids adding conditional logic
to Lsave_context. I believe the current code also has a bug: if the U-mode tp is,
by chance or intentional exploit, equal to the thread_info address, kernel code
will be executed with whatever value U-mode left in gp.

I also notice that there is no check for overflow of the shadow stack. This may be
intentional, since as long as the shadow stack is at least half the size of the
main kernel stack the latter will always overflow first, barring deeper corruption
of the control structures or assembly code issues. I expect that the result in that
case would be an infinite loop of shadow stack overflows in handle_bad_stack and
do_trap_software_check with occasional visits to handle_kernel_stack_overflow.

I believe that "Save unwound kernel stack pointer in thread_info" and "Save the
kernel shadow call stack pointer" are both no-ops in all cases other than ret_from_fork,
since the ABI requires the C trap handler to return with the same sp and ssp it
was entered with. Optimizing that would be a separate issue.

-s


I don't have favorites here, if overwhelving opinion of community here
is to take this
logic into task switching and re-work this logic back into entry.S
whenever shadow stack for
kernel patches are posted, I can do that as well.

[1] -
https://lore.kernel.org/all/20230828195833.756747-8-samitolvanen@xxxxxxxxxx/


-s

1:
REG_L a0, PT_STATUS(sp)
/*
--
2.43.0


_______________________________________________
linux-riscv mailing list
linux-riscv@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/linux-riscv