[PATCH v1 5/8] sbm: x86: handle sandbox mode faults

From: Petr Tesarik
Date: Wed Feb 14 2024 - 06:37:46 EST


From: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>

Provide a fault handler for sandbox mode. It sets the error code of the
sandbox mode instance, aborts the sandbox and returns to the caller. To
allow a graceful return from a fatal fault, save all callee-saved
registers (including the stack pointer) just before passing control to
the target function.
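
As an illustration of the control flow, the entry stub behaves roughly
like the C model below (a simplified sketch for review purposes only;
x86_sbm_exec_model() is hypothetical and the real implementation is the
assembly in arch/x86/kernel/sbm/call_64.S, since the stack switch and
register save cannot be expressed in C):

	/* Rough C model of x86_sbm_exec(); not part of this patch. */
	static int x86_sbm_exec_model(struct x86_sbm_state *state,
				      sbm_func func, void *args,
				      unsigned long sbm_tos)
	{
		/* the asm pushes all callee-saved registers here */
		state->return_sp = current_stack_pointer;

		/* the asm switches %rsp to sbm_tos before the call */
		return func(args);
		/* x86_sbm_return: handle_sbm_fault() IRETs back here */
	}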

Modify the handlers for the #PF, #GP and #DF CPU exceptions to call
this handler when the exception comes from sandbox mode. The check is
based on the saved CS selector, which the interrupt entry path sets to
a value that is not otherwise possible (__SBM_CS).
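
In each affected handler, the added dispatch reduces to the same
pattern (abridged from the traps.c and fault.c hunks below; the
@address argument is 0 for exceptions that report no faulting address):

	if (sandbox_mode(regs)) {	/* i.e. regs->cs == __SBM_CS */
		handle_sbm_fault(regs, error_code, address);
		return;
	}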

For the page fault handler, make sure that the sandbox mode check is
placed before do_kern_addr_fault(). That function calls
spurious_kernel_fault(), which implements lazy TLB invalidation of
kernel pages and assumes that the faulting instruction ran with
kernel-mode page tables; it would produce false positives for sandbox
mode.
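
The resulting order of checks in handle_page_fault() is (abridged from
the fault.c hunk below):

	if (unlikely(kmmio_fault(regs, address)))
		return;

	/* must run before any kernel-address handling */
	if (sandbox_mode(regs)) {
		handle_sbm_fault(regs, error_code, address);
		return;
	}

	/* spurious_kernel_fault() may be reached from here */
	if (unlikely(fault_in_kernel_space(address))) {
		do_kern_addr_fault(regs, error_code, address);
		return;
	}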

Signed-off-by: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/include/asm/ptrace.h  | 21 +++++++++++++++++++++
arch/x86/include/asm/sbm.h     | 24 ++++++++++++++++++++++++
arch/x86/include/asm/segment.h |  7 +++++++
arch/x86/kernel/asm-offsets.c  |  5 +++++
arch/x86/kernel/sbm/call_64.S  | 21 +++++++++++++++++++++
arch/x86/kernel/sbm/core.c     | 26 ++++++++++++++++++++++++++
arch/x86/kernel/traps.c        | 11 +++++++++++
arch/x86/mm/fault.c            |  6 ++++++
8 files changed, 121 insertions(+)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b09c8f..f66f16f037b0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -164,6 +164,27 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}

+/**
+ * sandbox_mode() - did a register set come from SandBox Mode?
+ * @regs: register set
+ */
+static inline bool sandbox_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_SANDBOX_MODE
+ /*
+ * SandBox Mode always runs in 64-bit and it is not implemented
+ * on paravirt systems, so this is the only possible value.
+ */
+ return regs->cs == __SBM_CS;
+#else /* !CONFIG_SANDBOX_MODE */
+ return false;
+#endif
+#else /* !CONFIG_X86_64 */
+ return false;
+#endif
+}
+
/*
* Determine whether the register set came from any context that is running in
* 64-bit mode.
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
index ca4741b449e8..229b1ac3bbd4 100644
--- a/arch/x86/include/asm/sbm.h
+++ b/arch/x86/include/asm/sbm.h
@@ -11,23 +11,29 @@

#include <asm/processor.h>

+struct pt_regs;
+
#if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)

#include <asm/pgtable_types.h>

/**
* struct x86_sbm_state - Run-time state of the environment.
+ * @sbm: Link back to the SBM instance.
* @pgd: Sandbox mode page global directory.
* @stack: Sandbox mode stack.
* @exc_stack: Exception and IRQ stack.
+ * @return_sp: Stack pointer for returning to kernel mode.
*
* One instance of this union is allocated for each sandbox and stored as SBM
* instance private data.
*/
struct x86_sbm_state {
+ struct sbm *sbm;
pgd_t *pgd;
unsigned long stack;
unsigned long exc_stack;
+ unsigned long return_sp;
};

/**
@@ -43,6 +49,18 @@ static inline unsigned long top_of_intr_stack(void)
return current_top_of_stack();
}

+/**
+ * handle_sbm_fault() - Handle a CPU fault in sandbox mode.
+ * @regs: Saved registers at fault.
+ * @error_code: CPU error code.
+ * @address: Fault address (CR2 register).
+ *
+ * Handle a sandbox mode fault. The caller should use sandbox_mode() to
+ * check that @regs came from sandbox mode before calling this function.
+ */
+void handle_sbm_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address);
+
#else /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */

static inline unsigned long top_of_intr_stack(void)
@@ -50,6 +68,12 @@ static inline unsigned long top_of_intr_stack(void)
return current_top_of_stack();
}

+static inline void handle_sbm_fault(struct pt_regs *regs,
+ unsigned long error_code,
+ unsigned long address)
+{
+}
+
#endif /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */

#endif /* __ASM_SBM_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9d6411c65920..966831385d18 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -217,6 +217,13 @@
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3)

+/*
+ * Sandbox runs with __USER_CS, but the interrupt entry code sets the RPL
+ * in the saved selector to zero to avoid user-mode processing (FPU, signal
+ * delivery, etc.). This is the resulting pseudo-CS.
+ */
+#define __SBM_CS (GDT_ENTRY_DEFAULT_USER_CS*8)
+
#endif

#define IDT_ENTRIES 256
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 6913b372ccf7..44d4f0a0cb19 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -20,6 +20,7 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#include <asm/tdx.h>
+#include <asm/sbm.h>

#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -120,4 +121,8 @@ static void __used common(void)
OFFSET(ARIA_CTX_rounds, aria_ctx, rounds);
#endif

+#if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)
+ COMMENT("SandBox Mode");
+ OFFSET(SBM_return_sp, x86_sbm_state, return_sp);
+#endif
}
diff --git a/arch/x86/kernel/sbm/call_64.S b/arch/x86/kernel/sbm/call_64.S
index 1b232c8d15b7..6a615b4f6047 100644
--- a/arch/x86/kernel/sbm/call_64.S
+++ b/arch/x86/kernel/sbm/call_64.S
@@ -22,6 +22,17 @@
* rcx .. top of sandbox stack
*/
SYM_FUNC_START(x86_sbm_exec)
+ /* save all callee-saved registers */
+ push %rbp
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ /* to be used by sandbox abort */
+ mov %rsp, SBM_return_sp(%rdi)
+
/*
* Set up the sandbox stack:
* 1. Store the old stack pointer at the top of the sandbox stack,
@@ -37,5 +48,15 @@ SYM_FUNC_START(x86_sbm_exec)

pop %rsp

+SYM_INNER_LABEL(x86_sbm_return, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR // IRET target via handle_sbm_fault()
+
+ /* restore callee-saved registers and return */
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ pop %rbp
RET
SYM_FUNC_END(x86_sbm_exec)
diff --git a/arch/x86/kernel/sbm/core.c b/arch/x86/kernel/sbm/core.c
index 81f1b0093537..d4c378847e93 100644
--- a/arch/x86/kernel/sbm/core.c
+++ b/arch/x86/kernel/sbm/core.c
@@ -13,6 +13,8 @@
#include <asm/page.h>
#include <asm/sbm.h>
#include <asm/sections.h>
+#include <asm/segment.h>
+#include <asm/trap_pf.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
#include <linux/sbm.h>
@@ -23,6 +25,7 @@

asmlinkage int x86_sbm_exec(struct x86_sbm_state *state, sbm_func func,
void *args, unsigned long sbm_tos);
+extern char x86_sbm_return[];

static inline phys_addr_t page_to_ptval(struct page *page)
{
@@ -343,6 +346,8 @@ int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)
struct x86_sbm_state *state = sbm->private;
int err;

+ state->sbm = sbm;
+
/* let interrupt handlers use the sandbox state page */
barrier();
WRITE_ONCE(current_thread_info()->sbm_state, state);
@@ -354,3 +359,24 @@ int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)

return err;
}
+
+void handle_sbm_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ struct x86_sbm_state *state = current_thread_info()->sbm_state;
+
+ /*
+ * Force -EFAULT unless the fault was due to a user-mode instruction
+ * fetch from the designated return address.
+ */
+ if (error_code != (X86_PF_PROT | X86_PF_USER | X86_PF_INSTR) ||
+ address != (unsigned long)x86_sbm_return)
+ state->sbm->error = -EFAULT;
+
+ /* modify IRET frame to exit from sandbox */
+ regs->ip = (unsigned long)x86_sbm_return;
+ regs->cs = __KERNEL_CS;
+ regs->flags = X86_EFLAGS_IF;
+ regs->sp = state->return_sp;
+ regs->ss = __KERNEL_DS;
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9c9c74314e7..8fc5b17b8fb4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -416,6 +416,12 @@ DEFINE_IDTENTRY_DF(exc_double_fault)

irqentry_nmi_enter(regs);
instrumentation_begin();
+
+ if (sandbox_mode(regs)) {
+ handle_sbm_fault(regs, error_code, 0);
+ return;
+ }
+
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

tsk->thread.error_code = error_code;
@@ -675,6 +681,11 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
goto exit;
}

+ if (sandbox_mode(regs)) {
+ handle_sbm_fault(regs, error_code, 0);
+ return;
+ }
+
if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
goto exit;

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..f223b258e53f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -34,6 +34,7 @@
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
#include <asm/irq_stack.h>
+#include <asm/sbm.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -1500,6 +1501,11 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
if (unlikely(kmmio_fault(regs, address)))
return;

+ if (sandbox_mode(regs)) {
+ handle_sbm_fault(regs, error_code, address);
+ return;
+ }
+
/* Was the fault on kernel-controlled part of the address space? */
if (unlikely(fault_in_kernel_space(address))) {
do_kern_addr_fault(regs, error_code, address);
--
2.34.1