[RFC PATCH v1] syscall_user_dispatch: ptrace SUSPEND feature for checkpoint/restore

From: Gregory Price
Date: Wed Jan 04 2023 - 17:10:05 EST


Adds PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH to ptrace options, and
modify Syscall User Dispatch to suspend interception when enabled.

This is modeled after the similar feature for SECCOMP, which suspends
SECCOMP interposition. Without doing this, software like CRIU will
inject system calls into a process and be intercepted by Syscall
User Dispatch, either causing a crash (due to blocked signals) or
the delivery of those signals to a ptracer (not the intended behavior).

Since Syscall User Dispatch is not a privileged feature, a check
for permissions like SECCOMP does is not required, however attemping
to set this feature should not be possible when
CONFIG_CHECKPOINT_RESTORE it not supported.
---
include/linux/ptrace.h | 1 +
include/uapi/linux/ptrace.h | 3 ++-
kernel/entry/syscall_user_dispatch.c | 4 ++++
kernel/ptrace.c | 5 +++++
4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eaaef3ffec22..17ded8ed3795 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -45,6 +45,7 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,

#define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
+#define PT_SUSPEND_SYSCALL_USER_DISPATCH (PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH << PT_OPT_FLAG_SHIFT)

extern long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 195ae64a8c87..be602a8d554f 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -146,9 +146,10 @@ struct ptrace_rseq_configuration {
/* eventless options */
#define PTRACE_O_EXITKILL (1 << 20)
#define PTRACE_O_SUSPEND_SECCOMP (1 << 21)
+#define PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH (1 << 22)

#define PTRACE_O_MASK (\
- 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
+ 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP | PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)

#include <asm/ptrace.h>

diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 0b6379adff6b..6fad83e34da7 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -36,6 +36,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
struct syscall_user_dispatch *sd = &current->syscall_dispatch;
char state;

+ if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
+ unlikely(current->ptrace & PT_SUSPEND_SYSCALL_USER_DISPATCH))
+ return false;
+
if (likely(instruction_pointer(regs) - sd->offset < sd->len))
return false;

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 54482193e1ed..a6ad815bd4be 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -370,6 +370,11 @@ static int check_ptrace_options(unsigned long data)
if (data & ~(unsigned long)PTRACE_O_MASK)
return -EINVAL;

+ if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)) {
+ if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART))
+ return -EINVAL;
+ }
+
if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
!IS_ENABLED(CONFIG_SECCOMP))
--
2.37.3