[PATCH 07/12] x86/entry/64: Always run ptregs-using syscalls on the slow path

From: Andy Lutomirski
Date: Mon Dec 07 2015 - 16:54:14 EST


64-bit syscalls currently have an optimization in which they are
called with partial pt_regs. A small handful require full pt_regs.
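
(For context, a sketch of the distinction; the register sets below match
the real entry code, but the summary itself is illustrative:)

    /*
     * The fast path spills only the registers the C calling
     * convention clobbers into pt_regs:
     *     rdi, rsi, rdx, rcx, rax, r8-r11      "partial pt_regs"
     * SAVE_EXTRA_REGS additionally spills the callee-saved set:
     *     rbx, rbp, r12-r15                    "full pt_regs"
     *
     * Handlers like clone, fork, execve, and rt_sigreturn read or
     * write the callee-saved slots, so they cannot run correctly
     * with only partial pt_regs.
     */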

In the 32-bit and compat cases, I cleaned this up by forcing full
pt_regs for all syscalls. The performance hit there doesn't really
matter.

I want to clean up the 64-bit case as well, but I don't want to hurt
fast-path performance. To do that, I want to force the syscalls that
need full pt_regs onto the slow path. This will let us make the
slow-path syscalls real ABI-compliant C functions.
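
(Roughly the shape this is heading toward, assuming a C dispatcher
along the lines of what later patches add; a sketch, not part of this
patch:)

    /*
     * Slow path in C: with full pt_regs saved, syscall dispatch is an
     * ordinary function call taking all six arguments from memory.
     */
    regs->ax = sys_call_table[nr](regs->di, regs->si, regs->dx,
                                  regs->r10, regs->r8, regs->r9);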

Use the new syscall entry qualification machinery for this:
stub_clone's entry in the syscall table is now stub_clone/ptregs, as
the worked example below shows.
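
Concretely, for clone (entry 56), the generated asm/syscalls_64.h line
and its two expansions work out roughly like this (a sketch of the
generated code, using the macros added in this patch):

    __SYSCALL_64(56, stub_clone, ptregs)
        /* in sys_call_table:             [56] = stub_clone,     */
        /* in sys_call_table_fastpath_64: [56] = stub_ptregs_64, */

so a fast-path dispatch of clone bounces through stub_ptregs_64 into
the slow path, where full pt_regs are available.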

The next patch will eliminate the stubs, and we'll just have
sys_clone/ptregs.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
arch/x86/entry/entry_64.S | 17 +++++++++--------
arch/x86/entry/syscall_64.c | 18 ++++++++++++++++++
arch/x86/entry/syscalls/syscall_64.tbl | 16 ++++++++--------
3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3cfceb6..a698b8092831 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -182,7 +182,7 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
- call *sys_call_table(, %rax, 8)
+ call *sys_call_table_fastpath_64(, %rax, 8)
movq %rax, RAX(%rsp)
1:
/*
@@ -238,13 +238,6 @@ tracesys:
movq %rsp, %rdi
movl $AUDIT_ARCH_X86_64, %esi
call syscall_trace_enter_phase1
- test %rax, %rax
- jnz tracesys_phase2 /* if needed, run the slow path */
- RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
- movq ORIG_RAX(%rsp), %rax
- jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
-
-tracesys_phase2:
SAVE_EXTRA_REGS
movq %rsp, %rdi
movl $AUDIT_ARCH_X86_64, %esi
@@ -355,6 +348,14 @@ opportunistic_sysret_failed:
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)

+ENTRY(stub_ptregs_64)
+ /*
+ * Syscalls marked as needing ptregs that go through the fast path
+ * land here. We transfer to the slow path.
+ */
+ addq $8, %rsp
+ jmp tracesys
+END(stub_ptregs_64)

.macro FORK_LIKE func
ENTRY(stub_\func)
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index a1d408772ae6..601745c667ce 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -22,3 +22,21 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
+
+#undef __SYSCALL_64
+
+extern long stub_ptregs_64(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+
+#define __SYSCALL_64_QUAL_(nr, sym) [nr] = sym,
+#define __SYSCALL_64_QUAL_ptregs(nr, sym) [nr] = stub_ptregs_64,
+
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(nr, sym)
+
+asmlinkage const sys_call_ptr_t sys_call_table_fastpath_64[__NR_syscall_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 278842fdf1f6..6b9db2e338f4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn stub_rt_sigreturn
+15 64 rt_sigreturn stub_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
@@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
-56 common clone stub_clone
-57 common fork stub_fork
-58 common vfork stub_vfork
-59 64 execve stub_execve
+56 common clone stub_clone/ptregs
+57 common fork stub_fork/ptregs
+58 common vfork stub_vfork/ptregs
+59 64 execve stub_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
@@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
-322 64 execveat stub_execveat
+322 64 execveat stub_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier

@@ -344,7 +344,7 @@
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve stub_x32_execve
+520 x32 execve stub_x32_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
@@ -369,4 +369,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
-545 x32 execveat stub_x32_execveat
+545 x32 execveat stub_x32_execveat/ptregs
--
2.5.0
