Re: [PATCH 00/12] x86: Rewrite 64-bit syscall code

From: Andy Lutomirski
Date: Mon Dec 07 2015 - 17:55:51 EST


On Mon, Dec 7, 2015 at 1:51 PM, Andy Lutomirski <luto@xxxxxxxxxx> wrote:
> This is kind of like the 32-bit and compat code, except that I
> preserved the fast path this time. I was unable to measure any
> significant performance change on my laptop in the fast path.
>
> What do you all think?

For completeness, if I zap the fast path entirely (see attached), I
lose 20 cycles (148 cycles vs 128 cycles) on Skylake. Switching
between movq and pushq for stack setup makes no difference whatsoever,
interestingly. I haven't tried to figure out exactly where those 20
cycles go.
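
For reference, numbers like these can be collected with a trivial userspace
loop along the lines of the sketch below. This is illustrative only, not the
exact harness behind the figures above; the iteration count, the use of
getpid, and RDTSCP timing are just convenient choices.

/*
 * Rough sketch of a syscall round-trip microbenchmark -- not the harness
 * behind the numbers above, just one way to get a cycles-per-syscall
 * figure.  getpid is used because it does almost nothing in the kernel,
 * so the result is dominated by entry/exit cost.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <x86intrin.h>		/* __rdtscp() */

#define ITERS 1000000UL

int main(void)
{
	unsigned long long start, end;
	unsigned int aux;
	unsigned long i;

	/* Warm up so caches and branch predictors are hot. */
	for (i = 0; i < 1000; i++)
		syscall(SYS_getpid);

	start = __rdtscp(&aux);
	for (i = 0; i < ITERS; i++)
		syscall(SYS_getpid);	/* raw syscall, bypasses glibc's getpid cache */
	end = __rdtscp(&aux);

	/* Includes the libc syscall() wrapper, not just kernel entry/exit. */
	printf("%.1f cycles per syscall round trip\n",
	       (double)(end - start) / ITERS);
	return 0;
}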

--Andy
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ab5362f241d..a97981f0d9ce 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -163,71 +163,17 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ pushq %rbx /* pt_regs->rbx */
+ pushq %rbp /* pt_regs->rbp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */

- /*
- * If we need to do entry work or if we guess we'll need to do
- * exit work, go straight to the slow path.
- */
- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz entry_SYSCALL64_slow_path
-
-entry_SYSCALL_64_fastpath:
- /*
- * Easy case: enable interrupts and issue the syscall. If the syscall
- * needs pt_regs, we'll call a stub that disables interrupts again
- * and jumps to the slow path.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx
- call *sys_call_table_fastpath_64(, %rax, 8)
- movq %rax, RAX(%rsp)
-1:
-
- /*
- * If we get here, then we know that pt_regs is clean for SYSRET64.
- * If we see that no exit work is required (which we are required
- * to check with IRQs off), then we can go straight to SYSRET64.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz 1f
-
- LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON /* user mode is traced as IRQs on */
- RESTORE_C_REGS
- movq RSP(%rsp), %rsp
- USERGS_SYSRET64
-
-1:
- /*
- * The fast path looked good when we started, but something changed
- * along the way and we need to switch to the slow path. Calling
- * raise(3) will trigger this, for example. IRQs are off.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_EXTRA_REGS
- movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
- jmp return_from_SYSCALL_64
-
-entry_SYSCALL64_slow_path:
/* IRQs are off. */
- SAVE_EXTRA_REGS
movq %rsp, %rdi
call do_syscall_64 /* returns with IRQs disabled */

-return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */

@@ -305,14 +251,7 @@ opportunistic_sysret_failed:
END(entry_SYSCALL_64)

ENTRY(stub_ptregs_64)
- /*
- * Syscalls marked as needing ptregs that go through the fast path
- * land here. We transfer to the slow path.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- addq $8, %rsp
- jmp entry_SYSCALL64_slow_path
+ ud2
END(stub_ptregs_64)

/*