Re: [PATCH v2] x86/xen/64: Rearrange the SYSCALL entries

From: Brian Gerst
Date: Sun Aug 13 2017 - 22:44:37 EST


On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@xxxxxxxxxx> wrote:
> Xen's raw SYSCALL entries are much less weird than native. Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later. I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
> ---
>
> Changes from v1 (which I never actually emailed):
> - Fix zero-extension in the compat case.
>
> arch/x86/entry/entry_64.S | 9 ++-------
> arch/x86/entry/entry_64_compat.S | 7 +++----
> arch/x86/xen/xen-asm_64.S | 23 +++++++++--------------
> 3 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index aa58155187c5..7cee92cf807f 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
> * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
> * it is too small to ever cause noticeable irq latency.
> */
> - SWAPGS_UNSAFE_STACK
> - /*
> - * A hypervisor implementation might want to use a label
> - * after the swapgs, so that it can do the swapgs
> - * for the guest and jump here on syscall.
> - */
> -GLOBAL(entry_SYSCALL_64_after_swapgs)
>
> + swapgs
> movq %rsp, PER_CPU_VAR(rsp_scratch)
> movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
> pushq %r11 /* pt_regs->flags */
> pushq $__USER_CS /* pt_regs->cs */
> pushq %rcx /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_64_after_hwframe)
> pushq %rax /* pt_regs->orig_ax */
> pushq %rdi /* pt_regs->di */
> pushq %rsi /* pt_regs->si */
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index e1721dafbcb1..5314d7b8e5ad 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
> */
> ENTRY(entry_SYSCALL_compat)
> /* Interrupts are off on entry. */
> - SWAPGS_UNSAFE_STACK
> + swapgs
>
> /* Stash user ESP and switch to the kernel stack. */
> movl %esp, %r8d
> movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> - /* Zero-extending 32-bit regs, do not remove */
> - movl %eax, %eax
> -
> /* Construct struct pt_regs on stack */
> pushq $__USER32_DS /* pt_regs->ss */
> pushq %r8 /* pt_regs->sp */
> pushq %r11 /* pt_regs->flags */
> pushq $__USER32_CS /* pt_regs->cs */
> pushq %rcx /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_compat_after_hwframe)
> + movl %eax, %eax /* discard orig_ax high bits */
> pushq %rax /* pt_regs->orig_ax */
> pushq %rdi /* pt_regs->di */
> pushq %rsi /* pt_regs->si */
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..a8a4f4c460a6 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
> * rip
> * r11
> * rsp->rcx
> - *
> - * In all the entrypoints, we undo all that to make it look like a
> - * CPU-generated syscall/sysenter and jump to the normal entrypoint.
> */
>
> -.macro undo_xen_syscall
> - mov 0*8(%rsp), %rcx
> - mov 1*8(%rsp), %r11
> - mov 5*8(%rsp), %rsp
> -.endm
> -
> /* Normal 64-bit system call target */
> ENTRY(xen_syscall_target)
> - undo_xen_syscall
> - jmp entry_SYSCALL_64_after_swapgs
> + popq %rcx
> + popq %r11
> + jmp entry_SYSCALL_64_after_hwframe
> ENDPROC(xen_syscall_target)
>
> #ifdef CONFIG_IA32_EMULATION
>
> /* 32-bit compat syscall target */
> ENTRY(xen_syscall32_target)
> - undo_xen_syscall
> - jmp entry_SYSCALL_compat
> + popq %rcx
> + popq %r11
> + jmp entry_SYSCALL_compat_after_hwframe
> ENDPROC(xen_syscall32_target)
>
> /* 32-bit compat sysenter target */
> ENTRY(xen_sysenter_target)
> - undo_xen_syscall
> + mov 0*8(%rsp), %rcx
> + mov 1*8(%rsp), %r11
> + mov 5*8(%rsp), %rsp
> jmp entry_SYSENTER_compat
> ENDPROC(xen_sysenter_target)

This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
64-bit PV kernel. The 64-bit versions of both tests still pass. The
32-bit tests segfault right after printing "parent: write to 0x80
(should fail)", and the fault is not caught by the test's signal
handler; the process is simply killed and control returns to the
shell. The tests pass again after reverting this patch.

--
Brian Gerst