Re: [PATCH 2/2] x86: Rewrite ret_from_fork() in C

From: Brian Gerst
Date: Fri Jun 23 2023 - 14:13:12 EST


On Thu, Jun 22, 2023 at 8:08 AM Brian Gerst <brgerst@xxxxxxxxx> wrote:
>
> When kCFI is enabled, special handling is needed for the indirect call
> to the kernel thread function. Rewrite the ret_from_fork() function in
> C so that the compiler can properly handle the indirect call.
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
> ---
> arch/x86/entry/entry_32.S | 30 +++++++--------------------
> arch/x86/entry/entry_64.S | 35 +++++++++-----------------------
> arch/x86/include/asm/switch_to.h | 4 +++-
> arch/x86/kernel/process.c | 22 +++++++++++++++++++-
> 4 files changed, 41 insertions(+), 50 deletions(-)
>
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 6c1ee76adc11..7932c14199fb 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -727,37 +727,21 @@ SYM_CODE_END(__switch_to_asm)
> * edi: kernel thread arg
> */
> .pushsection .text, "ax"
> -SYM_CODE_START(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
> /* return address for the stack unwinder */
> pushl $.Lsyscall_32_done
> FRAME_BEGIN
>
> - pushl %eax
> - call schedule_tail
> + /* prev already in EAX */
> + movl %esp, %edx /* regs */
> + movl %ebx, %ecx /* fn */
> + pushl %edi /* fn_arg */
> + call ret_from_fork
> addl $4, %esp
>
> - testl %ebx, %ebx
> - jnz 1f /* kernel threads are uncommon */
> -
> -2:
> - /* When we fork, we trace the syscall return in the child, too. */
> - movl %esp, %eax
> - call syscall_exit_to_user_mode
> -
> FRAME_END
> RET
> -
> - /* kernel thread */
> -1: movl %edi, %eax
> - CALL_NOSPEC ebx
> - /*
> - * A kernel thread is allowed to return here after successfully
> - * calling kernel_execve(). Exit to userspace to complete the execve()
> - * syscall.
> - */
> - movl $0, PT_EAX(%esp)
> - jmp 2b
> -SYM_CODE_END(ret_from_fork)
> +SYM_CODE_END(ret_from_fork_asm)
> .popsection
>
> SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index f31e286c2977..5ee32e7e29e8 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
> * r12: kernel thread arg
> */
> .pushsection .text, "ax"
> - __FUNC_ALIGN
> -SYM_CODE_START_NOALIGN(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
> UNWIND_HINT_END_OF_STACK
> ANNOTATE_NOENDBR // copy_thread
> CALL_DEPTH_ACCOUNT
> - movq %rax, %rdi
> - call schedule_tail /* rdi: 'prev' task parameter */
> -
> - testq %rbx, %rbx /* from kernel_thread? */
> - jnz 1f /* kernel threads are uncommon */
>
> -2:
> - UNWIND_HINT_REGS
> - movq %rsp, %rdi
> - call syscall_exit_to_user_mode /* returns with IRQs disabled */
> - jmp swapgs_restore_regs_and_return_to_usermode
> + /* return address for the stack unwinder */
> + pushq $swapgs_restore_regs_and_return_to_usermode
> + UNWIND_HINT_FUNC
>
> -1:
> - /* kernel thread */
> - UNWIND_HINT_END_OF_STACK
> - movq %r12, %rdi
> - CALL_NOSPEC rbx
> - /*
> - * A kernel thread is allowed to return here after successfully
> - * calling kernel_execve(). Exit to userspace to complete the execve()
> - * syscall.
> - */
> - movq $0, RAX(%rsp)
> - jmp 2b
> -SYM_CODE_END(ret_from_fork)
> + movq %rax, %rdi /* prev */
> + movq %rsp, %rsi /* regs */

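The pushq of the return address above moves %rsp down by 8 bytes, so this
"movq %rsp, %rsi" passes the address of the pushed return address instead of
the address of pt_regs -- regs is wrong. A new version is coming.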

Brian Gerst