Re: [PATCH 26/35] x86/process: Change copy_thread() argument 'arg' to 'stack_size'

From: Christian Brauner
Date: Tue Feb 15 2022 - 03:49:28 EST


On Tue, Feb 15, 2022 at 01:22:04AM +0000, Edgecombe, Rick P wrote:
> On Mon, 2022-02-14 at 13:33 +0100, Jann Horn wrote:
> > On Sun, Jan 30, 2022 at 10:22 PM Rick Edgecombe
> > <rick.p.edgecombe@xxxxxxxxx> wrote:
> > >
> > > From: Yu-cheng Yu <yu-cheng.yu@xxxxxxxxx>
> > >
> > > The single call site of copy_thread() passes stack size in
> > > 'arg'. To make
> > > this clear and in preparation of using this argument for shadow
> > > stack
> > > allocation, change 'arg' to 'stack_size'. No functional changes.
> >
> > Actually that name is misleading - the single caller copy_process()
> > indeed does:
> >
> > retval = copy_thread(clone_flags, args->stack, args->stack_size, p,
> > args->tls);
> >
> > but the member "stack_size" of "struct kernel_clone_args" can
> > actually
> > also be a pointer argument given to a kthread, see create_io_thread()
> > and kernel_thread():
> >
> > pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long
> > flags)
> > {
> > struct kernel_clone_args args = {
> > .flags = ((lower_32_bits(flags) | CLONE_VM |
> > CLONE_UNTRACED) & ~CSIGNAL),
> > .exit_signal = (lower_32_bits(flags) & CSIGNAL),
> > .stack = (unsigned long)fn,
> > .stack_size = (unsigned long)arg,
> > };
> >
> > return kernel_clone(&args);
> > }
> >
> > And then in copy_thread(), we have:
> >
> > kthread_frame_init(frame, sp, arg)
> >
> >
> > So I'm not sure whether this name change really makes sense, or
> > whether it just adds to the confusion.
>
> Thanks Jann. Yea I guess this makes it worse.
>
> Reading a bit of the history, it seems there used to be unwieldy
> argument lists which were replaced by a big "struct kernel_clone_args"
> to be passed around. The re-use of the stack_size argument is from
> before there was the struct. And then the struct just inherited the
> confusion when it was introduced.
>
> So if a separate *data member was added to kernel_clone_args for
> kernel_thread() and create_io_thread() to use, they wouldn't need to
> re-use stack_size. And copy_thread() could just take a struct
> kernel_clone_args pointer. It might make it more clear.

I'm honestly not sure it makes things that much better but I don't feel
strongly about it either.

>
> But copy_thread() has a ton of arch specific implementations. So they
> would all need to be updated to do this.

When struct kernel_clone_args was introduced I also removed the
copy_thread_tls() and copy_thread() split. So now we're only left with
copy_thread(). That already allowed us to get rid of some arch-specific
code. I didn't go further in trying to unify more arch-specific code. It
might be worth it but I didn't come to a clear conclusion.

>
> Just playing around with it, I did this which only makes the change on
> x86. Duplicating it for 21 copy_thread()s seems perfectly doable, but
> I'm not sure how much people care vs renaming arg to
> stacksize_or_data...


>
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 11bf09b60f9d..cfbba5b14609 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -132,9 +132,8 @@ static int set_new_tls(struct task_struct *p,
> unsigned long tls)
> return do_set_thread_area_64(p, ARCH_SET_FS, tls);
> }
>
> -int copy_thread(unsigned long clone_flags, unsigned long sp,
> - unsigned long stack_size, struct task_struct *p,
> - unsigned long tls)
> +int copy_thread(unsigned long clone_flags, struct kernel_clone_args
> *args,
> + struct task_struct *p)
> {
> struct inactive_task_frame *frame;
> struct fork_frame *fork_frame;
> @@ -178,7 +177,7 @@ int copy_thread(unsigned long clone_flags, unsigned
> long sp,
> if (unlikely(p->flags & PF_KTHREAD)) {
> p->thread.pkru = pkru_get_init_value();
> memset(childregs, 0, sizeof(struct pt_regs));
> - kthread_frame_init(frame, sp, stack_size);
> + kthread_frame_init(frame, args->stack, (unsigned
> long)args->data);
> return 0;
> }
>
> @@ -191,8 +190,8 @@ int copy_thread(unsigned long clone_flags, unsigned
> long sp,
> frame->bx = 0;
> *childregs = *current_pt_regs();
> childregs->ax = 0;
> - if (sp)
> - childregs->sp = sp;
> + if (args->stack)
> + childregs->sp = args->stack;
>
> #ifdef CONFIG_X86_32
> task_user_gs(p) = get_user_gs(current_pt_regs());
> @@ -211,17 +210,17 @@ int copy_thread(unsigned long clone_flags,
> unsigned long sp,
> */
> childregs->sp = 0;
> childregs->ip = 0;
> - kthread_frame_init(frame, sp, stack_size);
> + kthread_frame_init(frame, args->stack, (unsigned
> long)args->data);
> return 0;
> }
>
> /* Set a new TLS for the child thread? */
> if (clone_flags & CLONE_SETTLS)
> - ret = set_new_tls(p, tls);
> + ret = set_new_tls(p, args->tls);
>
> /* Allocate a new shadow stack for pthread */
> if (!ret)
> - ret = shstk_alloc_thread_stack(p, clone_flags,
> stack_size);
> + ret = shstk_alloc_thread_stack(p, clone_flags, args-
> >stack_size);
>
> if (!ret && unlikely(test_tsk_thread_flag(current,
> TIF_IO_BITMAP)))
> io_bitmap_share(p);
> diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> index b9198a1b3a84..f138b23aee50 100644
> --- a/include/linux/sched/task.h
> +++ b/include/linux/sched/task.h
> @@ -34,6 +34,7 @@ struct kernel_clone_args {
> int io_thread;
> struct cgroup *cgrp;
> struct css_set *cset;
> + void *data;
> };
>
> /*
> @@ -67,8 +68,8 @@ extern void fork_init(void);
>
> extern void release_task(struct task_struct * p);
>
> -extern int copy_thread(unsigned long, unsigned long, unsigned long,
> - struct task_struct *, unsigned long);
> +extern int copy_thread(unsigned long clone_flags, struct
> kernel_clone_args *args,
> + struct task_struct *p);
>
> extern void flush_thread(void);
>
> @@ -85,10 +86,10 @@ extern void exit_files(struct task_struct *);
> extern void exit_itimers(struct signal_struct *);
>
> extern pid_t kernel_clone(struct kernel_clone_args *kargs);
> -struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int
> node);
> +struct task_struct *create_io_thread(int (*fn)(void *), void *data,
> int node);
> struct task_struct *fork_idle(int);
> struct mm_struct *copy_init_mm(void);
> -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long
> flags);
> +extern pid_t kernel_thread(int (*fn)(void *), void *data, unsigned
> long flags);
> extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
> int kernel_wait(pid_t pid, int *stat);
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d75a528f7b21..8af202e5651e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2170,7 +2170,7 @@ static __latent_entropy struct task_struct
> *copy_process(
> retval = copy_io(clone_flags, p);
> if (retval)
> goto bad_fork_cleanup_namespaces;
> - retval = copy_thread(clone_flags, args->stack, args-
> >stack_size, p, args->tls);
> + retval = copy_thread(clone_flags, args, p);
> if (retval)
> goto bad_fork_cleanup_io;
>
> @@ -2487,7 +2487,7 @@ struct mm_struct *copy_init_mm(void)
> * The returned task is inactive, and the caller must fire it up
> through
> * wake_up_new_task(p). All signals are blocked in the created task.
> */
> -struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int
> node)
> +struct task_struct *create_io_thread(int (*fn)(void *), void *data,
> int node)
> {
> unsigned long flags =
> CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
> CLONE_IO;
> @@ -2496,7 +2496,7 @@ struct task_struct *create_io_thread(int
> (*fn)(void *), void *arg, int node)
> CLONE_UNTRACED) & ~CSIGNAL),
> .exit_signal = (lower_32_bits(flags) & CSIGNAL),
> .stack = (unsigned long)fn,
> - .stack_size = (unsigned long)arg,
> + .data = data,
> .io_thread = 1,
> };
>
> @@ -2594,14 +2594,14 @@ pid_t kernel_clone(struct kernel_clone_args
> *args)
> /*
> * Create a kernel thread.
> */
> -pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
> +pid_t kernel_thread(int (*fn)(void *), void *data, unsigned long
> flags)
> {
> struct kernel_clone_args args = {
> .flags = ((lower_32_bits(flags) | CLONE_VM |
> CLONE_UNTRACED) & ~CSIGNAL),
> .exit_signal = (lower_32_bits(flags) & CSIGNAL),
> .stack = (unsigned long)fn,
> - .stack_size = (unsigned long)arg,
> + .data = data,
> };
>
> return kernel_clone(&args);
>
>