[PATCH] kthread: Generalize pf_io_worker so it can point to struct kthread

From: Eric W. Biederman
Date: Thu Dec 23 2021 - 00:19:25 EST



The point of using set_child_tid to hold the kthread pointer was that
it already did what is necessary. There are now restrictions on when
set_child_tid can be initialized and when set_child_tid can be used in
schedule_tail. Which indicates that continuing to use set_child_tid
to hold the kthread pointer is a bad idea.

Instead of continuing to use the set_child_tid field of task_struct
generalize the pf_io_worker field of task_struct and use it to hold
the kthread pointer.

Rename pf_io_worker (which is a void * pointer) to worker_private so
it can be used to store kthreads struct kthread pointer. Update the
kthread code to store the kthread pointer in the worker_private field.
Remove the places where set_child_tid had to be dealt with carefully
because kthreads also used it.

Link: https://lkml.kernel.org/r/CAHk-=wgtFAA9SbVYg0gR1tqPMC17-NYcs0GQkaYg1bGhh1uJQQ@xxxxxxxxxxxxxx
Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
---

I looked again and the vhost_worker changes do not generalize
pf_io_worker, and as pf_io_worker is already a void * it is easy to
generalize. So I just did that.

Unless someone spots a problem I will add this to my signal-for-v5.17
branch in linux-next, as this seems much less error prone than using
set_child_tid.

fs/io-wq.c | 6 +++---
fs/io-wq.h | 2 +-
include/linux/sched.h | 4 ++--
kernel/fork.c | 8 +-------
kernel/kthread.c | 14 +++++---------
kernel/sched/core.c | 2 +-
6 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 88202de519f6..e4fc7384b40c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -657,7 +657,7 @@ static int io_wqe_worker(void *data)
*/
void io_wq_worker_running(struct task_struct *tsk)
{
- struct io_worker *worker = tsk->pf_io_worker;
+ struct io_worker *worker = tsk->worker_private;

if (!worker)
return;
@@ -675,7 +675,7 @@ void io_wq_worker_running(struct task_struct *tsk)
*/
void io_wq_worker_sleeping(struct task_struct *tsk)
{
- struct io_worker *worker = tsk->pf_io_worker;
+ struct io_worker *worker = tsk->worker_private;

if (!worker)
return;
@@ -694,7 +694,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
struct task_struct *tsk)
{
- tsk->pf_io_worker = worker;
+ tsk->worker_private = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 41bf37674a49..c7c23947cbcd 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -200,6 +200,6 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
static inline bool io_wq_current_is_worker(void)
{
return in_task() && (current->flags & PF_IO_WORKER) &&
- current->pf_io_worker;
+ current->worker_private;
}
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..52f2fdffa3ab 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -987,8 +987,8 @@ struct task_struct {
/* CLONE_CHILD_CLEARTID: */
int __user *clear_child_tid;

- /* PF_IO_WORKER */
- void *pf_io_worker;
+ /* PF_KTHREAD | PF_IO_WORKER */
+ void *worker_private;

u64 utime;
u64 stime;
diff --git a/kernel/fork.c b/kernel/fork.c
index 0816be1bb044..6f0293cb29c9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -950,7 +950,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;
- tsk->pf_io_worker = NULL;
+ tsk->worker_private = NULL;

account_kernel_stack(tsk, 1);

@@ -2032,12 +2032,6 @@ static __latent_entropy struct task_struct *copy_process(
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}

- /*
- * This _must_ happen before we call free_task(), i.e. before we jump
- * to any of the bad_fork_* labels. This is to avoid freeing
- * p->set_child_tid which is (ab)used as a kthread's data pointer for
- * kernel threads (PF_KTHREAD).
- */
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
/*
* Clear TID on mm_release()?
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c14707d15341..261a3c3b9c6c 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -72,7 +72,7 @@ enum KTHREAD_BITS {
static inline struct kthread *to_kthread(struct task_struct *k)
{
WARN_ON(!(k->flags & PF_KTHREAD));
- return (__force void *)k->set_child_tid;
+ return k->worker_private;
}

/*
@@ -80,7 +80,7 @@ static inline struct kthread *to_kthread(struct task_struct *k)
*
* Per construction; when:
*
- * (p->flags & PF_KTHREAD) && p->set_child_tid
+ * (p->flags & PF_KTHREAD) && p->worker_private
*
* the task is both a kthread and struct kthread is persistent. However
* PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and
@@ -88,7 +88,7 @@ static inline struct kthread *to_kthread(struct task_struct *k)
*/
static inline struct kthread *__to_kthread(struct task_struct *p)
{
- void *kthread = (__force void *)p->set_child_tid;
+ void *kthread = p->worker_private;
if (kthread && !(p->flags & PF_KTHREAD))
kthread = NULL;
return kthread;
@@ -109,11 +109,7 @@ bool set_kthread_struct(struct task_struct *p)
init_completion(&kthread->parked);
p->vfork_done = &kthread->exited;

- /*
- * We abuse ->set_child_tid to avoid the new member and because it
- * can't be wrongly copied by copy_process().
- */
- p->set_child_tid = (__force void __user *)kthread;
+ p->worker_private = kthread;
return true;
}

@@ -128,7 +124,7 @@ void free_kthread_struct(struct task_struct *k)
#ifdef CONFIG_BLK_CGROUP
WARN_ON_ONCE(kthread && kthread->blkcg_css);
#endif
- k->set_child_tid = (__force void __user *)NULL;
+ k->worker_private = NULL;
kfree(kthread);
}

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d8adbea77be1..ee222b89c692 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4908,7 +4908,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
finish_task_switch(prev);
preempt_enable();

- if (!(current->flags & PF_KTHREAD) && current->set_child_tid)
+ if (current->set_child_tid)
put_user(task_pid_vnr(current), current->set_child_tid);

calculate_sigpending();
--
2.29.2