[PATCH V8 2/8] fork/vm: Move common PF_IO_WORKER behavior to new flag

From: Mike Christie
Date: Wed Feb 09 2022 - 18:32:29 EST


This adds a new flag, PF_USER_WORKER, that's used for behavior common to
to both PF_IO_WORKER and users like vhost which will use a new helper
instead of create_io_thread because they require different behavior.

The common behavior PF_USER_WORKER covers is the initial frame and fpu
setup and the vm reclaim handling.

Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
[For m68k changes]
Acked-by: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx>
---
arch/alpha/kernel/process.c | 2 +-
arch/arc/kernel/process.c | 2 +-
arch/arm/kernel/process.c | 2 +-
arch/arm64/kernel/process.c | 2 +-
arch/csky/kernel/process.c | 2 +-
arch/h8300/kernel/process.c | 2 +-
arch/hexagon/kernel/process.c | 2 +-
arch/ia64/kernel/process.c | 2 +-
arch/m68k/kernel/process.c | 2 +-
arch/microblaze/kernel/process.c | 2 +-
arch/mips/kernel/process.c | 2 +-
arch/nds32/kernel/process.c | 2 +-
arch/nios2/kernel/process.c | 2 +-
arch/openrisc/kernel/process.c | 2 +-
arch/parisc/kernel/process.c | 2 +-
arch/powerpc/kernel/process.c | 2 +-
arch/riscv/kernel/process.c | 2 +-
arch/s390/kernel/process.c | 2 +-
arch/sh/kernel/process_32.c | 2 +-
arch/sparc/kernel/process_32.c | 2 +-
arch/sparc/kernel/process_64.c | 2 +-
arch/um/kernel/process.c | 2 +-
arch/x86/kernel/fpu/core.c | 4 ++--
arch/x86/kernel/process.c | 2 +-
arch/xtensa/kernel/process.c | 2 +-
include/linux/sched.h | 1 +
include/linux/sched/task.h | 3 ++-
kernel/fork.c | 6 +++++-
mm/vmscan.c | 4 ++--
29 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 5f8527081da9..f4759e4ee4a9 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -249,7 +249,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childti->pcb.ksp = (unsigned long) childstack;
childti->pcb.flags = 1; /* set FEN, clear everything else */

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* kernel thread */
memset(childstack, 0,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 8e90052f6f05..b409ecb1407f 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -191,7 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childksp[0] = 0; /* fp */
childksp[1] = (unsigned long)ret_from_fork; /* blink */

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(c_regs, 0, sizeof(struct pt_regs));

c_callee->r13 = kthread_arg;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d47159f3791c..44603062d661 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -251,7 +251,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
thread->cpu_domain = get_domain();
#endif

- if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
+ if (likely(!(p->flags & (PF_KTHREAD | PF_USER_WORKER)))) {
*childregs = *current_pt_regs();
childregs->ARM_r0 = 0;
if (stack_start)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 5369e649fa79..2528fb9650ef 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -334,7 +334,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,

ptrauth_thread_init_kernel(p);

- if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
+ if (likely(!(p->flags & (PF_KTHREAD | PF_USER_WORKER)))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;

diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index 3d0ca22cd0e2..509f2bfe4ace 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -49,7 +49,7 @@ int copy_thread(unsigned long clone_flags,
/* setup thread.sp for switch_to !!! */
p->thread.sp = (unsigned long)childstack;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
childstack->r15 = (unsigned long) ret_from_kernel_thread;
childstack->r10 = kthread_arg;
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 8833fa4f5d51..050aca44ba6d 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,

childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
childregs->retpc = (unsigned long) ret_from_kernel_thread;
childregs->er4 = topstk; /* arg */
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 232dfd8956aa..40f8294c6c7c 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -73,7 +73,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
sizeof(*ss));
ss->lr = (unsigned long)ret_from_fork;
p->thread.switch_sp = ss;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
/* r24 <- fn, r25 <- arg */
ss->r24 = usp;
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 834df24a88f1..29015ebdcf1d 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -338,7 +338,7 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base,

ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
if (unlikely(!user_stack_base)) {
/* fork_idle() called us */
return 0;
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index a6030dbaa089..cbb693012b5e 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
*/
p->thread.fc = USER_DATA;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* kernel thread */
memset(frame, 0, sizeof(struct fork_frame));
frame->regs.sr = PS_S;
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 5e2b91c1e8ce..de1da9900f7e 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -59,7 +59,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct pt_regs *childregs = task_pt_regs(p);
struct thread_info *ti = task_thread_info(p);

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* if we're creating a new kernel thread then just zeroing all
* the registers. That's OK for a brand new thread.*/
memset(childregs, 0, sizeof(struct pt_regs));
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index cbff1b974f88..6f1ed337cd41 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
memset(childregs, 0, sizeof(struct pt_regs));
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index 49fab9e39cbf..dba91dd1e289 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,

memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
/* kernel thread fn */
p->thread.cpu_context.r6 = stack_start;
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index f8ea522a1588..eabf3452e5e2 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -109,7 +109,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct switch_stack *childstack =
((struct switch_stack *)childregs) - 1;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(childstack, 0,
sizeof(struct switch_stack) + sizeof(struct pt_regs));

diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 3c0c91bcdcba..aa110383cfa1 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -172,7 +172,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *)sp;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(kregs, 0, sizeof(struct pt_regs));
kregs->gpr[20] = usp; /* fn, kernel thread */
kregs->gpr[22] = arg;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index ea3d83b6fb62..a76120e30eb4 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -197,7 +197,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
extern void * const ret_from_kernel_thread;
extern void * const child_return;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* kernel thread */
memset(cregs, 0, sizeof(struct pt_regs));
if (!usp) /* idle thread */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 984813a4d5dc..02f91fcc1417 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1736,7 +1736,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Copy registers */
sp -= sizeof(struct pt_regs);
childregs = (struct pt_regs *) sp;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 03ac3aa611f5..8deeb94eb51e 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -125,7 +125,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct pt_regs *childregs = task_pt_regs(p);

/* p->thread holds context to be restored by __switch_to() */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp_in_global;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 71d86f73b02c..51169bb351d4 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -130,7 +130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame->sf.gprs[9] = (unsigned long)frame;

/* Store access registers to kernel stack of new process. */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 1c28e3cddb60..0506a739b0a8 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,

childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.pc = (unsigned long) ret_from_kernel_thread;
childregs->regs[4] = arg;
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 2dc0bf9fe62e..5386e56b5e6c 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -296,7 +296,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
ti->ksp = (unsigned long) new_stack;
p->thread.kregs = childregs;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
extern int nwindows;
unsigned long psr;
memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index f5b2cac8669f..6058b3966f71 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -594,7 +594,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
sizeof(struct sparc_stackf));
t->fpsaved[0] = 0;

- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
memset(child_trap_frame, 0, child_stack_sz);
__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
(current_pt_regs()->tstate + 1) & TSTATE_CWP;
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 4a420778ed87..25dd496fd820 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -158,7 +158,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct * p, unsigned long tls)
{
void (*handler)(void);
- int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER);
+ int kthread = current->flags & (PF_KTHREAD | PF_USER_WORKER);
int ret = 0;

p->thread = (struct thread_struct) INIT_THREAD;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8dea01ffc5c1..c5b34fa8bb0c 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -579,10 +579,10 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);

/*
- * No FPU state inheritance for kernel threads and IO
+ * No FPU state inheritance for kernel threads and user
* worker threads.
*/
- if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
+ if (dst->flags & (PF_KTHREAD | PF_USER_WORKER)) {
/* Clear out the minimal state */
memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
init_fpstate_copy_size());
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81d8ef036637..b0e58fe55b31 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -195,7 +195,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
task_user_gs(p) = get_user_gs(current_pt_regs());
#endif

- if (unlikely(p->flags & PF_IO_WORKER)) {
+ if (unlikely(p->flags & PF_USER_WORKER)) {
/*
* An IO thread is a user space thread, but it doesn't
* return to ret_after_fork().
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index bd80df890b1e..00d81668ead4 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -224,7 +224,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
#error Unsupported Xtensa ABI
#endif

- if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (!(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
struct pt_regs *regs = current_pt_regs();
unsigned long usp = usp_thread_fn ?
usp_thread_fn : regs->areg[1];
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75ba8aa60248..fc45e692f6a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1690,6 +1690,7 @@ extern struct pid *cad_pid;
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
+#define PF_USER_WORKER 0x01000000 /* Kernel thread cloned from userspace thread */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0bf95966ae7d..0467b3ce1c94 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -18,7 +18,8 @@ struct css_set;
/* All the bits taken by the old clone syscall. */
#define CLONE_LEGACY_FLAGS 0xffffffffULL

-#define USER_WORKER_IO BIT(0)
+#define USER_WORKER BIT(0)
+#define USER_WORKER_IO BIT(1)

struct kernel_clone_args {
u64 flags;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9273fd81a329..1f6e7184b31f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1998,6 +1998,10 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+
+ if (args->worker_flags & USER_WORKER)
+ p->flags |= PF_USER_WORKER;
+
if (args->worker_flags & USER_WORKER_IO) {
/*
* Mark us an IO worker, and block any signal that isn't
@@ -2497,7 +2501,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
.stack = (unsigned long)fn,
.stack_size = (unsigned long)arg,
- .worker_flags = USER_WORKER_IO,
+ .worker_flags = USER_WORKER | USER_WORKER_IO,
};

return copy_process(NULL, 0, node, &args);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 090bfb605ecf..bb85b454ff07 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1061,12 +1061,12 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
DEFINE_WAIT(wait);

/*
- * Do not throttle IO workers, kthreads other than kswapd or
+ * Do not throttle user workers, kthreads other than kswapd or
* workqueues. They may be required for reclaim to make
* forward progress (e.g. journalling workqueues or kthreads).
*/
if (!current_is_kswapd() &&
- current->flags & (PF_IO_WORKER|PF_KTHREAD))
+ current->flags & (PF_USER_WORKER|PF_KTHREAD))
return;

/*
--
2.25.1