[RFC PATCH 1/2] workqueue: Introduce PF_WQ_RESCUE_WORKER

From: Aaron Tomlin
Date: Sat Jul 29 2023 - 10:01:12 EST


The Linux kernel does not provide a way for user-mode to differentiate
between a kworker and a rescue kworker. From user-mode, one can establish
whether a task is a kworker by testing for PF_WQ_WORKER in the specified
task's flags bit mask (or bitmap) via /proc/[PID]/stat. One can also
examine /proc/[PID]/stack and search for the function named
"rescuer_thread", but this is only available to the root user.

It can be useful to identify a rescue kworker since its CPU affinity
cannot be modified and its initial CPU assignment can be safely ignored.
Furthermore, for a workqueue that was created with WQ_MEM_RECLAIM and
WQ_SYSFS, the cpumask file is not applicable to the rescue kworker.
By design, a rescue kworker should run anywhere.

This patch introduces PF_WQ_RESCUE_WORKER and ensures it is set and
cleared appropriately.

Signed-off-by: Aaron Tomlin <atomlin@xxxxxxxxxxx>
---
include/linux/sched.h | 2 +-
kernel/workqueue.c | 19 ++++++++++++-------
2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 609bde814cb0..039fcf8d9ed6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1734,7 +1734,7 @@ extern struct pid *cad_pid;
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
-#define PF__HOLE__00010000 0x00010000
+#define PF_WQ_RESCUE_WORKER 0x00010000 /* I am a rescue workqueue worker */
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 02a8f402eeb5..6d38d714b72b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2665,13 +2665,18 @@ static void process_scheduled_works(struct worker *worker)
}
}

-static void set_pf_worker(bool val)
+static void set_pf_worker_and_rescuer(bool worker, bool rescue)
{
mutex_lock(&wq_pool_attach_mutex);
- if (val)
+ if (worker) {
current->flags |= PF_WQ_WORKER;
- else
+ if (rescue)
+ current->flags |= PF_WQ_RESCUE_WORKER;
+ } else {
current->flags &= ~PF_WQ_WORKER;
+ if (rescue)
+ current->flags &= ~PF_WQ_RESCUE_WORKER;
+ }
mutex_unlock(&wq_pool_attach_mutex);
}

@@ -2693,14 +2698,14 @@ static int worker_thread(void *__worker)
struct worker_pool *pool = worker->pool;

/* tell the scheduler that this is a workqueue worker */
- set_pf_worker(true);
+ set_pf_worker_and_rescuer(true, false);
woke_up:
raw_spin_lock_irq(&pool->lock);

/* am I supposed to die? */
if (unlikely(worker->flags & WORKER_DIE)) {
raw_spin_unlock_irq(&pool->lock);
- set_pf_worker(false);
+ set_pf_worker_and_rescuer(false, false);

set_task_comm(worker->task, "kworker/dying");
ida_free(&pool->worker_ida, worker->id);
@@ -2804,7 +2809,7 @@ static int rescuer_thread(void *__rescuer)
* Mark rescuer as worker too. As WORKER_PREP is never cleared, it
* doesn't participate in concurrency management.
*/
- set_pf_worker(true);
+ set_pf_worker_and_rescuer(true, true);
repeat:
set_current_state(TASK_IDLE);

@@ -2903,7 +2908,7 @@ static int rescuer_thread(void *__rescuer)

if (should_stop) {
__set_current_state(TASK_RUNNING);
- set_pf_worker(false);
+ set_pf_worker_and_rescuer(false, true);
return 0;
}

--
2.39.1