[PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue()

From: lizhe . 67
Date: Thu Aug 12 2021 - 04:38:42 EST


From: Li Zhe <lizhe.67@xxxxxxxxxxxxx>

Even after def98c84b6cd, we may encount sanity check failures in
destroy_workqueue() although we call flush_work() before, which
result in memory leak of struct pool_workqueue.

The warning logs are listed below.

WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****

The possible stack which result in the failure is listed below.

thread A:
destroy_workqueue()
----raw_spin_lock_irq(&pwq->pool->lock)
----pwq_busy()

thread B:
----process_one_work()
----raw_spin_unlock_irq(&pool->lock)
----worker->current_func(work)
----cond_resched()
----raw_spin_lock_irq(&pool->lock)
----pwq_dec_nr_in_flight(pwq, work_color)

Thread A may get pool->lock before thread B, with the pwq->refcnt
is still 2, which result in memory leak and sanity check failures.

Notice that wq_barrier_func() only calls complete(), and it is not
suitable to expand struct work_struct considering of the memory cost,
this patch put complete() after obtaining pool->lock in function
process_one_work() to eliminate competition by identify the work as a
barrier with the work->func equal to NULL.

Signed-off-by: Li Zhe <lizhe.67@xxxxxxxxxxxxx>
---
kernel/workqueue.c | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f148eacda55a..02f77f35522c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -280,6 +280,12 @@ struct workqueue_struct {
struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
};

+struct wq_barrier {
+ struct work_struct work;
+ struct completion done;
+ struct task_struct *task; /* purely informational */
+};
+
static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;
@@ -2152,6 +2158,11 @@ static bool manage_workers(struct worker *worker)
return true;
}

+static inline bool is_barrier_func(work_func_t func)
+{
+ return func == NULL;
+}
+
/**
* process_one_work - process single work
* @worker: self
@@ -2273,7 +2284,8 @@ __acquires(&pool->lock)
*/
lockdep_invariant_state(true);
trace_workqueue_execute_start(work);
- worker->current_func(work);
+ if (likely(!is_barrier_func(worker->current_func)))
+ worker->current_func(work);
/*
* While we must be careful to not use "work" after this, the trace
* point will only record its address.
@@ -2303,6 +2315,11 @@ __acquires(&pool->lock)

raw_spin_lock_irq(&pool->lock);

+ if (unlikely(is_barrier_func(worker->current_func))) {
+ struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
+ complete(&barr->done);
+ }
+
/* clear cpu intensive status */
if (unlikely(cpu_intensive))
worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
@@ -2618,18 +2635,6 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
target_wq->name, target_func);
}

-struct wq_barrier {
- struct work_struct work;
- struct completion done;
- struct task_struct *task; /* purely informational */
-};
-
-static void wq_barrier_func(struct work_struct *work)
-{
- struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
- complete(&barr->done);
-}
-
/**
* insert_wq_barrier - insert a barrier work
* @pwq: pwq to insert barrier into
@@ -2667,7 +2672,11 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
* checks and call back into the fixup functions where we
* might deadlock.
*/
- INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
+ /* no need to init func because complete() has been moved to
+ * proccess_one_work(), which means that we use NULL to identify
+ * if this work is a barrier
+ */
+ INIT_WORK_ONSTACK(&barr->work, NULL);
__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));

init_completion_map(&barr->done, &target->lockdep_map);
@@ -4682,7 +4691,7 @@ static void pr_cont_pool_info(struct worker_pool *pool)

static void pr_cont_work(bool comma, struct work_struct *work)
{
- if (work->func == wq_barrier_func) {
+ if (is_barrier_func(work->func)) {
struct wq_barrier *barr;

barr = container_of(work, struct wq_barrier, work);
--
2.11.0