Re: [BUG] cgroup/workqueues/fork: deadlock when moving cgroups

From: Tejun Heo
Date: Wed Apr 13 2016 - 14:58:03 EST


On Wed, Apr 13, 2016 at 02:33:09PM -0400, Tejun Heo wrote:
> An easy solution would be to make lru_add_drain_all() use a
> WQ_MEM_RECLAIM workqueue. A better way would be making charge moving
> asynchronous similar to cpuset node migration but I don't know whether
> that's realistic. Will prep a patch to add a rescuer to
> lru_add_drain_all().

So, something like the following. Can you please see whether the
deadlock goes away with the patch?

diff --git a/mm/swap.c b/mm/swap.c
index a0bc206..7022872 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -664,8 +664,16 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
lru_add_drain();
}

+static struct workqueue_struct *lru_add_drain_wq;
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);

+static int __init lru_add_drain_wq_init(void)
+{
+ lru_add_drain_wq = alloc_workqueue("lru_add_drain", WQ_MEM_RECLAIM, 0);
+ return lru_add_drain_wq ? 0 : -ENOMEM;
+}
+core_initcall(lru_add_drain_wq_init);
+
void lru_add_drain_all(void)
{
static DEFINE_MUTEX(lock);
@@ -685,13 +693,12 @@ void lru_add_drain_all(void)
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
- schedule_work_on(cpu, work);
+ queue_work_on(cpu, lru_add_drain_wq, work);
cpumask_set_cpu(cpu, &has_work);
}
}

- for_each_cpu(cpu, &has_work)
- flush_work(&per_cpu(lru_add_drain_work, cpu));
+ flush_workqueue(lru_add_drain_wq);

put_online_cpus();
mutex_unlock(&lock);