[PATCH 4/4] sched: Allow tasks to stack with a workqueue on the same CPU

From: Mel Gorman
Date: Mon Dec 18 2017 - 04:44:27 EST


If a task wakes a kworker to do some work and is woken on completion, and it
was a per-cpu kworker that was used, then a situation can arise where the
current CPU is always active when the kworker is waking and select_idle_sibling
moves the task. This leads to a situation where a task moves around the socket
each time a kworker is used even though the relationship is effectively sync.
This patch special cases a kworker running on the same CPU. It has a noticeable
impact on migrations and performance of dbench running with the XFS filesystem
but has no impact on ext4 as ext4 interacts with a kthread, not a kworker.

4.15.0-rc3 4.15.0-rc3
wakeprev stackwq
Hmean 1 392.92 ( 0.00%) 1024.22 ( 160.67%)
Hmean 2 787.09 ( 0.00%) 1808.38 ( 129.75%)
Hmean 4 1559.71 ( 0.00%) 2525.42 ( 61.92%)
Hmean 8 2576.05 ( 0.00%) 2881.12 ( 11.84%)
Hmean 16 2949.28 ( 0.00%) 3137.65 ( 6.39%)
Hmean 32 3041.89 ( 0.00%) 3147.92 ( 3.49%)
Hmean 64 1655.42 ( 0.00%) 1756.21 ( 6.09%)
Hmean 128 1133.19 ( 0.00%) 1165.39 ( 2.84%)
Stddev 1 2.59 ( 0.00%) 11.21 (-332.82%)
Stddev 2 8.96 ( 0.00%) 13.57 ( -51.44%)
Stddev 4 20.15 ( 0.00%) 8.51 ( 57.75%)
Stddev 8 17.15 ( 0.00%) 14.45 ( 15.75%)
Stddev 16 30.29 ( 0.00%) 31.30 ( -3.33%)
Stddev 32 64.45 ( 0.00%) 57.22 ( 11.21%)
Stddev 64 55.89 ( 0.00%) 62.84 ( -12.43%)
Stddev 128 55.89 ( 0.00%) 62.75 ( -12.27%)

There is also a large drop in system CPU usage:

4.15.0-rc3 4.15.0-rc3
wakeprev stackwq
User 1561.85 1166.59
System 6961.89 4965.09
Elapsed 1472.05 1471.84

Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 29 +++++++++++++++++++++++++++--
kernel/sched/features.h | 8 ++++++++
2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 95b1145bc38d..cff55481bd19 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5684,6 +5684,19 @@ static int wake_wide(struct task_struct *p)
return 1;
}

+/*
+ * Returns true if a wakeup is either from or to a workqueue and the tasks
+ * appear to be synchronised with each other.
+ */
+static bool
+is_wakeup_workqueue_sync(struct task_struct *p, int this_cpu, int prev_cpu)
+{
+ return sched_feat(WA_STACK_WQ) &&
+ this_cpu == prev_cpu &&
+ ((p->flags & PF_WQ_WORKER) || (current->flags & PF_WQ_WORKER)) &&
+ this_rq()->nr_running <= 1;
+}
+
/*
* The purpose of wake_affine() is to quickly determine on which CPU we can run
* soonest. For the purpose of speed we only consider the waking and previous
@@ -5735,7 +5748,7 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
}

static int
-wake_affine_sync(int this_cpu, int sync)
+wake_affine_sync(struct task_struct *p, int this_cpu, int prev_cpu, int sync)
{
/*
* Consider stacking tasks if it's a sync wakeup and there is only
@@ -5745,6 +5758,14 @@ wake_affine_sync(int this_cpu, int sync)
if (sync && cpu_rq(this_cpu)->nr_running == 1)
return this_cpu;

+ /*
+ * If the waker or wakee is a workqueue and it appears to be similar
+ * to a sync wakeup then assume the waker will sleep shortly and allow
+ * the tasks to stack on the same CPU.
+ */
+ if (is_wakeup_workqueue_sync(p, this_cpu, prev_cpu))
+ return this_cpu;
+
return nr_cpumask_bits;
}

@@ -5794,7 +5815,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
new_cpu = wake_affine_idle(this_cpu, prev_cpu, sync);

if (sched_feat(WA_IDLE) && new_cpu == nr_cpumask_bits)
- new_cpu = wake_affine_sync(this_cpu, sync);
+ new_cpu = wake_affine_sync(p, this_cpu, prev_cpu, sync);

if (sched_feat(WA_WEIGHT) && new_cpu == nr_cpumask_bits)
new_cpu = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
@@ -6240,6 +6261,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
if (idle_cpu(target))
return target;

+ /* Allow a wakeup to stack if it looks like a synchronous workqueue */
+ if (is_wakeup_workqueue_sync(p, smp_processor_id(), target))
+ return target;
+
/*
* If the previous cpu is cache affine and idle, don't be stupid.
*/
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 9552fd5854bf..c96ad246584a 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -85,3 +85,11 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
SCHED_FEAT(WA_IDLE, true)
SCHED_FEAT(WA_WEIGHT, true)
SCHED_FEAT(WA_BIAS, true)
+
+/*
+ * If true then a process may stack with a workqueue on the same CPU during
+ * wakeup instead of finding an idle sibling. This should only happen in the
+ * case where there appears to be a strong relationship between the wq and the
+ * task e.g. IO operations dispatched to a workqueue on XFS.
+ */
+SCHED_FEAT(WA_STACK_WQ, true)
--
2.15.0