[PATCH -tip V4 5/8] workqueue: introduce wq_unbound_online_cpumask

From: Lai Jiangshan
Date: Mon Jan 11 2021 - 09:27:28 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

wq_unbound_online_cpumask is a cached copy of cpu_online_mask in which
the going-down cpu is cleared before that cpu is cleared from
cpu_active_mask. It tracks the progress of cpu hotplug, so that the
code creating or attaching unbound workers can tell how far an ongoing
hotplug operation has advanced. Later patches build on it to make
workqueue cooperate correctly with hotplug and the scheduler: setting
the correct cpumask for workers and breaking affinity proactively.
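
One way to picture the resulting ordering: assuming the online callback
only runs after the cpu has already been set in cpu_online_mask, the
cached mask should always be a subset of cpu_online_mask. A hypothetical
sanity check (not part of this patch) could assert this anywhere
wq_pool_mutex is held:

  /* hypothetical sanity check, not in this patch */
  lockdep_assert_held(&wq_pool_mutex);
  WARN_ON_ONCE(!cpumask_subset(wq_unbound_online_cpumask,
			       cpu_online_mask));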

This patch also contains the first use of wq_unbound_online_cpumask:
wq_calc_node_cpumask() and wq_update_unbound_numa() are simplified a
little, as they no longer need an explicit argument naming the cpu
going down, as shown in the excerpt below.
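
Concretely, in wq_calc_node_cpumask() the explicit clearing of the
going-down cpu is replaced by an intersection with the cached mask
(excerpted from the hunk below):

  /* before: callers pass the going-down cpu explicitly */
  cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
  if (cpu_going_down >= 0)
	  cpumask_clear_cpu(cpu_going_down, cpumask);

  /* after: the cached mask already excludes the going-down cpu */
  cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
  cpumask_and(cpumask, cpumask, wq_unbound_online_cpumask);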

Acked-by: Tejun Heo <tj@xxxxxxxxxx>
Tested-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
kernel/workqueue.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index aed08eddeb83..d01cca8e51f9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -310,6 +310,9 @@ static bool workqueue_freezing; /* PL: have wqs started freezing? */
/* PL: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

+/* PL: online cpus (cpu_online_mask with the going-down cpu cleared) */
+static cpumask_var_t wq_unbound_online_cpumask;
+
/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

@@ -3835,12 +3838,10 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
* wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
* @attrs: the wq_attrs of the default pwq of the target workqueue
* @node: the target NUMA node
- * @cpu_going_down: if >= 0, the CPU to consider as offline
* @cpumask: outarg, the resulting cpumask
*
- * Calculate the cpumask a workqueue with @attrs should use on @node. If
- * @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation. The result is stored in @cpumask.
+ * Calculate the cpumask a workqueue with @attrs should use on @node.
+ * The result is stored in @cpumask.
*
* If NUMA affinity is not enabled, @attrs->cpumask is always used. If
* enabled and @node has online CPUs requested by @attrs, the returned
@@ -3854,15 +3855,14 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
* %false if equal.
*/
static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
- int cpu_going_down, cpumask_t *cpumask)
+ cpumask_t *cpumask)
{
if (!wq_numa_enabled || attrs->no_numa)
goto use_dfl;

/* does @node have any online CPUs @attrs wants? */
cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
- if (cpu_going_down >= 0)
- cpumask_clear_cpu(cpu_going_down, cpumask);
+ cpumask_and(cpumask, cpumask, wq_unbound_online_cpumask);

if (cpumask_empty(cpumask))
goto use_dfl;
@@ -3971,7 +3971,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
goto out_free;

for_each_node(node) {
- if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
+ if (wq_calc_node_cpumask(new_attrs, node, tmp_attrs->cpumask)) {
ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
if (!ctx->pwq_tbl[node])
goto out_free;
@@ -4096,7 +4096,6 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
* wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
* @wq: the target workqueue
* @cpu: the CPU coming up or going down
- * @online: whether @cpu is coming up or going down
*
* This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
* %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update NUMA affinity of
@@ -4114,11 +4113,9 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
* affinity, it's the user's responsibility to flush the work item from
* CPU_DOWN_PREPARE.
*/
-static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
- bool online)
+static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu)
{
int node = cpu_to_node(cpu);
- int cpu_off = online ? -1 : cpu;
struct pool_workqueue *old_pwq = NULL, *pwq;
struct workqueue_attrs *target_attrs;
cpumask_t *cpumask;
@@ -4146,7 +4143,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* and create a new one if they don't match. If the target cpumask
* equals the default pwq's, the default pwq should be used.
*/
- if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
+ if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpumask)) {
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
return;
} else {
@@ -5106,6 +5103,7 @@ int workqueue_unbound_online_cpu(unsigned int cpu)
int pi;

mutex_lock(&wq_pool_mutex);
+ cpumask_set_cpu(cpu, wq_unbound_online_cpumask);

/* update CPU affinity of workers of unbound pools */
for_each_pool(pool, pi) {
@@ -5119,7 +5117,7 @@ int workqueue_unbound_online_cpu(unsigned int cpu)

/* update NUMA affinity of unbound workqueues */
list_for_each_entry(wq, &workqueues, list)
- wq_update_unbound_numa(wq, cpu, true);
+ wq_update_unbound_numa(wq, cpu);

mutex_unlock(&wq_pool_mutex);
return 0;
@@ -5131,8 +5129,9 @@ int workqueue_unbound_offline_cpu(unsigned int cpu)

/* update NUMA affinity of unbound workqueues */
mutex_lock(&wq_pool_mutex);
+ cpumask_clear_cpu(cpu, wq_unbound_online_cpumask);
list_for_each_entry(wq, &workqueues, list)
- wq_update_unbound_numa(wq, cpu, false);
+ wq_update_unbound_numa(wq, cpu);
mutex_unlock(&wq_pool_mutex);

return 0;
@@ -5969,6 +5968,9 @@ void __init workqueue_init_early(void)

BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));

+ BUG_ON(!alloc_cpumask_var(&wq_unbound_online_cpumask, GFP_KERNEL));
+ cpumask_copy(wq_unbound_online_cpumask, cpu_online_mask);
+
BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));

@@ -6065,7 +6067,7 @@ void __init workqueue_init(void)
}

list_for_each_entry(wq, &workqueues, list) {
- wq_update_unbound_numa(wq, smp_processor_id(), true);
+ wq_update_unbound_numa(wq, smp_processor_id());
WARN(init_rescuer(wq),
"workqueue: failed to create early rescuer for %s",
wq->name);
--
2.19.1.6.gb485710b