[PATCH 3/3 V7] workqueue: Allow modifying low level unbound workqueue cpumask

From: Lai Jiangshan
Date: Tue Apr 07 2015 - 07:24:09 EST


Allow modifying the low-level unbound workqueue cpumask through
sysfs. This is performed by traversing the entire workqueue list
and calling apply_wqattrs_prepare() on each unbound workqueue
with the low-level mask passed in. Only after all the preparations are done
do we commit them all together.

Ordered workqueues are ignored by the low-level unbound workqueue cpumask for now;
they will be handled in the near future.

The per-node pwqs are always restricted by the low-level cpumask, while
the default pwq falls back to the low-level global cpumask when (and ONLY when) the
cpumask set by the user doesn't overlap with the low-level cpumask.

The default wq_unbound_global_cpumask is still cpu_possible_mask because the workqueue
subsystem doesn't know what the best default value is at runtime; the
system manager or another subsystem that has sufficient information should set
it when needed.

Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Kevin Hilman <khilman@xxxxxxxxxx>
Cc: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Cc: Mike Galbraith <bitbucket@xxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Original-patch-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
---
include/linux/workqueue.h | 1 +
kernel/workqueue.c | 122 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index deee212..01483b3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -424,6 +424,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
void free_workqueue_attrs(struct workqueue_attrs *attrs);
int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs);
+int workqueue_set_unbound_global_cpumask(cpumask_var_t cpumask);

extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cbccf5d..557612e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -299,7 +299,7 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */

-static cpumask_var_t wq_unbound_global_cpumask;
+static cpumask_var_t wq_unbound_global_cpumask; /* PL: low level cpumask for all unbound wqs */

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -3493,6 +3493,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
struct apply_wqattrs_ctx {
struct workqueue_struct *wq; /* target to be applied */
struct workqueue_attrs *attrs; /* configured attrs */
+ struct list_head list; /* queued for batching commit */
struct pool_workqueue *dfl_pwq;
struct pool_workqueue *pwq_tbl[];
};
@@ -3517,7 +3518,8 @@ static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
/* Allocates the attrs and pwqs for later installment */
static struct apply_wqattrs_ctx *
apply_wqattrs_prepare(struct workqueue_struct *wq,
- const struct workqueue_attrs *attrs)
+ const struct workqueue_attrs *attrs,
+ cpumask_var_t unbound_cpumask)
{
struct apply_wqattrs_ctx *ctx;
struct workqueue_attrs *new_attrs, *tmp_attrs;
@@ -3535,7 +3537,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,

/* make a copy of @attrs and sanitize it */
copy_workqueue_attrs(new_attrs, attrs);
- cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_global_cpumask);
+ cpumask_and(new_attrs->cpumask, new_attrs->cpumask, unbound_cpumask);

/*
* We may create multiple pwqs with differing cpumasks. Make a
@@ -3548,13 +3550,18 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
* If something goes wrong during CPU up/down, we'll fall back to
* the default pwq covering whole @attrs->cpumask. Always create
* it even if we don't use it immediately.
+ *
+ * If the cpumask set by the user doesn't overlap with the
+ * unbound_cpumask, we fallback to the unbound_cpumask.
*/
- ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
+ if (unlikely(cpumask_empty(tmp_attrs->cpumask)))
+ cpumask_copy(tmp_attrs->cpumask, unbound_cpumask);
+ ctx->dfl_pwq = alloc_unbound_pwq(wq, tmp_attrs);
if (!ctx->dfl_pwq)
goto out_free;

for_each_node(node) {
- if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
+ if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
if (!ctx->pwq_tbl[node])
goto out_free;
@@ -3564,7 +3571,11 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
}
}

+ /* save the user configured attrs */
+ copy_workqueue_attrs(new_attrs, attrs);
+ cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
ctx->attrs = new_attrs;
+
ctx->wq = wq;
free_workqueue_attrs(tmp_attrs);
return ctx;
@@ -3636,7 +3647,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
get_online_cpus();

mutex_lock(&wq_pool_mutex);
- ctx = apply_wqattrs_prepare(wq, attrs);
+ ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_global_cpumask);
mutex_unlock(&wq_pool_mutex);

/* the ctx has been prepared successfully, let's commit it */
@@ -3710,6 +3721,14 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* wq's, the default pwq should be used.
*/
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
+ /*
+ * wq->unbound_attrs is the user configured attrs whose
+ * cpumask is not masked with wq_unbound_global_cpumask,
+ * so we apply that mask here.
+ */
+ cpumask_and(cpumask, cpumask, wq_unbound_global_cpumask);
+ if (cpumask_empty(cpumask))
+ goto use_dfl_pwq;
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock;
} else {
@@ -4732,6 +4751,75 @@ out_unlock:
}
#endif /* CONFIG_FREEZER */

+static int workqueue_apply_unbound_global_cpumask(cpumask_var_t cpumask)
+{
+ LIST_HEAD(ctxs);
+ int ret = 0;
+ struct workqueue_struct *wq;
+ struct apply_wqattrs_ctx *ctx, *n;
+
+ lockdep_assert_held(&wq_pool_mutex);
+
+ list_for_each_entry(wq, &workqueues, list) {
+ if (!(wq->flags & WQ_UNBOUND))
+ continue;
+ /* creating multiple pwqs breaks ordering guarantee */
+ if (wq->flags & __WQ_ORDERED)
+ continue;
+
+ ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs,
+ cpumask);
+ if (!ctx) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ list_add_tail(&ctx->list, &ctxs);
+ }
+
+ list_for_each_entry_safe(ctx, n, &ctxs, list) {
+ list_del(&ctx->list);
+ if (!ret)
+ apply_wqattrs_commit(ctx);
+ apply_wqattrs_cleanup(ctx);
+ }
+
+ return ret;
+}
+
+/**
+ * workqueue_set_unbound_global_cpumask - Set the low-level unbound cpumask
+ * @cpumask: the cpumask to set
+ *
+ * The low-level workqueues cpumask is a global cpumask that limits
+ * the affinity of all unbound workqueues. This function checks the @cpumask,
+ * applies it to all unbound workqueues and updates all of their pwqs.
+ * When all succeed, it saves @cpumask to the global low-level unbound
+ * cpumask.
+ *
+ * Return: 0 - Success
+ * -EINVAL - No possible cpu in the @cpumask
+ * -ENOMEM - Failed to allocate memory for attrs or pwqs.
+ */
+int workqueue_set_unbound_global_cpumask(cpumask_var_t cpumask)
+{
+ int ret = -EINVAL;
+
+ get_online_cpus();
+ cpumask_and(cpumask, cpumask, cpu_possible_mask);
+ if (!cpumask_empty(cpumask)) {
+ mutex_lock(&wq_pool_mutex);
+ ret = workqueue_apply_unbound_global_cpumask(cpumask);
+ if (ret >= 0)
+ cpumask_copy(wq_unbound_global_cpumask, cpumask);
+ mutex_unlock(&wq_pool_mutex);
+ }
+ put_online_cpus();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(workqueue_set_unbound_global_cpumask);
+
#ifdef CONFIG_SYSFS
/*
* Workqueues with WQ_SYSFS flag set is visible to userland via
@@ -4953,14 +5041,34 @@ static ssize_t wq_unbound_global_cpumask_show(struct device *dev,
{
int written;

+ mutex_lock(&wq_pool_mutex);
written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
cpumask_pr_args(wq_unbound_global_cpumask));
+ mutex_unlock(&wq_pool_mutex);

return written;
}

+static ssize_t wq_unbound_global_cpumask_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ cpumask_var_t cpumask;
+ int ret;
+
+ if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, cpumask);
+ if (!ret)
+ ret = workqueue_set_unbound_global_cpumask(cpumask);
+
+ free_cpumask_var(cpumask);
+ return ret ? ret : count;
+}
+
static struct device_attribute wq_sysfs_cpumask_attr =
- __ATTR(cpumask, 0444, wq_unbound_global_cpumask_show, NULL);
+ __ATTR(cpumask, 0644, wq_unbound_global_cpumask_show,
+ wq_unbound_global_cpumask_store);

static int __init wq_sysfs_init(void)
{
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/