[tip:locking/urgent] smp: Fix smp_call_function_single_async() locking

From: tip-bot for Linus Torvalds
Date: Sat Apr 18 2015 - 06:14:21 EST


Commit-ID: 8053871d0f7f67c7efb7f226ef031f78877d6625
Gitweb: http://git.kernel.org/tip/8053871d0f7f67c7efb7f226ef031f78877d6625
Author: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
AuthorDate: Wed, 11 Feb 2015 12:42:10 -0800
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 17 Apr 2015 09:57:52 +0200

smp: Fix smp_call_function_single_async() locking

The current smp_function_call code suffers a number of problems, most
notably smp_call_function_single_async() is broken.

The problem is that flush_smp_call_function_queue() does csd_unlock()
_after_ calling csd->func(). This means that a caller cannot properly
synchronize the csd usage as it has to.

Change the code to release the csd before calling ->func() for the
async case, and put a WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK) in
smp_call_function_single_async() to warn us of improper serialization,
because any waiting there can results in deadlocks when called with
IRQs disabled.

Rename the (currently) unused WAIT flag to SYNCHRONOUS and (re)use it
such that we know what to do in flush_smp_call_function_queue().

Rework csd_{,un}lock() to use smp_load_acquire() / smp_store_release()
to avoid some full barriers while more clearly providing lock
semantics.

Finally move the csd maintenance out of generic_exec_single() into its
callers for clearer code.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
[ Added changelog. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Rafael David Tinoco <inaddy@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/CA+55aFz492bzLFhdbKN-Hygjcreup7CjMEYk3nTSfRWjppz-OA@xxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
kernel/smp.c | 78 ++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 47 insertions(+), 31 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index f38a1e6..2aaac2c 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -19,7 +19,7 @@

enum {
CSD_FLAG_LOCK = 0x01,
- CSD_FLAG_WAIT = 0x02,
+ CSD_FLAG_SYNCHRONOUS = 0x02,
};

struct call_function_data {
@@ -107,7 +107,7 @@ void __init call_function_init(void)
*/
static void csd_lock_wait(struct call_single_data *csd)
{
- while (csd->flags & CSD_FLAG_LOCK)
+ while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
cpu_relax();
}

@@ -121,19 +121,17 @@ static void csd_lock(struct call_single_data *csd)
* to ->flags with any subsequent assignments to other
* fields of the specified call_single_data structure:
*/
- smp_mb();
+ smp_wmb();
}

static void csd_unlock(struct call_single_data *csd)
{
- WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK));
+ WARN_ON(!(csd->flags & CSD_FLAG_LOCK));

/*
* ensure we're all done before releasing data:
*/
- smp_mb();
-
- csd->flags &= ~CSD_FLAG_LOCK;
+ smp_store_release(&csd->flags, 0);
}

static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
@@ -144,13 +142,16 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
* ->func, ->info, and ->flags set.
*/
static int generic_exec_single(int cpu, struct call_single_data *csd,
- smp_call_func_t func, void *info, int wait)
+ smp_call_func_t func, void *info)
{
- struct call_single_data csd_stack = { .flags = 0 };
- unsigned long flags;
-
-
if (cpu == smp_processor_id()) {
+ unsigned long flags;
+
+ /*
+ * We can unlock early even for the synchronous on-stack case,
+ * since we're doing this from the same CPU..
+ */
+ csd_unlock(csd);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
@@ -161,21 +162,9 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO;

-
- if (!csd) {
- csd = &csd_stack;
- if (!wait)
- csd = this_cpu_ptr(&csd_data);
- }
-
- csd_lock(csd);
-
csd->func = func;
csd->info = info;

- if (wait)
- csd->flags |= CSD_FLAG_WAIT;
-
/*
* The list addition should be visible before sending the IPI
* handler locks the list to pull the entry off it because of
@@ -190,9 +179,6 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
arch_send_call_function_single_ipi(cpu);

- if (wait)
- csd_lock_wait(csd);
-
return 0;
}

@@ -250,8 +236,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
}

llist_for_each_entry_safe(csd, csd_next, entry, llist) {
- csd->func(csd->info);
- csd_unlock(csd);
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ /* Do we wait until *after* callback? */
+ if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+ func(info);
+ csd_unlock(csd);
+ } else {
+ csd_unlock(csd);
+ func(info);
+ }
}

/*
@@ -274,6 +269,8 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
int wait)
{
+ struct call_single_data *csd;
+ struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS };
int this_cpu;
int err;

@@ -292,7 +289,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);

- err = generic_exec_single(cpu, NULL, func, info, wait);
+ csd = &csd_stack;
+ if (!wait) {
+ csd = this_cpu_ptr(&csd_data);
+ csd_lock(csd);
+ }
+
+ err = generic_exec_single(cpu, csd, func, info);
+
+ if (wait)
+ csd_lock_wait(csd);

put_cpu();

@@ -321,7 +327,15 @@ int smp_call_function_single_async(int cpu, struct call_single_data *csd)
int err = 0;

preempt_disable();
- err = generic_exec_single(cpu, csd, csd->func, csd->info, 0);
+
+ /* We could deadlock if we have to wait here with interrupts disabled! */
+ if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
+ csd_lock_wait(csd);
+
+ csd->flags = CSD_FLAG_LOCK;
+ smp_wmb();
+
+ err = generic_exec_single(cpu, csd, csd->func, csd->info);
preempt_enable();

return err;
@@ -433,6 +447,8 @@ void smp_call_function_many(const struct cpumask *mask,
struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);

csd_lock(csd);
+ if (wait)
+ csd->flags |= CSD_FLAG_SYNCHRONOUS;
csd->func = func;
csd->info = info;
llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/