[RFC][PATCH v3 1/4] printk: move can_use_console out of console_trylock_for_printk

From: Sergey Senozhatsky
Date: Sat Jan 23 2016 - 03:18:06 EST


vprintk_emit() disables preemption around console_trylock_for_printk()
and console_unlock() calls for a strong reason -- can_use_console()
check. The thing is that vprintl_emit() can be called on a CPU that
is not fully brought up yet (!cpu_online()), which potentially can
cause problems if console driver wants to access per-cpu data. A
console driver can explicitly state that it's safe to call it from
!online cpu by setting CON_ANYTIME bit in console ->flags. That's why
for !cpu_online() can_use_console() iterates all the console to find
out if there is a CON_ANYTIME console, otherwise console_unlock() must
be avoided.

can_use_console() ensures that console_unlock() call is safe in
vprintk_emit() only; console_lock() and console_trylock() are not
covered by this check. Even though call_console_drivers(), invoked
from console_cont_flush() and console_unlock(), tests
`!cpu_online() && CON_ANYTIME' for_each_console(), it may be
too late, which can result in messages loss.

Assume that we have 2 cpus -- CPU0 is online, CPU1 is !online, and
no CON_ANYTIME consoles available.

CPU0 online CPU1 !online
console_trylock()
...
console_unlock()
console_cont_flush
spin_lock logbuf_lock
if (!cont.len) {
spin_unlock logbuf_lock
return
}
for (;;) {
vprintk_emit
spin_lock logbuf_lock
log_store
spin_unlock logbuf_lock
spin_lock logbuf_lock
!console_trylock_for_printk msg_print_text
return console_idx = log_next()
console_seq++
console_prev = msg->flags
spin_unlock logbuf_lock

call_console_drivers()
for_each_console(con) {
if (!cpu_online() &&
!(con->flags & CON_ANYTIME))
continue;
}
/*
* no message printed, we lost it
*/
vprintk_emit
spin_lock logbuf_lock
log_store
spin_unlock logbuf_lock
!console_trylock_for_printk
return
/*
* go to the beginning of the loop,
* find out there are new messages,
* lose it
*/
}

console_trylock()/console_lock() call on CPU1 may come from cpu
notifier registered on that CPU. Since notifiers are not getting
unregistered when CPU is going DOWN, all of the notifiers receive
notifications during CPU UP. For example, on my x86_64, I see around 50
notification sent from offline CPU to itself

[swapper/2] from cpu:2 to:2 action:CPU_STARTING hotplug_hrtick
[swapper/2] from cpu:2 to:2 action:CPU_STARTING blk_mq_main_cpu_notify
[swapper/2] from cpu:2 to:2 action:CPU_STARTING blk_mq_queue_reinit_notify
[swapper/2] from cpu:2 to:2 action:CPU_STARTING console_cpu_notify

while doing
echo 0 > /sys/devices/system/cpu/cpu2/online
echo 1 > /sys/devices/system/cpu/cpu2/online

So grabbing the console_sem lock while CPU is !online is possible,
in theory.

This patch moves can_use_console() check out of
console_trylock_for_printk(). Instead it calls it in
console_unlock(), so now console_lock()/console_unlock() are
also 'protected' by can_use_console().

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
kernel/printk/printk.c | 86 ++++++++++++++++++++++++--------------------------
1 file changed, 42 insertions(+), 44 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7ebcfea..c39232a 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1485,33 +1485,6 @@ static void zap_locks(void)
}

/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
- struct console *con;
-
- for_each_console(con)
- if (con->flags & CON_ANYTIME)
- return 1;
-
- return 0;
-}
-
-/*
- * Can we actually use the console at this time on this cpu?
- *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
- */
-static inline int can_use_console(unsigned int cpu)
-{
- return cpu_online(cpu) || have_callable_console();
-}
-
-/*
* Try to get console ownership to actually show the kernel
* messages from a 'printk'. Return true (and with the
* console_lock held, and 'console_locked' set) if it
@@ -1519,21 +1492,7 @@ static inline int can_use_console(unsigned int cpu)
*/
static int console_trylock_for_printk(void)
{
- unsigned int cpu = smp_processor_id();
-
- if (!console_trylock())
- return 0;
- /*
- * If we can't use the console, we need to release the console
- * semaphore by hand to avoid flushing the buffer. We need to hold the
- * console semaphore in order to do this test safely.
- */
- if (!can_use_console(cpu)) {
- console_locked = 0;
- up_console_sem();
- return 0;
- }
- return 1;
+ return console_trylock();
}

int printk_delay_msec __read_mostly;
@@ -1683,7 +1642,6 @@ asmlinkage int vprintk_emit(int facility, int level,
boot_delay_msec(level);
printk_delay();

- /* This stops the holder of console_sem just where we want him */
local_irq_save(flags);
this_cpu = smp_processor_id();

@@ -1707,6 +1665,7 @@ asmlinkage int vprintk_emit(int facility, int level,
}

lockdep_off();
+ /* This stops the holder of console_sem just where we want him */
raw_spin_lock(&logbuf_lock);
logbuf_cpu = this_cpu;

@@ -2177,6 +2136,33 @@ int is_console_locked(void)
return console_locked;
}

+/*
+ * Check if we have any console that is capable of printing while cpu is
+ * booting or shutting down. Requires console_sem.
+ */
+static int have_callable_console(void)
+{
+ struct console *con;
+
+ for_each_console(con)
+ if (con->flags & CON_ANYTIME)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Can we actually use the console at this time on this cpu?
+ *
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
+ */
+static inline int can_use_console(void)
+{
+ return cpu_online(raw_smp_processor_id()) || have_callable_console();
+}
+
static void console_cont_flush(char *text, size_t size)
{
unsigned long flags;
@@ -2247,9 +2233,21 @@ void console_unlock(void)
do_cond_resched = console_may_schedule;
console_may_schedule = 0;

+again:
+ /*
+ * We released the console_sem lock, so we need to recheck if
+ * cpu is online and (if not) is there at least one CON_ANYTIME
+ * console.
+ */
+ if (!can_use_console()) {
+ console_locked = 0;
+ up_console_sem();
+ return;
+ }
+
/* flush buffered message fragment immediately to console */
console_cont_flush(text, sizeof(text));
-again:
+
for (;;) {
struct printk_log *msg;
size_t ext_len = 0;
--
2.7.0