[PATCH 4.14 096/165] printk/nmi: Prevent deadlock when accessing the main log buffer in NMI

From: Greg Kroah-Hartman
Date: Mon Sep 03 2018 - 13:25:08 EST


4.14-stable review patch. If anyone has any objections, please let me know.

------------------

From: Petr Mladek <pmladek@xxxxxxxx>

commit 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 upstream.

The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI
when logbuf_lock is available") brought back the possible deadlocks
in printk() and NMI.

The check of logbuf_lock is done only in printk_nmi_enter() to prevent
mixed output. But another CPU might take the lock later, enter NMI, and:

+ Both NMIs might be serialized by yet another lock, for example,
the one in nmi_cpu_backtrace().

+ The other CPU might get stopped in NMI, see smp_send_stop()
in panic().

The only safe solution is to use trylock when storing the message
into the main log-buffer. It might cause reordering when some lines
go to the main lock buffer directly and others are delayed via
the per-CPU buffer. It means that it is not useful in general.

This patch replaces the problematic NMI deferred context with NMI
direct context. It can be used to mark a code that might produce
many messages in NMI and the risk of losing them is more critical
than problems with eventual reordering.

The context is then used when dumping trace buffers on oops. It was
the primary motivation for the original fix. Also the reordering is
even smaller issue there because some traces have their own time stamps.

Finally, nmi_cpu_backtrace() need not longer be serialized because
it will always us the per-CPU buffers again.

Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available")
Cc: stable@xxxxxxxxxxxxxxx
Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@xxxxxxxx
To: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@xxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: stable@xxxxxxxxxxxxxxx
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
Signed-off-by: Petr Mladek <pmladek@xxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
include/linux/printk.h | 4 +++
kernel/printk/internal.h | 9 ++++++
kernel/printk/printk_safe.c | 58 ++++++++++++++++++++++++++++----------------
kernel/trace/trace.c | 4 ++-
lib/nmi_backtrace.c | 3 --
5 files changed, 52 insertions(+), 26 deletions(-)

--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -150,9 +150,13 @@ void early_printk(const char *s, ...) {
#ifdef CONFIG_PRINTK_NMI
extern void printk_nmi_enter(void);
extern void printk_nmi_exit(void);
+extern void printk_nmi_direct_enter(void);
+extern void printk_nmi_direct_exit(void);
#else
static inline void printk_nmi_enter(void) { }
static inline void printk_nmi_exit(void) { }
+static inline void printk_nmi_direct_enter(void) { }
+static inline void printk_nmi_direct_exit(void) { }
#endif /* PRINTK_NMI */

#ifdef CONFIG_PRINTK
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -19,11 +19,16 @@
#ifdef CONFIG_PRINTK

#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff
-#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000
+#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000
#define PRINTK_NMI_CONTEXT_MASK 0x80000000

extern raw_spinlock_t logbuf_lock;

+__printf(5, 0)
+int vprintk_store(int facility, int level,
+ const char *dict, size_t dictlen,
+ const char *fmt, va_list args);
+
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
@@ -54,6 +59,8 @@ void __printk_safe_exit(void);
local_irq_enable(); \
} while (0)

+void defer_console_output(void);
+
#else

__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -311,24 +311,33 @@ static __printf(1, 0) int vprintk_nmi(co

void printk_nmi_enter(void)
{
- /*
- * The size of the extra per-CPU buffer is limited. Use it only when
- * the main one is locked. If this CPU is not in the safe context,
- * the lock must be taken on another CPU and we could wait for it.
- */
- if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) &&
- raw_spin_is_locked(&logbuf_lock)) {
- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
- } else {
- this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);
- }
+ this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
}

void printk_nmi_exit(void)
{
- this_cpu_and(printk_context,
- ~(PRINTK_NMI_CONTEXT_MASK |
- PRINTK_NMI_DEFERRED_CONTEXT_MASK));
+ this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
+}
+
+/*
+ * Marks a code that might produce many messages in NMI context
+ * and the risk of losing them is more critical than eventual
+ * reordering.
+ *
+ * It has effect only when called in NMI context. Then printk()
+ * will try to store the messages into the main logbuf directly
+ * and use the per-CPU buffers only as a fallback when the lock
+ * is not available.
+ */
+void printk_nmi_direct_enter(void)
+{
+ if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
+ this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
+}
+
+void printk_nmi_direct_exit(void)
+{
+ this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
}

#else
@@ -366,6 +375,20 @@ void __printk_safe_exit(void)

__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
+ /*
+ * Try to use the main logbuf even in NMI. But avoid calling console
+ * drivers that might have their own locks.
+ */
+ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
+ raw_spin_trylock(&logbuf_lock)) {
+ int len;
+
+ len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+ raw_spin_unlock(&logbuf_lock);
+ defer_console_output();
+ return len;
+ }
+
/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
return vprintk_nmi(fmt, args);
@@ -374,13 +397,6 @@ __printf(1, 0) int vprintk_func(const ch
if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
return vprintk_safe(fmt, args);

- /*
- * Use the main logbuf when logbuf_lock is available in NMI.
- * But avoid calling console drivers that might have their own locks.
- */
- if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)
- return vprintk_deferred(fmt, args);
-
/* No obstacles. */
return vprintk_default(fmt, args);
}
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8187,6 +8187,7 @@ void ftrace_dump(enum ftrace_dump_mode o
tracing_off();

local_irq_save(flags);
+ printk_nmi_direct_enter();

/* Simulate the iterator */
trace_init_global_iter(&iter);
@@ -8266,7 +8267,8 @@ void ftrace_dump(enum ftrace_dump_mode o
for_each_tracing_cpu(cpu) {
atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
- atomic_dec(&dump_running);
+ atomic_dec(&dump_running);
+ printk_nmi_direct_exit();
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const

bool nmi_cpu_backtrace(struct pt_regs *regs)
{
- static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
int cpu = smp_processor_id();

if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- arch_spin_lock(&lock);
if (regs && cpu_in_idle(instruction_pointer(regs))) {
pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n",
cpu, instruction_pointer(regs));
@@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *r
else
dump_stack();
}
- arch_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return true;
}