Re: [RFC][PATCH v4 1/2] printk: Make printk() completely async

From: Sergey Senozhatsky
Date: Tue Mar 15 2016 - 10:09:57 EST


On (03/15/16 11:03), Jan Kara wrote:
[..]
> > + if (!sync_print) {
> > + if (printk_thread && !in_panic) {
> > + /*
> > + * This will wakeup the printing kthread and offload
> > + * printing to a schedulable context.
> > + */
> > + __this_cpu_or(printk_pending,
> > + PRINTK_PENDING_KTHREAD_OUTPUT);
> > + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > + } else if (in_sched) {
> > + /*
> > + * @in_sched messages may come too early, when we don't
> > + * yet have @printk_thread. We can't print deferred
> > + * messages directly, because this may deadlock, route
> > + * them via IRQ context.
> > + */
> > + __this_cpu_or(printk_pending,
> > + PRINTK_PENDING_IRQ_OUTPUT);
> > + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > + } else {
> > + sync_print = true;
> > + }
> > + }
>
> I'm a bit undecided whether we want to go through irq work even for the
> common case of !in_sched messages or whether we want to directly call
> wake_up() in that case. Maybe I'd do it like:

thanks for review. I like the change that you have suggested. just
updated wake_up_klogd_work_func() check with '!printk_sync && printk_thread',
user can change `printk_sync' via sysfs.


what I wanted to do with this "irq indirection" is to move wake_up() out of
vprintk_emit(), but I didn't think good enough, I guess.

so, __wake_up() takes the spin_lock_irqsave(&q->lock, flags), which with
spin_debug enabled, can recursively call printk(). since we wake_up()
the printing thread after we release logbuf_lock and UINT_MAX logbuf_cpu,
we have no chance to survive this recursion.

/* theretical possibilities of having printk() in __wake_up_common() or down
* the call chain are not real concerns. */

so I guess what I want to do now -- is to move part of this wake_up()
decision making under the logbuf_lock and logbuf_cpu area. so recursion
detection code will be able to catch this. and we also need to switch
back to sync printk once the recursion happened; just to shut up the
faulty wake_up() in case if it was the root cause.


something like this (*minimally tested so far*).

-- move wake_up() and friends under the logbuf section; so we can detect
printk() recursion from wake_up()

-- in recursion_bug branch switch to sync printk. we don't know why did we
recurse, may be because of wake_up()->spin_lock(). doing
kthread_stop()->wake_up_process() can be unsafe, I guess, just set
`printk_sync' to true.


Signed-off-by: Jan Kara <jack@xxxxxxx>
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
Documentation/kernel-parameters.txt | 10 ++
kernel/printk/printk.c | 202 +++++++++++++++++++++++++-----------
2 files changed, 154 insertions(+), 58 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1e58ae9..454999e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3114,6 +3114,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
printk.time= Show timing data prefixed to each printk message line
Format: <bool> (1/Y/y=enable, 0/N/n=disable)

+ printk.synchronous=
+ By default kernel messages are printed to console
+ asynchronously (except during early boot or when oops
+ is happening). That avoids kernel stalling behind slow
+ serial console and thus avoids softlockups, interrupt
+ timeouts, or userspace timing out during heavy printing.
+ However for debugging problems, printing messages to
+ console immediately may be desirable. This option
+ enables such behavior.
+
processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state
max_cstate=9 overrides any DMI blacklist limit.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d5fd844..38baed1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -46,6 +46,7 @@
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <linux/uio.h>
+#include <linux/kthread.h>

#include <asm/uaccess.h>
#include <asm-generic/sections.h>
@@ -284,6 +285,105 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

+/*
+ * When true, printing to console will happen synchronously unless someone else
+ * is already printing messages.
+ *
+ * The default value on UP systems is 'true'.
+ */
+static bool __read_mostly printk_sync = !IS_ENABLED(CONFIG_SMP);
+module_param_named(synchronous, printk_sync, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(synchronous, "make printing to console synchronous");
+
+/* Printing kthread for async vprintk_emit() */
+static struct task_struct *printk_thread;
+/* Wait for printing wakeups from async vprintk_emit() */
+static DECLARE_WAIT_QUEUE_HEAD(printing_wait);
+
+static int printing_func(void *data)
+{
+ while (1) {
+ DECLARE_WAITQUEUE(wait, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&printing_wait, &wait);
+
+ schedule();
+ remove_wait_queue(&printing_wait, &wait);
+
+ console_lock();
+ console_unlock();
+ }
+
+ return 0;
+}
+
+static int __init init_printk_thread(void)
+{
+ if (printk_sync)
+ return 0;
+
+ printk_thread = kthread_run(printing_func, NULL, "printk");
+ BUG_ON(IS_ERR(printk_thread));
+ return 0;
+}
+late_initcall(init_printk_thread);
+
+/*
+ * Delayed printk version, for scheduler-internal messages:
+ */
+#define PRINTK_PENDING_WAKEUP (1<<0)
+#define PRINTK_PENDING_OUTPUT (1<<1)
+
+static DEFINE_PER_CPU(int, printk_pending);
+
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
+{
+ int pending = __this_cpu_xchg(printk_pending, 0);
+
+ if (pending & PRINTK_PENDING_OUTPUT) {
+ if (!printk_sync && printk_thread) {
+ wake_up(&printing_wait);
+ } else {
+ /*
+ * If trylock fails, someone else is doing
+ * the printing
+ */
+ if (console_trylock())
+ console_unlock();
+ }
+ }
+
+ if (pending & PRINTK_PENDING_WAKEUP)
+ wake_up_interruptible(&log_wait);
+}
+
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+ .func = wake_up_klogd_work_func,
+ .flags = IRQ_WORK_LAZY,
+};
+
+void wake_up_klogd(void)
+{
+ preempt_disable();
+ if (waitqueue_active(&log_wait)) {
+ this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+ }
+ preempt_enable();
+}
+
+int printk_deferred(const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ va_start(args, fmt);
+ r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
+ va_end(args);
+
+ return r;
+}
+
/* Return log buffer address */
char *log_buf_addr_get(void)
{
@@ -1609,6 +1709,8 @@ asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
+ /* cpu currently holding logbuf_lock in this function */
+ static unsigned int logbuf_cpu = UINT_MAX;
static bool recursion_bug;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
@@ -1618,12 +1720,17 @@ asmlinkage int vprintk_emit(int facility, int level,
int this_cpu;
int printed_len = 0;
bool in_sched = false;
- /* cpu currently holding logbuf_lock in this function */
- static unsigned int logbuf_cpu = UINT_MAX;
+ bool in_panic = console_loglevel == CONSOLE_LOGLEVEL_MOTORMOUTH;
+ bool sync_print = printk_sync;

if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
+ /*
+ * Deferred sched messages must not be printed
+ * synchronously regardless the @printk_sync or @in_panic.
+ */
in_sched = true;
+ sync_print = false;
}

boot_delay_msec(level);
@@ -1660,6 +1767,7 @@ asmlinkage int vprintk_emit(int facility, int level,
static const char recursion_msg[] =
"BUG: recent printk recursion!";

+ printk_sync = true;
recursion_bug = false;
/* emit KERN_CRIT message */
printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
@@ -1749,13 +1857,45 @@ asmlinkage int vprintk_emit(int facility, int level,
dict, dictlen, text, text_len);
}

+ /*
+ * By default we print message to console asynchronously so that kernel
+ * doesn't get stalled due to slow serial console. That can lead to
+ * softlockups, lost interrupts, or userspace timing out under heavy
+ * printing load.
+ *
+ * However we resort to synchronous printing of messages during early
+ * boot, when synchronous printing was explicitly requested by
+ * kernel parameter, or when console_verbose() was called to print
+ * everything during panic / oops.
+ */
+ if (!sync_print) {
+ if (in_sched) {
+ /*
+ * @in_sched messages may come too early, when we don't
+ * yet have @printk_thread. We can't print deferred
+ * messages directly, because this may deadlock, route
+ * them via IRQ context.
+ */
+ __this_cpu_or(printk_pending,
+ PRINTK_PENDING_OUTPUT);
+ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+ } else if (printk_thread && !in_panic) {
+ /*
+ * This will wakeup the printing kthread and offload
+ * printing to a schedulable context.
+ */
+ wake_up(&printing_wait);
+ } else {
+ sync_print = true;
+ }
+ }
+
logbuf_cpu = UINT_MAX;
raw_spin_unlock(&logbuf_lock);
lockdep_on();
local_irq_restore(flags);

- /* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
+ if (sync_print) {
lockdep_off();
/*
* Try to acquire and then immediately release the console
@@ -2724,60 +2864,6 @@ late_initcall(printk_late_init);

#if defined CONFIG_PRINTK
/*
- * Delayed printk version, for scheduler-internal messages:
- */
-#define PRINTK_PENDING_WAKEUP 0x01
-#define PRINTK_PENDING_OUTPUT 0x02
-
-static DEFINE_PER_CPU(int, printk_pending);
-
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
-{
- int pending = __this_cpu_xchg(printk_pending, 0);
-
- if (pending & PRINTK_PENDING_OUTPUT) {
- /* If trylock fails, someone else is doing the printing */
- if (console_trylock())
- console_unlock();
- }
-
- if (pending & PRINTK_PENDING_WAKEUP)
- wake_up_interruptible(&log_wait);
-}
-
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
- .func = wake_up_klogd_work_func,
- .flags = IRQ_WORK_LAZY,
-};
-
-void wake_up_klogd(void)
-{
- preempt_disable();
- if (waitqueue_active(&log_wait)) {
- this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
- }
- preempt_enable();
-}
-
-int printk_deferred(const char *fmt, ...)
-{
- va_list args;
- int r;
-
- preempt_disable();
- va_start(args, fmt);
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
- va_end(args);
-
- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
- preempt_enable();
-
- return r;
-}
-
-/*
* printk rate limiting, lifted from the networking subsystem.
*
* This enforces a rate limit: not more than 10 kernel messages
--
2.8.0.rc0