Re: overlapping printk

From: Ingo Molnar
Date: Thu May 20 2004 - 10:52:45 EST



* Ingo Molnar <mingo@xxxxxxx> wrote:

> another solution would be to break the lock only once during the
> kernel's lifetime. The system is messed up anyway if it needs multiple
> lock breaks to get an oops out to the console. We don't care about
> followup oopses - the first oops is the one that matters.

i.e. something like the attached patch, against BK-curr. (i've also
attached a cleanup patch that gets rid of the many instances of
bust_spinlocks() - we now have a generic one in lib/bust_spinlocks.c)

i consider any secondary lockup after the first oops has been printed a
feature - otherwise the first oops sometimes gets washed away by the many
followup oopses.

i've tested the patch with parallel SMP oopses - they seem to be
serialized now. (but it's hard to time the oopses right.)

Ingo
--- linux/kernel/printk.c.orig
+++ linux/kernel/printk.c
@@ -55,6 +55,9 @@ EXPORT_SYMBOL(console_printk);

int oops_in_progress;

+/* zap spinlocks only once: */
+unsigned long zap_spinlocks = 1;
+
/*
* console_sem protects the console_drivers list, and also
* provides serialisation for access to the entire console
@@ -493,7 +496,7 @@ asmlinkage int printk(const char *fmt, .
static char printk_buf[1024];
static int log_level_unknown = 1;

- if (oops_in_progress) {
+ if (oops_in_progress && test_and_clear_bit(0, &zap_spinlocks)) {
/* If a crash is occurring, make sure we can't deadlock */
spin_lock_init(&logbuf_lock);
/* And make sure that we print immediately */
--- linux/arch/i386/mm/fault.c.orig
+++ linux/arch/i386/mm/fault.c
@@ -31,32 +31,6 @@
extern void die(const char *,struct pt_regs *,long);

/*
- * Unlock any spinlocks which will prevent us from getting the
- * message out
- */
-void bust_spinlocks(int yes)
-{
- int loglevel_save = console_loglevel;
-
- if (yes) {
- oops_in_progress = 1;
- return;
- }
-#ifdef CONFIG_VT
- unblank_screen();
-#endif
- oops_in_progress = 0;
- /*
- * OK, the message is on the console. Now we call printk()
- * without oops_in_progress set so that printk will give klogd
- * a poke. Hold onto your hats...
- */
- console_loglevel = 15; /* NMI oopser may have shut the console up */
- printk(" ");
- console_loglevel = loglevel_save;
-}
-
-/*
* Return EIP plus the CS segment base. The segment limit is also
* adjusted, clamped to the kernel/user address space (whichever is
* appropriate), and returned in *eip_limit.
--- linux/arch/ia64/kernel/traps.c.orig
+++ linux/arch/ia64/kernel/traps.c
@@ -58,34 +58,6 @@ trap_init (void)
fpswa_interface = __va(ia64_boot_param->fpswa);
}

-/*
- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
- * is acquired through the console unblank code)
- */
-void
-bust_spinlocks (int yes)
-{
- int loglevel_save = console_loglevel;
-
- if (yes) {
- oops_in_progress = 1;
- return;
- }
-
-#ifdef CONFIG_VT
- unblank_screen();
-#endif
- oops_in_progress = 0;
- /*
- * OK, the message is on the console. Now we call printk() without
- * oops_in_progress set so that printk will give klogd a poke. Hold onto
- * your hats...
- */
- console_loglevel = 15; /* NMI oopser may have shut the console up */
- printk(" ");
- console_loglevel = loglevel_save;
-}
-
void
die (const char *str, struct pt_regs *regs, long err)
{
--- linux/arch/x86_64/mm/fault.c.orig
+++ linux/arch/x86_64/mm/fault.c
@@ -34,27 +34,6 @@
#include <asm/kdebug.h>
#include <asm-generic/sections.h>

-void bust_spinlocks(int yes)
-{
- int loglevel_save = console_loglevel;
- if (yes) {
- oops_in_progress = 1;
- } else {
-#ifdef CONFIG_VT
- unblank_screen();
-#endif
- oops_in_progress = 0;
- /*
- * OK, the message is on the console. Now we call printk()
- * without oops_in_progress set so that printk will give klogd
- * a poke. Hold onto your hats...
- */
- console_loglevel = 15; /* NMI oopser may have shut the console up */
- printk(" ");
- console_loglevel = loglevel_save;
- }
-}
-
/* Sometimes the CPU reports invalid exceptions on prefetch.
Check that here and ignore.
Opcode checker based on code by Richard Brunner */
--- linux/arch/s390/mm/fault.c.orig
+++ linux/arch/s390/mm/fault.c
@@ -49,32 +49,6 @@ extern int sysctl_userprocess_debug;

extern void die(const char *,struct pt_regs *,long);

-extern spinlock_t timerlist_lock;
-
-/*
- * Unlock any spinlocks which will prevent us from getting the
- * message out (timerlist_lock is acquired through the
- * console unblank code)
- */
-void bust_spinlocks(int yes)
-{
- if (yes) {
- oops_in_progress = 1;
- } else {
- int loglevel_save = console_loglevel;
- oops_in_progress = 0;
- console_unblank();
- /*
- * OK, the message is on the console. Now we call printk()
- * without oops_in_progress set so that printk will give klogd
- * a poke. Hold onto your hats...
- */
- console_loglevel = 15;
- printk(" ");
- console_loglevel = loglevel_save;
- }
-}
-
/*
* Check which address space is addressed by the access
* register in S390_lowcore.exc_access_id.
--- linux/lib/bust_spinlocks.c.orig
+++ linux/lib/bust_spinlocks.c
@@ -17,23 +17,24 @@

void bust_spinlocks(int yes)
{
+ int loglevel_save = console_loglevel;
+
if (yes) {
oops_in_progress = 1;
- } else {
- int loglevel_save = console_loglevel;
+ return;
+ }
#ifdef CONFIG_VT
- unblank_screen();
+ unblank_screen();
#endif
- oops_in_progress = 0;
- /*
- * OK, the message is on the console. Now we call printk()
- * without oops_in_progress set so that printk() will give klogd
- * and the blanked console a poke. Hold onto your hats...
- */
- console_loglevel = 15; /* NMI oopser may have shut the console up */
- printk(" ");
- console_loglevel = loglevel_save;
- }
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk() will give klogd
+ * and the blanked console a poke. Hold onto your hats...
+ */
+ console_loglevel = 15; /* NMI oopser may have shut the console up */
+ printk(" ");
+ console_loglevel = loglevel_save;
}