Re: 2.6.21-rc2-git3 soft lockup detected on CPU#0 (crash dump kernel)

From: Ingo Molnar
Date: Mon Mar 05 2007 - 05:40:32 EST



* Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:

> > shouldnt that be 'R' instead of the question mark?
>
> ? are dead ones,

ok.

> The interesting part is here:
>
> 1 lock held by udevd/918:
> #0: (&mm->mmap_sem){----}, at: [<c161135b>] do_page_fault+0x150/0x50d
>
> But udevd/918 is nowhere in the sysrq-t output.

Also, below is a softlockup-debug patch that might be useful. It adds
tick/tock output, so it spams the console once per second, but it can
show the dynamics of lockups (and other delays).

Ingo

------------------->
Subject: [patch] softlockup: add /proc/sys/kernel/softlockup_print_tick
From: Ingo Molnar <mingo@xxxxxxx>

Add a /proc/sys/kernel/softlockup_print_tick tunable:

0: off
1: print tick/tock messages in the softlockup irq and in the watchdog task
2: also print a stack dump from the irq (the default in this patch)

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
---
Documentation/kernel-parameters.txt | 5 +++++
include/linux/sched.h | 2 ++
kernel/softlockup.c | 13 ++++++++++++-
kernel/sysctl.c | 10 ++++++++++
4 files changed, 29 insertions(+), 1 deletion(-)

Index: linux/Documentation/kernel-parameters.txt
===================================================================
--- linux.orig/Documentation/kernel-parameters.txt
+++ linux/Documentation/kernel-parameters.txt
@@ -1659,6 +1659,11 @@ and is between 256 and 4096 characters.

snd-ymfpci= [HW,ALSA]

+ softlockup_print_tick
+ [KNL] Print softlockup tick in the irq and in the
+ watchdog task. If the value is 2, print a stack dump too.
+ Format: <0/1/2>
+
sonycd535= [HW,CD]
Format: <io>[,<irq>]

Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -223,6 +223,8 @@ extern void update_process_times(int use
extern void scheduler_tick(void);

#ifdef CONFIG_DETECT_SOFTLOCKUP
+extern int softlockup_print_tick;
+
extern void softlockup_tick(void);
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
Index: linux/kernel/softlockup.c
===================================================================
--- linux.orig/kernel/softlockup.c
+++ linux/kernel/softlockup.c
@@ -21,6 +21,7 @@ static DEFINE_PER_CPU(unsigned long, pri
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);

static int did_panic = 0;
+int softlockup_print_tick = 2;

static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -62,8 +63,15 @@ void softlockup_tick(void)
}

/* Wake up the high-prio watchdog task every second: */
- if (time_after(jiffies, touch_timestamp + HZ))
+ if (time_after(jiffies, touch_timestamp + HZ)) {
+ if (softlockup_print_tick) {
+ printk("softlockup tick on CPU#%d, %s:%d\n",
+ this_cpu, current->comm, current->pid);
+ if (softlockup_print_tick == 2)
+ dump_stack();
+ }
wake_up_process(per_cpu(watchdog_task, this_cpu));
+ }

/* Warn about unreasonable 10+ seconds delays: */
if (time_after(jiffies, touch_timestamp + 10*HZ)) {
@@ -83,6 +91,7 @@ void softlockup_tick(void)
static int watchdog(void * __bind_cpu)
{
struct sched_param param = { .sched_priority = 99 };
+ int this_cpu = (int)(long)__bind_cpu;

sched_setscheduler(current, SCHED_FIFO, &param);
current->flags |= PF_NOFREEZE;
@@ -96,6 +105,8 @@ static int watchdog(void * __bind_cpu)
set_current_state(TASK_INTERRUPTIBLE);
touch_softlockup_watchdog();
schedule();
+ if (softlockup_print_tick)
+ printk("softlockup tock on CPU#%d\n", this_cpu);
}

return 0;
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -231,6 +231,16 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dostring,
.strategy = &sysctl_string,
},
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "softlockup_print_tick",
+ .data = &softlockup_print_tick,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.ctl_name = KERN_TAINTED,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/