[patch] NMI-oopser.2.1.90-B

MOLNAR Ingo (mingo@chiara.csoma.elte.hu)
Wed, 11 Mar 1998 18:44:08 +0100 (CET)


this is a new pre1-2.1.90 version of the NMI watchdog, with new features
and bugfixes:

- most notably, unlike the previous version, this one compiles
cleanly ;)

- it now includes spinlock debugging code too; all of these (subtle) error
cases are detected (see the sketch after this list):

- scheduling with spinlocks held

- calling cli/sti/restore_flags with spinlocks held

- 'instant oops': just put a funny(); call anywhere in the kernel and it
will broadcast an IPI to all CPUs, creating an almost-perfect
snapshot of the system's state.
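
the spinlock checks boil down to a per-task depth counter that has to
stay balanced: every lock increments it, every unlock decrements it, and
schedule()/cli()/sti() verify that it is zero. here is a minimal
userland sketch of that invariant (illustrative names, not the kernel's
symbols; a plain int stands in for the kernel's atomic_t):

#include <stdio.h>
#include <stdlib.h>

/* per-task counter: every lock increments it, every unlock decrements
 * it; scheduling (or cli/sti) with a nonzero count is a bug. */
static int spinlock_depth;

static void debug_lock(void)
{
	spinlock_depth++;
}

static void debug_unlock(void)
{
	if (--spinlock_depth < 0) {
		fprintf(stderr, "spinlock counter underflow!\n");
		abort();	/* the kernel spins with IRQs off instead */
	}
}

/* called from the would-be schedule()/cli()/sti() points */
static void debug_check_balanced(void)
{
	if (spinlock_depth) {
		fprintf(stderr, "spinlocks out of balance!\n");
		abort();
	}
}

int main(void)
{
	debug_lock();
	/* forgetting the debug_unlock() here would trip the check below */
	debug_unlock();
	debug_check_balanced();	/* passes: the count is back to zero */
	return 0;
}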

-- mingo

--- linux/drivers/char/Config.in.orig Sat Mar 14 01:32:49 1998
+++ linux/drivers/char/Config.in Tue Mar 17 04:54:29 1998
@@ -104,6 +104,12 @@
tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG
tristate ' Berkshire Products PC Watchdog' CONFIG_PCWATCHDOG
tristate ' Acquire SBC Watchdog Timer' CONFIG_ACQUIRE_WDT
+ if [ "$SMP" = "1" ]; then
+ bool ' SMP-IOAPIC NMI Software Watchdog' CONFIG_NMI_WATCHDOG
+ if [ "$CONFIG_NMI_WATCHDOG" = "y" ]; then
+ int ' watchdog source IRQ' CONFIG_NMI_WATCHDOG_IRQ 0
+ fi
+ fi
fi
bool 'Enhanced Real Time Clock Support' CONFIG_RTC
if [ "$CONFIG_RTC" = "y" ]; then
--- linux/kernel/sched.c.orig Tue Mar 17 01:27:54 1998
+++ linux/kernel/sched.c Tue Mar 17 04:54:29 1998
@@ -43,6 +43,42 @@

#include <linux/timex.h>

+void __slock(void)
+{
+ atomic_inc(&current->spinlock_depth);
+}
+
+extern void show_registers(struct pt_regs *);
+
+void __sunlock(void)
+{
+ atomic_dec(&current->spinlock_depth);
+ if (atomic_read(&current->spinlock_depth) == -1) {
+ printk("spinlock counter underflow!\n");
+ show_registers(0);
+ for (;;) __cli();
+ }
+}
+
+void __checklock(void)
+{
+ if (atomic_read(&current->spinlock_depth)) {
+ printk("spinlocks out of balance!\n");
+ for (;;) __cli();
+ }
+}
+void ___checklock(void)
+{
+#if 1
+ __checklock();
+#endif
+}
+void ____checklock(void)
+{
+#if 1
+ __checklock();
+#endif
+}
/*
* kernel variables
*/
@@ -396,6 +432,8 @@
goto scheduling_in_interrupt;
if (local_bh_count[this_cpu])
goto scheduling_in_interrupt;
+ if (atomic_read(&current->spinlock_depth))
+ goto scheduling_with_spinlocks_held;
release_kernel_lock(prev, this_cpu, lock_depth);
if (bh_active & bh_mask)
do_bottom_half();
@@ -501,6 +539,9 @@
scheduling_in_interrupt:
printk("Scheduling in interrupt\n");
*(int *)0 = 0;
+scheduling_with_spinlocks_held:
+ printk("Scheduling with spinlocks held!\n");
+ for (;;) __cli();
}


--- linux/kernel/ksyms.c.orig Tue Mar 17 01:27:53 1998
+++ linux/kernel/ksyms.c Tue Mar 17 04:54:29 1998
@@ -95,6 +95,9 @@
EXPORT_SYMBOL(request_module);
#endif

+EXPORT_SYMBOL(__sunlock);
+EXPORT_SYMBOL(__slock);
+
#ifdef CONFIG_MODULES
EXPORT_SYMBOL(get_module_symbol);
#endif
--- linux/include/linux/sched.h.orig Tue Mar 17 01:27:52 1998
+++ linux/include/linux/sched.h Tue Mar 17 04:57:43 1998
@@ -282,6 +282,7 @@
int processor;
int last_processor;
int lock_depth; /* Lock depth. We can context switch in and out of holding a syscall kernel lock... */
+ atomic_t spinlock_depth; /* spinlocks have to be balanced */
/* Spinlocks for various pieces or per-task state. */
spinlock_t sigmask_lock; /* Protects signal and blocked */
};
@@ -357,7 +358,7 @@
/* files */ &init_files, \
/* mm */ &init_mm, \
/* signals */ &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, \
-/* SMP */ 0,0,0,0, \
+/* SMP */ 0,0,0,0, ATOMIC_INIT(0), \
/* locks */ INIT_LOCKS \
}

--- linux/include/asm-i386/ptrace.h.orig Sat Jan 3 09:43:51 1998
+++ linux/include/asm-i386/ptrace.h Tue Mar 17 04:57:42 1998
@@ -1,6 +1,8 @@
#ifndef _I386_PTRACE_H
#define _I386_PTRACE_H

+#include <linux/config.h>
+
#define EBX 0
#define ECX 1
#define EDX 2
@@ -13,11 +15,20 @@
#define FS 9
#define GS 10
#define ORIG_EAX 11
+
+#if CONFIG_NMI_WATCHDOG
+#define EIP 13
+#define CS 14
+#define EFL 15
+#define UESP 16
+#define SS 17
+#else
#define EIP 12
#define CS 13
#define EFL 14
#define UESP 15
#define SS 16
+#endif


/* this struct defines the way the registers are stored on the
@@ -34,6 +45,9 @@
int xds;
int xes;
long orig_eax;
+#if CONFIG_NMI_WATCHDOG
+ long frame;
+#endif
long eip;
int xcs;
long eflags;
--- linux/include/asm-i386/spinlock.h.orig Tue Jan 20 02:25:49 1998
+++ linux/include/asm-i386/spinlock.h Tue Mar 17 04:54:29 1998
@@ -68,6 +68,18 @@

#define SPIN_LOCK_UNLOCKED { 0 }

+#define SPINLOCK_DEBUG 1
+
+#if SPINLOCK_DEBUG
+void __slock(void);
+void __sunlock(void);
+# define SLOCK __slock()
+# define SUNLOCK __sunlock()
+#else
+# define SLOCK
+# define SUNLOCK
+#endif
+
#define spin_lock_init(x) do { (x)->lock = 0; } while(0)
#define spin_unlock_wait(x) do { barrier(); } while(((volatile spinlock_t *)(x))->lock)

@@ -75,7 +87,7 @@
#define __dummy_lock(lock) (*(__dummy_lock_t *)(lock))

#define spin_lock(lock) \
-__asm__ __volatile__( \
+do { __asm__ __volatile__( \
"\n1:\t" \
"lock ; btsl $0,%0\n\t" \
"jc 2f\n" \
@@ -85,14 +97,20 @@
"jne 2b\n\t" \
"jmp 1b\n" \
".previous" \
- :"=m" (__dummy_lock(lock)))
+ :"=m" (__dummy_lock(lock))); \
+SLOCK; } while (0)

#define spin_unlock(lock) \
+do { SUNLOCK; \
__asm__ __volatile__( \
"lock ; btrl $0,%0" \
- :"=m" (__dummy_lock(lock)))
+ :"=m" (__dummy_lock(lock))); \
+} while (0)

-#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
+#define spin_trylock(lock) ({ int __got = !test_and_set_bit(0,(lock)); \
+ if (__got) \
+ SLOCK; \
+ __got; })

#define spin_lock_irq(lock) \
do { __cli(); spin_lock(lock); } while (0)
@@ -130,7 +148,7 @@
* The inline assembly is non-obvious. Think about it.
*/
#define read_lock(rw) \
- asm volatile("\n1:\t" \
+do { asm volatile("\n1:\t" \
"lock ; incl %0\n\t" \
"js 2f\n" \
".section .text.lock,\"ax\"\n" \
@@ -139,14 +157,17 @@
"js 3b\n\t" \
"jmp 1b\n" \
".previous" \
- :"=m" (__dummy_lock(&(rw)->lock)))
+ :"=m" (__dummy_lock(&(rw)->lock))); \
+SLOCK; } while (0)

#define read_unlock(rw) \
+do { SUNLOCK; \
asm volatile("lock ; decl %0" \
- :"=m" (__dummy_lock(&(rw)->lock)))
+ :"=m" (__dummy_lock(&(rw)->lock))); \
+ } while (0)

#define write_lock(rw) \
- asm volatile("\n1:\t" \
+do { asm volatile("\n1:\t" \
"lock ; btsl $31,%0\n\t" \
"jc 4f\n" \
"2:\ttestl $0x7fffffff,%0\n\t" \
@@ -157,10 +178,13 @@
"jne 4b\n\t" \
"jmp 1b\n" \
".previous" \
- :"=m" (__dummy_lock(&(rw)->lock)))
+ :"=m" (__dummy_lock(&(rw)->lock))); \
+SLOCK; } while (0)

#define write_unlock(rw) \
- asm volatile("lock ; btrl $31,%0":"=m" (__dummy_lock(&(rw)->lock)))
+do { SUNLOCK; \
+ asm volatile("lock ; btrl $31,%0":"=m" (__dummy_lock(&(rw)->lock))); \
+} while (0)

#define read_lock_irq(lock) do { __cli(); read_lock(lock); } while (0)
#define read_unlock_irq(lock) do { read_unlock(lock); __sti(); } while (0)
--- linux/arch/i386/kernel/entry.S.orig Wed Feb 18 10:17:01 1998
+++ linux/arch/i386/kernel/entry.S Tue Mar 17 04:54:29 1998
@@ -42,6 +42,7 @@

#include <linux/sys.h>
#include <linux/linkage.h>
+#include <linux/config.h>
#include <asm/segment.h>
#define ASSEMBLY
#include <asm/smp.h>
@@ -56,11 +57,21 @@
DS = 0x1C
ES = 0x20
ORIG_EAX = 0x24
-EIP = 0x28
-CS = 0x2C
-EFLAGS = 0x30
-OLDESP = 0x34
-OLDSS = 0x38
+
+
+#if CONFIG_NMI_WATCHDOG
+ EIP = 0x2C
+ CS = 0x30
+ EFLAGS = 0x34
+ OLDESP = 0x38
+ OLDSS = 0x3C
+#else
+ EIP = 0x28
+ CS = 0x2C
+ EFLAGS = 0x30
+ OLDESP = 0x34
+ OLDSS = 0x38
+#endif

CF_MASK = 0x00000001
IF_MASK = 0x00000200
@@ -78,6 +89,37 @@

ENOSYS = 38

+#undef IRQ_ENTRY
+
+#if CONFIG_NMI_WATCHDOG
+#define IRQ_ENTRY(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name) \
+ pushl %ebp; \
+ movl %esp, %ebp;
+#else
+#define IRQ_ENTRY(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name)
+#endif
+
+
+#if CONFIG_NMI_WATCHDOG
+#define IRQ_ENTRY_ERRORCODE(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name) \
+ pushl %ebp; \
+ movl 4(%esp), %ebp; \
+ xchgl %ebp, (%esp); \
+ xchgl %ebp, 4(%esp); \
+ movl %esp, %ebp; \
+ leal 4(%ebp), %ebp;
+#else
+#define IRQ_ENTRY_ERRORCODE(name) IRQ_ENTRY(name)
+#endif

#define SAVE_ALL \
cld; \
@@ -94,6 +136,7 @@
mov %dx,%ds; \
mov %dx,%es;

+#if CONFIG_NMI_WATCHDOG
#define RESTORE_ALL \
popl %ebx; \
popl %ecx; \
@@ -105,7 +148,22 @@
pop %ds; \
pop %es; \
addl $4,%esp; \
+ popl %ebp; \
iret
+#else
+#define RESTORE_ALL \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax; \
+ pop %ds; \
+ pop %es; \
+ addl $4,%esp; \
+ iret
+#endif

#define GET_CURRENT(reg) \
movl %esp, reg; \
@@ -113,6 +171,10 @@

ENTRY(lcall7)
pushfl # We get a different stack layout with call gates,
+#if CONFIG_NMI_WATCHDOG
+ pushl %ebp;
+ movl %esp, %ebp;
+#endif
pushl %eax # which has to be cleaned up later..
SAVE_ALL
movl EIP(%esp),%eax # due to call gates, this is eflags, not eip..
@@ -135,6 +197,7 @@
ALIGN
.globl ret_from_smpfork
ret_from_smpfork:
+ call SYMBOL_NAME(__sunlock)
GET_CURRENT(%ebx)
btrl $0, SYMBOL_NAME(scheduler_lock)
jmp ret_from_sys_call
@@ -147,7 +210,7 @@
* less clear than it otherwise should be.
*/

-ENTRY(system_call)
+IRQ_ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
GET_CURRENT(%ebx)
@@ -155,16 +218,25 @@
jae badsys
testb $0x20,flags(%ebx) # PF_TRACESYS
jne tracesys
+ pushl %eax
+ call SYMBOL_NAME(____checklock)
+ popl %eax
call SYMBOL_NAME(sys_call_table)(,%eax,4)
movl %eax,EAX(%esp) # save the return value
+ call SYMBOL_NAME(____checklock)
+ GET_CURRENT(%ebx)
ALIGN
.globl ret_from_sys_call
.globl ret_from_intr
ret_from_sys_call:
+ call SYMBOL_NAME(____checklock)
+ GET_CURRENT(%ebx)
movl SYMBOL_NAME(bh_mask),%eax
andl SYMBOL_NAME(bh_active),%eax
jne handle_bottom_half
ret_with_reschedule:
+ call SYMBOL_NAME(____checklock)
+ GET_CURRENT(%ebx)
cmpl $0,SYMBOL_NAME(need_resched)
jne reschedule
cmpl $0,sigpending(%ebx)
@@ -172,12 +244,15 @@
RESTORE_ALL
ALIGN
signal_return:
+ call SYMBOL_NAME(____checklock)
+ GET_CURRENT(%ebx)
testl $(VM_MASK),EFLAGS(%esp)
pushl %esp
jne v86_signal_return
pushl $0
call SYMBOL_NAME(do_signal)
addl $8,%esp
+ call SYMBOL_NAME(____checklock)
RESTORE_ALL
ALIGN
v86_signal_return:
@@ -191,11 +266,15 @@
ALIGN
tracesys:
movl $-ENOSYS,EAX(%esp)
+ call SYMBOL_NAME(____checklock)
call SYMBOL_NAME(syscall_trace)
+ call SYMBOL_NAME(____checklock)
movl ORIG_EAX(%esp),%eax
call SYMBOL_NAME(sys_call_table)(,%eax,4)
movl %eax,EAX(%esp) # save the return value
+ call SYMBOL_NAME(____checklock)
call SYMBOL_NAME(syscall_trace)
+ call SYMBOL_NAME(____checklock)
jmp ret_from_sys_call
badsys:
movl $-ENOSYS,EAX(%esp)
@@ -203,6 +282,8 @@

ALIGN
ret_from_exception:
+ call SYMBOL_NAME(____checklock)
+ GET_CURRENT(%ebx)
movl SYMBOL_NAME(bh_mask),%eax
andl SYMBOL_NAME(bh_active),%eax
jne handle_bottom_half
@@ -226,7 +307,7 @@
jmp SYMBOL_NAME(schedule) # test


-ENTRY(divide_error)
+IRQ_ENTRY(divide_error)
pushl $0 # no error code
pushl $ SYMBOL_NAME(do_divide_error)
ALIGN
@@ -252,98 +333,111 @@
movl $(__KERNEL_DS),%edx
mov %dx,%ds
mov %dx,%es
+ pushl %ecx
+ call SYMBOL_NAME(____checklock)
+ popl %ecx
GET_CURRENT(%ebx)
call *%ecx
addl $8,%esp
+ call SYMBOL_NAME(____checklock)
jmp ret_from_exception

-ENTRY(coprocessor_error)
+IRQ_ENTRY(coprocessor_error)
pushl $0
pushl $ SYMBOL_NAME(do_coprocessor_error)
jmp error_code

-ENTRY(device_not_available)
+IRQ_ENTRY(device_not_available)
pushl $-1 # mark this as an int
SAVE_ALL
GET_CURRENT(%ebx)
- pushl $ret_from_exception
+ pushl $ret_from_FPU_error
movl %cr0,%eax
testl $0x4,%eax # EM (math emulation bit)
je SYMBOL_NAME(math_state_restore)
pushl $0 # temporary storage for ORIG_EIP
call SYMBOL_NAME(math_emulate)
addl $4,%esp
- ret
+ret_from_FPU_error:
+ RESTORE_ALL

-ENTRY(debug)
+IRQ_ENTRY(debug)
pushl $0
pushl $ SYMBOL_NAME(do_debug)
jmp error_code

-ENTRY(nmi)
+# NMIs don't listen to any locks, so they should just come in and
+# go out ...
+
+IRQ_ENTRY(nmi)
+ pushl %eax
+ SAVE_ALL
pushl $0
- pushl $ SYMBOL_NAME(do_nmi)
- jmp error_code
+ movl %esp,%edx
+ pushl %edx
+ call SYMBOL_NAME(do_nmi)
+ addl $8,%esp
+ RESTORE_ALL

-ENTRY(int3)
+IRQ_ENTRY(int3)
pushl $0
pushl $ SYMBOL_NAME(do_int3)
jmp error_code

-ENTRY(overflow)
+IRQ_ENTRY(overflow)
pushl $0
pushl $ SYMBOL_NAME(do_overflow)
jmp error_code

-ENTRY(bounds)
+IRQ_ENTRY(bounds)
pushl $0
pushl $ SYMBOL_NAME(do_bounds)
jmp error_code

-ENTRY(invalid_op)
+IRQ_ENTRY(invalid_op)
pushl $0
pushl $ SYMBOL_NAME(do_invalid_op)
jmp error_code

-ENTRY(coprocessor_segment_overrun)
+IRQ_ENTRY(coprocessor_segment_overrun)
pushl $0
pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
jmp error_code

-ENTRY(reserved)
+IRQ_ENTRY(reserved)
pushl $0
pushl $ SYMBOL_NAME(do_reserved)
jmp error_code

-ENTRY(double_fault)
+IRQ_ENTRY_ERRORCODE(double_fault)
pushl $ SYMBOL_NAME(do_double_fault)
jmp error_code

-ENTRY(invalid_TSS)
+IRQ_ENTRY_ERRORCODE(invalid_TSS)
pushl $ SYMBOL_NAME(do_invalid_TSS)
jmp error_code

-ENTRY(segment_not_present)
+IRQ_ENTRY_ERRORCODE(segment_not_present)
pushl $ SYMBOL_NAME(do_segment_not_present)
jmp error_code

-ENTRY(stack_segment)
+IRQ_ENTRY_ERRORCODE(stack_segment)
pushl $ SYMBOL_NAME(do_stack_segment)
jmp error_code

-ENTRY(general_protection)
+IRQ_ENTRY_ERRORCODE(general_protection)
pushl $ SYMBOL_NAME(do_general_protection)
jmp error_code

-ENTRY(alignment_check)
+IRQ_ENTRY_ERRORCODE(alignment_check)
pushl $ SYMBOL_NAME(do_alignment_check)
jmp error_code

-ENTRY(page_fault)
+IRQ_ENTRY_ERRORCODE(page_fault)
pushl $ SYMBOL_NAME(do_page_fault)
jmp error_code

-ENTRY(spurious_interrupt_bug)
+IRQ_ENTRY(spurious_interrupt_bug)
pushl $0
pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
jmp error_code
--- linux/arch/i386/kernel/traps.c.orig Wed Mar 4 15:16:04 1998
+++ linux/arch/i386/kernel/traps.c Tue Mar 17 05:15:12 1998
@@ -2,6 +2,8 @@
* linux/arch/i386/traps.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1998, Ingo Molnar, added NMI-Watchdog driver
*/

/*
@@ -21,6 +23,7 @@
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/kernel_stat.h>

#include <asm/system.h>
#include <asm/uaccess.h>
@@ -115,7 +118,8 @@
#define VMALLOC_OFFSET (8*1024*1024)
#define MODULE_RANGE (8*1024*1024)

-static void show_registers(struct pt_regs *regs)
+#ifndef CONFIG_NMI_WATCHDOG
+void show_registers(struct pt_regs *regs)
{
int i;
unsigned long esp;
@@ -135,8 +139,9 @@
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
regs->esi, regs->edi, regs->ebp, esp);
- printk("ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
+ printk("ds: %04x es: %04x ss: %04x SPINLOCKS: %d\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss,
+ atomic_read(&current->spinlock_depth));
store_TR(i);
printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
@@ -179,18 +184,87 @@
printk("\n");
}

-spinlock_t die_lock;
+#else
+
+/*
+ * This version of show_registers() prints out the exact Call Trace, no
+ * guessing done.
+ * We guarantee that all files are compiled with -fno-omit-frame-pointer.
+ */
+void show_registers (struct pt_regs * regs)
+{
+ int i=1;
+ unsigned long *this_stack, *prev_stack, prev_addr, *prev_bp, framesize;
+
+ printk("Call Trace:\n");
+ printk("(SPINLOCKS: %d): ", /*
+ * one less because we hold nmi_print_lock
+ */
+ atomic_read(&current->spinlock_depth)-1);
+
+ /*
+ * the stack layout: /----- *this_stack
+ * V
+ * [this_frame][prev_bp][prev_addr][prev_frame][...]
+ */
+
+ /*
+ * we are relying on linear mapping on i386
+ */
+
+ __asm__ volatile ("movl %%ebp, %0":"=g"(this_stack));
+ framesize=0;
+
+ while (((long) this_stack & 8191) != 0) {
+ prev_addr = *(this_stack+1);
+
+ if (i && ((i % 8) == 0))
+ printk("\n ");
+ printk("[<%08lx>(%lu)] ", prev_addr, framesize);
+ i++;
+
+ prev_bp = (unsigned long *)(*this_stack);
+ prev_stack = this_stack;
+ this_stack = prev_bp;
+
+ if (
+ ((unsigned long)this_stack < PAGE_OFFSET)
+ || (i>100) ) {
+
+ if ((unsigned long)this_stack < PAGE_OFFSET)
+ break;
+
+ printk("WARNING: something fishy with the stack frame?\n");
+ printk("this_stack: [<%08lx>]\n",
+ (unsigned long)this_stack);
+ printk("i: %d.\n", i);
+ break;
+ }
+ framesize = (unsigned long)this_stack-(unsigned long)prev_stack;
+ }
+ printk("\n<E>\n");
+}
+#endif
+
+static spinlock_t die_lock = SPIN_LOCK_UNLOCKED;

void die_if_kernel(const char * str, struct pt_regs * regs, long err)
{
if ((regs->eflags & VM_MASK) || (3 & regs->xcs) == 3)
return;
console_verbose();
+
+
spin_lock_irq(&die_lock);
printk("%s: %04lx\n", str, err & 0xffff);
show_registers(regs);
- spin_unlock_irq(&die_lock);
+/* HACK ON */
+ spin_unlock(&die_lock);
+ unlock_kernel();
+ for (;;) __cli();
+/* HACK OFF */
do_exit(SIGSEGV);
+
}

DO_VM86_ERROR( 0, SIGFPE, "divide error", divide_error, current)
@@ -224,12 +298,14 @@

asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
- lock_kernel();
if (regs->eflags & VM_MASK) {
+ lock_kernel();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto out;
}
die_if_kernel("general protection",regs,error_code);
+
+ lock_kernel();
current->tss.error_code = error_code;
current->tss.trap_no = 13;
force_sig(SIGSEGV, current);
@@ -237,12 +313,15 @@
unlock_kernel();
}

+#ifndef CONFIG_NMI_WATCHDOG
static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
-}
+}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
static void io_check_error(unsigned char reason, struct pt_regs * regs)
{
unsigned long i;
@@ -258,14 +337,18 @@
reason &= ~8;
outb(reason, 0x61);
}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
{
unsigned char reason = inb(0x61);
@@ -279,6 +362,76 @@
if (!(reason & 0xc0))
unknown_nmi_error(reason, regs);
}
+#else
+
+/*
+ * FIXME: we assume here that the NMI came from the IO-APIC. It's quite a safe
+ * assumption in most cases, but if anyone knows a way to distinguish between
+ * NMI reasons, please speak up ... [i doubt that the IO-APIC does IO port 0x61
+ * correctly]
+ */
+
+extern atomic_t apic_timer_irqs [NR_CPUS];
+extern spinlock_t console_lock;
+static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ /*
+ * the best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check its local APIC timer IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * as these watchdog NMI IRQs are broadcast to every CPU, here
+ * we only have to check the current processor.
+ *
+ * since NMIs don't listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up console_lock first ...
+ * [when there will be more tty-related locks, break them up
+ * here too!]
+ */
+
+ static atomic_t lockup_detected = ATOMIC_INIT(0);
+ static atomic_t last_irq_sums [NR_CPUS] = { ATOMIC_INIT(0), };
+ static atomic_t alert_counter [NR_CPUS] = { ATOMIC_INIT(0), };
+
+ /*
+ * Since current-> is always on the stack, and we always switch
+ * the stack NMI-atomically, it's safe to use smp_processor_id().
+ */
+ int sum, cpu = smp_processor_id();
+
+ sum = atomic_read(apic_timer_irqs+cpu);
+
+ if (atomic_read(&lockup_detected)) {
+ spin_lock(&nmi_print_lock);
+ printk("followup-LOCKUP on CPU%d, forcing oops\n", cpu);
+ show_registers(0);
+ spin_unlock(&nmi_print_lock);
+ for (;;) __cli();
+ }
+
+ if (atomic_read(last_irq_sums+cpu) == sum) {
+ /*
+ * Ayiee, looks like this CPU is stuck ...
+ * wait a few IRQs (5 seconds) before doing the oops ...
+ */
+ atomic_inc(alert_counter+cpu);
+ if (atomic_read(alert_counter+cpu) == 5*HZ) {
+ spin_lock(&nmi_print_lock);
+ atomic_set(&lockup_detected,1);
+ printk("NMI Watchdog detected LOCKUP on CPU%d, forcing oops\n", cpu);
+ show_registers(0);
+ spin_unlock(&nmi_print_lock);
+ for (;;) __cli();
+ }
+ } else {
+ atomic_set(last_irq_sums+cpu,sum);
+ atomic_set(alert_counter+cpu,0);
+ }
+}
+#endif

asmlinkage void do_debug(struct pt_regs * regs, long error_code)
{
--- linux/arch/i386/kernel/process.c.orig Sat Jan 3 09:44:17 1998
+++ linux/arch/i386/kernel/process.c Tue Mar 17 04:54:29 1998
@@ -491,6 +491,7 @@
#ifdef __SMP__
p->tss.eip = (unsigned long) ret_from_smpfork;
p->tss.eflags = regs->eflags & 0xffffcdff; /* iopl always 0 for a new process */
+ atomic_set(&p->spinlock_depth,1);
#else
p->tss.eip = (unsigned long) ret_from_sys_call;
p->tss.eflags = regs->eflags & 0xffffcfff; /* iopl always 0 for a new process */
--- linux/arch/i386/kernel/irq.c.orig Sat Mar 14 01:32:47 1998
+++ linux/arch/i386/kernel/irq.c Tue Mar 17 04:54:29 1998
@@ -550,23 +550,41 @@
int cpu = smp_processor_id();

__cli();
- if (!local_irq_count[cpu])
+ if (!local_irq_count[cpu]) {
+#if 1
+#if SPINLOCK_DEBUG
+ if (!local_bh_count[cpu])
+ if (atomic_read(&current->spinlock_depth))
+ for (;;) __cli();
+#endif
+#endif
get_irqlock(cpu);
+ }
}

void __global_sti(void)
{
int cpu = smp_processor_id();

- if (!local_irq_count[cpu])
+ if (!local_irq_count[cpu]) {
+#if 1
+#if SPINLOCK_DEBUG
+ if (!local_bh_count[cpu])
+ if (atomic_read(&current->spinlock_depth))
+ for (;;) __cli();
+#endif
+#endif
release_irqlock(cpu);
+ }
__sti();
}

unsigned long __global_save_flags(void)
{
- if (!local_irq_count[smp_processor_id()])
- return global_irq_holder == (unsigned char) smp_processor_id();
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count[cpu])
+ return global_irq_holder == (unsigned char) cpu;
else {
unsigned long x;
__save_flags(x);
--- linux/arch/i386/kernel/irq.h.orig Sat Mar 14 01:32:47 1998
+++ linux/arch/i386/kernel/irq.h Tue Mar 17 05:05:22 1998
@@ -38,6 +38,7 @@
* IO-APIC
*/

+
#ifdef __SMP__

#include <asm/atomic.h>
@@ -89,6 +90,14 @@
"mov %dx,%ds\n\t" \
"mov %dx,%es\n\t"

+#if CONFIG_NMI_WATCHDOG
+#define INSERT_FRAME \
+ "pushl %ebp\n\t" \
+ "movl %esp, %ebp\n\t"
+#else
+#define INSERT_FRAME
+#endif
+
#define IRQ_NAME2(nr) nr##_interrupt(void)
#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)

@@ -107,6 +116,7 @@
__asm__( \
"\n"__ALIGN_STR"\n" \
SYMBOL_NAME_STR(x) ":\n\t" \
+ INSERT_FRAME \
"pushl $-1\n\t" \
SAVE_ALL \
"call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
@@ -117,6 +127,7 @@
__asm__( \
"\n"__ALIGN_STR"\n" \
SYMBOL_NAME_STR(x) ":\n\t" \
+ INSERT_FRAME \
"pushl $-1\n\t" \
SAVE_ALL \
"movl %esp,%eax\n\t" \
@@ -140,6 +151,7 @@
__asm__( \
"\n"__ALIGN_STR"\n" \
SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
+ INSERT_FRAME \
"pushl $"#nr"-256\n\t" \
"jmp common_interrupt");

--- linux/arch/i386/kernel/io_apic.c.orig Wed Feb 18 10:17:02 1998
+++ linux/arch/i386/kernel/io_apic.c Tue Mar 17 04:54:30 1998
@@ -37,6 +37,17 @@
volatile unsigned int * io_apic_reg = NULL;

/*
+ * We want to avoid #ifdef CONFIG_'s in the main code whenever possible:
+ */
+#ifdef CONFIG_NMI_WATCHDOG
+ int nmi_pin = -1;
+ const int nmi_irq = CONFIG_NMI_WATCHDOG_IRQ;
+#else
+ int nmi_pin = 0;
+ const int nmi_irq = -1;
+#endif
+
+/*
* The structure of the IO-APIC:
*/
struct IO_APIC_reg_00 {
@@ -62,6 +73,7 @@
__u32 vector : 8,
delivery_mode : 3, /* 000: FIXED
* 001: lowest prio
+ * 100: NMI
* 111: ExtInt
*/
dest_mode : 1, /* 0: physical, 1: logical */
@@ -273,6 +285,19 @@

entry.vector = IO_APIC_GATE_OFFSET + (irq<<3);

+ if (mp_irqs[i].mpc_irqtype)
+ continue;
+
+ if (irq == nmi_irq) {
+ entry.delivery_mode = 4; /* broadcast NMI */
+ make_8259A_irq(irq);
+ /*
+ * Remember which register has the NMI IRQ entry,
+ * so we can turn it off in case there is some
+ * incompatibility
+ */
+ nmi_pin = i;
+ }
/*
* Determine IRQ line polarity (high active or low active):
*/
@@ -629,16 +654,23 @@
init_IO_APIC_traps();
setup_IO_APIC_irqs ();

+ if (nmi_pin == -1)
+ printk(".. NMI watchdog has invalid source IRQ.\n");
+ else if (nmi_irq != -1)
+ printk("NMI Watchdog activated on source IRQ %d\n", nmi_irq);
+
if (!timer_irq_works ()) {
make_8259A_irq(0);
if (!timer_irq_works ())
panic("IO-APIC + timer doesnt work!");
printk("..MP-BIOS bug: i8254 timer not connected to IO-APIC\n");
printk("..falling back to 8259A-based timer interrupt\n");
+ if ((nmi_pin != -1) && (nmi_irq == 0))
+ printk(".. NMI Watchdog disabled, as source IRQ is timer!\n");
}
-
- printk("nr of MP irq sources: %d.\n", mp_irq_entries);
- printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers);
+
+ printk("nr of MP irq sources: %d.\n", mp_irq_entries);
+ printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers);
print_IO_APIC();
}

--- linux/arch/i386/kernel/smp.c.orig Wed Mar 4 15:16:04 1998
+++ linux/arch/i386/kernel/smp.c Tue Mar 17 05:14:00 1998
@@ -1420,17 +1420,27 @@
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
+#ifdef CONFIG_NMI_WATCHDOG
+atomic_t apic_timer_irqs [NR_CPUS] = { ATOMIC_INIT(0), };
+#endif
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
+#ifdef CONFIG_NMI_WATCHDOG
+ /*
+ * the only thing that can lock an NMI is an unACK-ed APIC ...
+ */
+ atomic_inc(apic_timer_irqs+smp_processor_id());
+#endif
+
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow, and we
* want to be able to accept NMI tlb invalidates
* during this time.
*/
- spin_lock(&irq_controller_lock);
+ /* spin_lock(&irq_controller_lock); */
ack_APIC_irq ();
- spin_unlock(&irq_controller_lock);
+ /* spin_unlock(&irq_controller_lock); */

smp_local_timer_interrupt(regs);
}
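
the heartbeat logic added above is simple enough to restate outside the
diff: the local APIC timer interrupt increments a per-CPU counter, and
every watchdog NMI checks that the current CPU's counter is still
moving; roughly 5 seconds of silence is treated as a lockup. a hedged
plain-C sketch of just that heuristic (names mirror the patch, but the
int counters are a single-threaded stand-in for the per-CPU atomics):

#include <stdio.h>

#define NR_CPUS 4
#define HZ	100

/* incremented by the per-CPU timer interrupt (the "heartbeat") */
static int apic_timer_irqs[NR_CPUS];

/* state kept by the NMI handler between invocations */
static int last_irq_sums[NR_CPUS];
static int alert_counter[NR_CPUS];

/* called on every broadcast watchdog NMI; returns 1 on a lockup */
static int nmi_watchdog_tick(int cpu)
{
	int sum = apic_timer_irqs[cpu];

	if (sum == last_irq_sums[cpu]) {
		/* no timer tick since the last NMI: suspicious ... */
		if (++alert_counter[cpu] == 5*HZ)
			return 1;	/* ~5s of silence: declare lockup */
	} else {
		last_irq_sums[cpu] = sum;	/* CPU is alive: rearm */
		alert_counter[cpu] = 0;
	}
	return 0;
}

int main(void)
{
	int i, cpu = 0;

	/* the heartbeat never moves, so the lockup fires after 5*HZ NMIs */
	for (i = 0; i <= 5*HZ; i++)
		if (nmi_watchdog_tick(cpu)) {
			printf("LOCKUP detected on CPU%d after %d NMIs\n",
			       cpu, i + 1);
			break;
		}
	return 0;
}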
--- linux/Makefile.orig Tue Mar 17 01:27:49 1998
+++ linux/Makefile Tue Mar 17 04:54:30 1998
@@ -24,7 +24,12 @@
FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net

HOSTCC =gcc
-HOSTCFLAGS =-O2 -fomit-frame-pointer
+
+ifeq ($(CONFIG_NMI_WATCHDOG),y)
+ HOSTCFLAGS =-O2 -fno-omit-frame-pointer
+else
+ HOSTCFLAGS =-O2 -fomit-frame-pointer
+endif

CROSS_COMPILE =

@@ -85,7 +90,11 @@
# standard CFLAGS
#

-CFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+ifeq ($(CONFIG_NMI_WATCHDOG),y)
+ CFLAGS = -Wall -Wstrict-prototypes -O2 -fno-omit-frame-pointer
+else
+ CFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+endif

ifdef CONFIG_CPP
CFLAGS := $(CFLAGS) -x c++
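
finally, for anyone who wants to play with the exact-backtrace trick
outside the kernel: the new show_registers() simply follows gcc's
standard i386 frame layout, where each frame starts with the saved
%ebp followed by the return address. a rough userland sketch (i386
only; build with e.g. gcc -m32 -O0 so frame pointers are kept; the
helper names are mine, and the depth cap guards against a broken chain
much like the patch's i>100 / PAGE_OFFSET checks; on glibc the
outermost frame pointer is zeroed at process entry, which terminates
the walk):

#include <stdio.h>
#include <stdint.h>

/* walk the chain of saved frame pointers:
 *	fp[0] = caller's saved %ebp,  fp[1] = return address */
static void print_call_trace(void)
{
	uintptr_t *fp;
	int depth;

	__asm__ volatile ("movl %%ebp, %0" : "=r" (fp));

	for (depth = 0; fp && depth < 100; depth++) {
		printf("[<%08lx>] ", (unsigned long)fp[1]);
		fp = (uintptr_t *)fp[0];	/* up one frame */
	}
	printf("\n");
}

static void inner(void) { print_call_trace(); }
static void outer(void) { inner(); }

int main(void)
{
	outer();	/* prints inner, outer, main, then crt frames */
	return 0;
}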
