Re: Filesystem read corruption/TLB flush bug still present in 2.3.43 final

From: Manfred Spraul (manfreds@colorfullife.com)
Date: Fri Feb 11 2000 - 10:07:14 EST


Simon Kirby wrote:
> I think this started with the TLB flush changes. This is on my same
> dual-processor IDE box with 128 MB ECC SDRAM that I have reported other
> problems on.
>
Are you sure that the corruption started with the 2.3.43-preX kernels? I've
attached my internal debug patch, but I have never seen any suspicious
messages from it (apart from rare messages on 8-way boxes, which are what
prompted these TLB changes).

Btw, I just read that someone else reports a corruption with tar+bz2 in
2.3.4{1,2,3}. I'll try to reproduce that.

--
	Manfred

// $Header$ // Kernel Version: // VERSION = 2 // PATCHLEVEL = 3 // SUBLEVEL = 43 // EXTRAVERSION = --- 2.3/arch/i386/kernel/smp.c Thu Feb 10 22:38:57 2000 +++ build-2.3/arch/i386/kernel/smp.c Fri Feb 11 15:45:58 2000 @@ -103,7 +103,7 @@ /* The 'big kernel lock' */ spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; -struct tlb_state cpu_tlbstate[NR_CPUS]; +volatile struct tlb_state cpu_tlbstate[NR_CPUS]; /* * the following functions deal with sending IPIs between CPUs. @@ -337,6 +337,15 @@ { unsigned long cpu = smp_processor_id(); + do { + unsigned int tmpreg; + __asm__ __volatile__("movl %%cr3,%0\n\t": "=r" (tmpreg)); + if(tmpreg != __pa(cpu_tlbstate[smp_processor_id()].active_mm->pgd)) { + printk("switching? cr3 error!\n"); + show_stack(NULL); + } + } while(0); + if (!test_bit(cpu, &flush_cpumask)) BUG(); if (flush_mm == cpu_tlbstate[cpu].active_mm) { @@ -348,11 +357,9 @@ } else leave_mm(cpu); } else { - extern void show_stack (void *); printk("hm #1: %p, %p.\n", flush_mm, cpu_tlbstate[cpu].active_mm); show_stack(NULL); } - __flush_tlb(); ack_APIC_irq(); clear_bit(cpu, &flush_cpumask); } @@ -382,7 +389,7 @@ * Temporarily this turns IRQs off, so that lockups are * detected by the NMI watchdog. */ - spin_lock_irq(&tlbstate_lock); + spin_lock(&tlbstate_lock); flush_mm = mm; flush_va = va; @@ -393,12 +400,14 @@ */ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + __cli(); while (flush_cpumask) /* nothing. 
lockup detection does not belong here */; + __sti(); flush_mm = NULL; flush_va = 0; - spin_unlock_irq(&tlbstate_lock); + spin_unlock(&tlbstate_lock); } void flush_tlb_current_task(void) --- 2.3/include/asm-i386/pgtable.h Thu Feb 10 22:39:10 2000 +++ build-2.3/include/asm-i386/pgtable.h Fri Feb 11 15:43:07 2000 @@ -17,6 +17,18 @@ #include <asm/fixmap.h> #include <linux/threads.h> +extern void show_stack(unsigned long* esp); +#ifndef DBG_TLBSTATE +#define DBG_TLBSTATE +struct tlb_state +{ + struct mm_struct *active_mm; + int state; +}; +extern volatile struct tlb_state cpu_tlbstate[NR_CPUS]; +#endif + + extern pgd_t swapper_pg_dir[1024]; /* Caches aren't brain-dead on the intel. */ @@ -36,6 +48,10 @@ "movl %0, %%cr3; \n" \ : "=r" (tmpreg) \ :: "memory"); \ + if(tmpreg != __pa(cpu_tlbstate[smp_processor_id()].active_mm->pgd)) { \ + printk("cr3 error!\n"); \ + show_stack(NULL); \ + } \ } while (0) /* @@ -55,6 +71,10 @@ : "r" (mmu_cr4_features & ~X86_CR4_PGE), \ "r" (mmu_cr4_features) \ : "memory"); \ + if(tmpreg != __pa(cpu_tlbstate[smp_processor_id()].active_mm->pgd)) { \ + printk("cr3 error!\n"); \ + show_stack(NULL); \ + } \ } while (0) extern unsigned long pgkern_mask; @@ -78,7 +98,15 @@ #define __flush_tlb_one(addr) __flush_tlb() #else #define __flush_tlb_one(addr) \ -__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) + do { \ + unsigned int tmpreg; \ + __asm__ __volatile__("invlpg %0\n\t": :"m" (*(char *) addr)); \ + __asm__ __volatile__("movl %%cr3,%0\n\t": "=r" (tmpreg));\ + if(tmpreg != __pa(cpu_tlbstate[smp_processor_id()].active_mm->pgd)) { \ + printk("cr3 error!\n"); \ + show_stack(NULL); \ + } \ + } while(0) #endif /* --- 2.3/include/asm-i386/pgalloc.h Thu Feb 10 22:39:10 2000 +++ build-2.3/include/asm-i386/pgalloc.h Fri Feb 11 15:42:57 2000 @@ -242,12 +242,15 @@ #define TLBSTATE_LAZY 2 #define TLBSTATE_OLD 3 +#ifndef DBG_TLBSTATE +#define DBG_TLBSTATE struct tlb_state { struct mm_struct *active_mm; int state; }; -extern struct tlb_state 
cpu_tlbstate[NR_CPUS]; +extern volatile struct tlb_state cpu_tlbstate[NR_CPUS]; +#endif #endif --- 2.3/arch/i386/kernel/setup.c Tue Feb 8 09:22:17 2000 +++ build-2.3/arch/i386/kernel/setup.c Tue Feb 8 16:35:56 2000 @@ -1546,6 +1546,9 @@ if(current->mm) BUG(); enter_lazy_tlb(&init_mm, current, nr); +#ifdef CONFIG_SMP + cpu_tlbstate[nr].active_mm = &init_mm; +#endif t->esp0 = current->thread.esp0; set_tss_desc(nr,t);

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu. Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Feb 15 2000 - 21:00:21 EST