Re: 2.6.18: Kernel BUG at mm/rmap.c:522

From: Nick Piggin
Date: Wed Oct 04 2006 - 10:01:53 EST


Andre Noll wrote:
Hi

MATLAB triggers the following bug on both of our new 16-way opteron
machines (64G Ram): The same kernel is running with no problems on a
bunch of smaller (8-way, 4-way, max 32G Ram) cluster nodes.

Any hints?
Andre


----------- [cut here ] --------- [please bite here ] ---------
Kernel BUG at ...aid0/home/maan/scm/stable/linux-2.6.18.y/mm/rmap.c:522

Ah, this old thing. I hope it is repeatable?

What we really want is the bit before this, the "Eeek! page_mapcount went
negative" part.

It is also nice if we can work out where the page actually came from. The
following attached patch should help out a bit with that, if you could
run with it?

Thanks a lot for reporting, this is very useful.

Nick

--
SUSE Labs, Novell Inc.
Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h 2006-10-04 23:51:55.000000000 +1000
+++ linux-2.6/include/linux/rmap.h 2006-10-04 23:52:28.000000000 +1000
@@ -72,7 +72,7 @@ void __anon_vma_link(struct vm_area_stru
void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *);
+void page_remove_rmap(struct page *, struct vm_area_struct *);

/**
* page_dup_rmap - duplicate pte mapping to a page
Index: linux-2.6/mm/filemap_xip.c
===================================================================
--- linux-2.6.orig/mm/filemap_xip.c 2006-10-04 23:56:59.000000000 +1000
+++ linux-2.6/mm/filemap_xip.c 2006-10-04 23:57:08.000000000 +1000
@@ -189,7 +189,7 @@ __xip_unmap (struct address_space * mapp
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte);
- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
pte_unmap_unlock(pte, ptl);
Index: linux-2.6/mm/fremap.c
===================================================================
--- linux-2.6.orig/mm/fremap.c 2006-10-04 23:57:00.000000000 +1000
+++ linux-2.6/mm/fremap.c 2006-10-04 23:57:13.000000000 +1000
@@ -33,7 +33,7 @@ static int zap_pte(struct mm_struct *mm,
if (page) {
if (pte_dirty(pte))
set_page_dirty(page);
- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
page_cache_release(page);
}
} else {
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c 2006-10-04 23:57:00.000000000 +1000
+++ linux-2.6/mm/memory.c 2006-10-04 23:57:26.000000000 +1000
@@ -677,7 +677,7 @@ static unsigned long zap_pte_range(struc
mark_page_accessed(page);
file_rss--;
}
- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
tlb_remove_page(tlb, page);
continue;
}
@@ -1540,7 +1540,7 @@ gotten:
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
- page_remove_rmap(old_page);
+ page_remove_rmap(old_page, vma);
if (!PageAnon(old_page)) {
dec_mm_counter(mm, file_rss);
inc_mm_counter(mm, anon_rss);
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c 2006-10-04 23:49:15.000000000 +1000
+++ linux-2.6/mm/rmap.c 2006-10-04 23:58:00.000000000 +1000
@@ -508,7 +508,7 @@ void page_add_file_rmap(struct page *pag
*
* The caller needs to hold the pte lock.
*/
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
{
if (atomic_add_negative(-1, &page->_mapcount)) {
#ifdef CONFIG_DEBUG_VM
@@ -517,6 +517,7 @@ void page_remove_rmap(struct page *page)
printk (KERN_EMERG " page->flags = %lx\n", page->flags);
printk (KERN_EMERG " page->count = %x\n", page_count(page));
printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
+ print_symbol (KERN_EMERG " vma->vm_ops = %s\n", vma->vm_ops);
}
#endif
BUG_ON(page_mapcount(page) < 0);
@@ -621,7 +622,7 @@ static int try_to_unmap_one(struct page
dec_mm_counter(mm, file_rss);


- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
page_cache_release(page);

out_unmap:
@@ -711,7 +712,7 @@ static void try_to_unmap_cluster(unsigne
if (pte_dirty(pteval))
set_page_dirty(page);

- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
page_cache_release(page);
dec_mm_counter(mm, file_rss);
(*mapcount)--;