Re: [PATCH] Fix to make check_pgt_cache work on !i386 architectures

Jakub Jelinek (jj@sunsite.ms.mff.cuni.cz)
Fri, 31 Jul 1998 23:20:11 +0200 (MET DST)


>
> This is too ugly to live. My personal solution would be to remove the pgd
> cache altogether from the generic code. If "check_pgd_table" isn't
> generic, it shouldn't be in the generic code.

Ok, see patch below which does exactly that.

>
> The other question that props up is that _why_ doesn't check_pgd_table()
> work as expected? Sounds like a bug in the whole design to me.

The case when there are multiple page tables within one page is far more
complicated. It cannot be per-cpu any longer (as otherwise freeing of the
pages would be really difficult without locking); one has to keep track of
which chunks of the page are free and which are allocated. So, even the
check_pgt_cache routine for that has to take those things into account and
free only pages which are completely free (but it is still a lot better
than before with the big/small chunk allocator).
But as most architectures use full pages only, the check_pgt_cache routine
in memory.c could be used for most of them.

--- ./mm/memory.c.jj Sun Jul 26 22:55:43 1998
+++ ./mm/memory.c Fri Jul 31 22:12:18 1998
@@ -56,6 +56,11 @@ unsigned long max_mapnr = 0;
unsigned long num_physpages = 0;
void * high_memory = NULL;

+/* Low and high watermarks for page table cache.
+ The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
+ */
+int pgt_cache_water[2] = { 25, 50 };
+
/*
* We special-case the C-O-W ZERO_PAGE, because it's such
* a common occurrence (no need to read the page to know
@@ -946,23 +951,4 @@ void make_pages_present(unsigned long ad
handle_mm_fault(current, vma, addr, write);
addr += PAGE_SIZE;
}
-}
-
-/* Low and high watermarks for page table cache.
- The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
- */
-int pgt_cache_water[2] = { 25, 50 };
-
-void check_pgt_cache(void)
-{
- if(pgtable_cache_size > pgt_cache_water[0]) {
- do {
- if(pgd_quicklist)
- free_pgd_slow(get_pgd_fast());
- if(pmd_quicklist)
- free_pmd_slow(get_pmd_fast());
- if(pte_quicklist)
- free_pte_slow(get_pte_fast());
- } while(pgtable_cache_size > pgt_cache_water[1]);
- }
}
--- ./include/linux/mm.h.jj Tue Jul 28 22:51:19 1998
+++ ./include/linux/mm.h Fri Jul 31 22:57:15 1998
@@ -277,8 +277,10 @@ extern int zeromap_page_range(unsigned l

extern void vmtruncate(struct inode * inode, unsigned long offset);
extern void handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
-extern void check_pgt_cache(void);
extern void make_pages_present(unsigned long addr, unsigned long end);
+
+extern int pgt_cache_water[2];
+extern void check_pgt_cache(void);

extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
extern void mem_init(unsigned long start_mem, unsigned long end_mem);
--- ./arch/sparc/kernel/process.c.jj Thu May 21 23:24:05 1998
+++ ./arch/sparc/kernel/process.c Fri Jul 31 22:51:12 1998
@@ -41,6 +41,7 @@

extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *);
extern void srmmu_check_pgt_cache(void);
+extern void sun4c_check_pgt_cache(void);

struct task_struct *current_set[NR_CPUS] = {&init_task, };

@@ -92,7 +93,7 @@ asmlinkage int sys_idle(void)
}
}
restore_flags(flags);
- check_pgt_cache();
+ sun4c_check_pgt_cache();
} else
srmmu_check_pgt_cache();
schedule();
--- ./arch/sparc/mm/sun4c.c.jj Fri Jul 31 22:45:29 1998
+++ ./arch/sparc/mm/sun4c.c Fri Jul 31 22:47:18 1998
@@ -2536,6 +2536,20 @@ extern __inline__ pgd_t *sun4c_get_pgd_f
return (pgd_t *)ret;
}

+void sun4c_check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}
+
static void sun4c_set_pgdir(unsigned long address, pgd_t entry)
{
/* Nothing to do */
--- ./arch/sparc/mm/init.c.jj Wed Apr 15 02:44:20 1998
+++ ./arch/sparc/mm/init.c Fri Jul 31 22:49:12 1998
@@ -46,6 +46,21 @@ struct pgtable_cache_struct pgt_quicklis
/* References to section boundaries */
extern char __init_begin, __init_end, etext;

+void check_pgt_cache(void)
+{
+ extern void srmmu_check_pgt_cache(void);
+ extern void sun4c_check_pgt_cache(void);
+#ifdef __SMP__
+ srmmu_check_pgt_cache();
+#else
+ if (ARCH_SUN4C_SUN4)
+ sun4c_check_pgt_cache();
+ else
+ srmmu_check_pgt_cache();
+#endif
+}
+
+
/*
* BAD_PAGE is the page that is used for page faults when linux
* is out-of-memory. Older versions of linux just did a
--- ./arch/sparc64/kernel/process.c.jj Thu May 21 23:24:05 1998
+++ ./arch/sparc64/kernel/process.c Fri Jul 31 22:52:29 1998
@@ -43,45 +43,6 @@

#ifndef __SMP__

-extern int pgt_cache_water[2];
-
-static inline void ultra_check_pgt_cache(void)
-{
- struct page *page, *page2;
-
- if(pgtable_cache_size > pgt_cache_water[0]) {
- do {
- if(pmd_quicklist)
- free_pmd_slow(get_pmd_fast());
- if(pte_quicklist)
- free_pte_slow(get_pte_fast());
- } while(pgtable_cache_size > pgt_cache_water[1]);
- }
- if (pgd_cache_size > pgt_cache_water[0] / 4) {
- for (page2 = NULL, page = (struct page *)pgd_quicklist; page;) {
- if ((unsigned long)page->pprev_hash == 3) {
- if (page2)
- page2->next_hash = page->next_hash;
- else
- (struct page *)pgd_quicklist = page->next_hash;
- page->next_hash = NULL;
- page->pprev_hash = NULL;
- pgd_cache_size -= 2;
- free_page(PAGE_OFFSET + (page->map_nr << PAGE_SHIFT));
- if (page2)
- page = page2->next_hash;
- else
- page = (struct page *)pgd_quicklist;
- if (pgd_cache_size <= pgt_cache_water[1] / 4)
- break;
- continue;
- }
- page2 = page;
- page = page->next_hash;
- }
- }
-}
-
/*
* the idle loop on a Sparc... ;)
*/
@@ -94,7 +55,7 @@ asmlinkage int sys_idle(void)
current->priority = -100;
current->counter = -100;
for (;;) {
- ultra_check_pgt_cache();
+ check_pgt_cache();
run_task_queue(&tq_scheduler);
schedule();
}
--- ./arch/sparc64/mm/init.c.jj Thu May 21 03:54:36 1998
+++ ./arch/sparc64/mm/init.c Fri Jul 31 22:54:09 1998
@@ -54,6 +54,63 @@ static __inline__ void __init_pmd(pmd_t
__bfill64((void *)pmdp, &two_null_pte_table);
}

+#ifndef __SMP__
+
+void check_pgt_cache(void)
+{
+ struct page *page, *page2;
+
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+ if (pgd_cache_size > pgt_cache_water[0] / 4) {
+ for (page2 = NULL, page = (struct page *)pgd_quicklist; page;) {
+ if ((unsigned long)page->pprev_hash == 3) {
+ if (page2)
+ page2->next_hash = page->next_hash;
+ else
+ (struct page *)pgd_quicklist = page->next_hash;
+ page->next_hash = NULL;
+ page->pprev_hash = NULL;
+ pgd_cache_size -= 2;
+ free_page(PAGE_OFFSET + (page->map_nr << PAGE_SHIFT));
+ if (page2)
+ page = page2->next_hash;
+ else
+ page = (struct page *)pgd_quicklist;
+ if (pgd_cache_size <= pgt_cache_water[1] / 4)
+ break;
+ continue;
+ }
+ page2 = page;
+ page = page->next_hash;
+ }
+ }
+}
+
+#else
+
+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}
+
+#endif
+
/*
* BAD_PAGE is the page that is used for page faults when linux
* is out-of-memory. Older versions of linux just did a
--- ./arch/alpha/mm/init.c.jj Sat Jun 13 21:48:10 1998
+++ ./arch/alpha/mm/init.c Fri Jul 31 22:34:00 1998
@@ -88,6 +88,19 @@ pte_t *get_pte_slow(pmd_t *pmd, unsigned
return (pte_t *) pmd_page(*pmd) + offset;
}

+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}

/*
* BAD_PAGE is the page that is used for page faults when linux
--- ./arch/arm/kernel/process.c.jj Sat Jul 18 20:55:23 1998
+++ ./arch/arm/kernel/process.c Fri Jul 31 22:34:46 1998
@@ -71,6 +71,7 @@ asmlinkage int sys_idle(void)
current->priority = -100;
for (;;)
{
+ check_pgt_cache();
#if 0 //def ARCH_IDLE_OK
if (!hlt_counter && !need_resched)
proc_idle ();
--- ./arch/arm/mm/init.c.jj Fri May 8 09:42:38 1998
+++ ./arch/arm/mm/init.c Fri Jul 31 22:36:00 1998
@@ -34,6 +34,20 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern char _etext, _stext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}
+
/*
* BAD_PAGE is the page that is used for page faults when linux
* is out-of-memory. Older versions of linux just did a
--- ./arch/i386/mm/init.c.jj Mon Jul 20 23:00:07 1998
+++ ./arch/i386/mm/init.c Fri Jul 31 22:37:04 1998
@@ -88,6 +88,19 @@ pte_t *get_pte_slow(pmd_t *pmd, unsigned
return (pte_t *) (pmd_page(*pmd) + offset);
}

+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}

/*
* BAD_PAGE is the page that is used for page faults when linux
--- ./arch/m68k/mm/init.c.jj Sat Jun 13 22:14:33 1998
+++ ./arch/m68k/mm/init.c Fri Jul 31 22:38:26 1998
@@ -31,6 +31,20 @@ extern void die_if_kernel(char *,struct
extern void init_kpointer_table(void);
extern void show_net_buffers(void);

+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}
+
/*
* BAD_PAGE is the page that is used for page faults when linux
* is out-of-memory. Older versions of linux just did a
--- ./arch/mips/mm/init.c.jj Fri May 8 09:13:24 1998
+++ ./arch/mips/mm/init.c Fri Jul 31 22:41:40 1998
@@ -47,6 +47,20 @@ asmlinkage int sys_cacheflush(void *addr
return 0;
}

+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}
+
/*
* BAD_PAGE is the page that is used for page faults when linux
* is out-of-memory. Older versions of linux just did a
--- ./arch/ppc/mm/init.c.jj Thu May 21 03:54:36 1998
+++ ./arch/ppc/mm/init.c Fri Jul 31 22:43:21 1998
@@ -148,6 +148,20 @@ pte_t *get_pte_slow(pmd_t *pmd, unsigned
return (pte_t *) pmd_page(*pmd) + offset;
}

+void check_pgt_cache(void)
+{
+ if(pgtable_cache_size > pgt_cache_water[0]) {
+ do {
+ if(pgd_quicklist)
+ free_pgd_slow(get_pgd_fast());
+ if(pmd_quicklist)
+ free_pmd_slow(get_pmd_fast());
+ if(pte_quicklist)
+ free_pte_slow(get_pte_fast());
+ } while(pgtable_cache_size > pgt_cache_water[1]);
+ }
+}
+
/*
* BAD_PAGE is the page that is used for page faults when linux
* is out-of-memory. Older versions of linux just did a

Cheers,
Jakub
___________________________________________________________________
Jakub Jelinek | jj@sunsite.mff.cuni.cz | http://sunsite.mff.cuni.cz
Administrator of SunSITE Czech Republic, MFF, Charles University
___________________________________________________________________
Ultralinux - first 64bit OS to take full power of the UltraSparc
Linux version 2.1.112 on a sparc64 machine (498.80 BogoMips).
___________________________________________________________________

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html