[PATCH] pre-100 correction + sysctl stuff

Rik van Riel (H.H.vanRiel@phys.uu.nl)
Sun, 3 May 1998 01:37:28 +0200 (MET DST)

Messages sorted by: [ date ][ thread ][ subject ][ author ]
Next message: Chris Evans: "2.0.34-pre11b; 3c59x driver broken?"
Previous message: Andrea Arcangeli: "Re: New pre-patch (Re: [PATCH] kswapd fully sysctl tunable)"

Hi Linus,

Here's my sysctl patch, this time without any
reference to the dcache stuff.

It also fixes a header file error (include/linux/dcache.h)
that prevents pre-100 from compiling...

For the uninitiated, this patch also does:
- puts the low/high water marks from free_memory_available()
in the freepages.{low,high} struct
- makes kswapd more sysctl tunable
- updates the documentation accordingly
- fixes an annoying pre-100 header file bug

grtz,

--- linux/kernel/sysctl.c.100 Sun May 3 01:17:42 1998
+++ linux/kernel/sysctl.c Sun May 3 01:15:28 1998
@@ -189,20 +189,22 @@

static ctl_table vm_table[] = {
{VM_SWAPCTL, "swapctl",
- &swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
+ &swap_control, sizeof(swap_control_t), 0644, NULL, &proc_dointvec},
{VM_SWAPOUT, "swapout_interval",
- &swapout_interval, sizeof(int), 0600, NULL, &proc_dointvec},
+ &swapout_interval, sizeof(int), 0644, NULL, &proc_dointvec},
{VM_FREEPG, "freepages",
- &freepages, sizeof(freepages_t), 0600, NULL, &proc_dointvec},
+ &freepages, sizeof(freepages_t), 0644, NULL, &proc_dointvec},
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
&proc_dointvec_minmax, &sysctl_intvec, NULL,
&bdflush_min, &bdflush_max},
{VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
{VM_BUFFERMEM, "buffermem",
- &buffer_mem, sizeof(buffer_mem_t), 0600, NULL, &proc_dointvec},
+ &buffer_mem, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec},
{VM_PAGECACHE, "pagecache",
- &page_cache, sizeof(buffer_mem_t), 0600, NULL, &proc_dointvec},
+ &page_cache, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec},
+ {VM_PAGERDAEMON, "kswapd",
+ &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec},
{0}
};

--- linux/mm/vmscan.c.100 Sun May 3 01:16:55 1998
+++ linux/mm/vmscan.c Sun May 3 01:24:26 1998
@@ -553,22 +553,23 @@
* more aggressive if we're really
* low on free memory.
*
- * The number of tries is 512 divided by an
- * 'urgency factor'. In practice this will mean
- * a value of 512 / 8 = 64 pages at a time,
- * giving 64 * 4 (times/sec) * 4k (pagesize) =
- * 1 MB/s in lowest-priority background
- * paging. This number rises to 8 MB/s when the
- * priority is highest (but then we'll be woken
- * up more often and the rate will be even higher).
- * -- Should make this sysctl tunable...
+ * We try pager_daemon.tries_base times, divided by
+ * an 'urgency factor'. In practice this will mean
+ * a value of pager_daemon.tries_base / 8 or 4 = 64
+ * or 128 pages at a time.
+ * This gives us 64 (or 128) * 4k * 4 (times/sec) =
+ * 1 (or 2) MB/s swapping bandwidth in low-priority
+ * background paging. This number rises to 8 MB/s
+ * when the priority is highest (but then we'll be
+ * woken up more often and the rate will be even
+ * higher).
*/
- tries = (512) >> free_memory_available(3);
+ tries = pager_daemon.tries_base >> free_memory_available(3);

while (tries--) {
int gfp_mask;

- if (++tried > SWAP_CLUSTER_MAX && free_memory_available(0))
+ if (++tried > pager_daemon.tries_min && free_memory_available(0))
break;
gfp_mask = __GFP_IO;
try_to_free_page(gfp_mask);
@@ -576,7 +577,7 @@
* Syncing large chunks is faster than swapping
* synchronously (less head movement). -- Rik.
*/
- if (atomic_read(&nr_async_pages) >= SWAP_CLUSTER_MAX)
+ if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster)
run_task_queue(&tq_disk);

}
--- linux/mm/page_alloc.c.100 Sun May 3 01:17:19 1998
+++ linux/mm/page_alloc.c Sun May 3 01:15:28 1998
@@ -125,7 +125,7 @@
* free unfragmented memory.
* Added low/high water marks to avoid thrashing -- Rik.
*/
- if (nr_free_pages > (num_physpages >> 5) + (nr ? 0 : num_physpages >> 6))
+ if (nr_free_pages > (nr ? freepages.low : freepages.high))
return nr+1;

list = free_area + NR_MEM_LISTS;
@@ -335,15 +335,19 @@
int i;

/*
- * select nr of pages we try to keep free for important stuff
- * with a minimum of 48 pages. This is totally arbitrary
+ * Select nr of pages we try to keep free for important stuff
+ * with a minimum of 48 pages and a maximum of 256 pages, so
+ * that we don't waste too much memory on large systems.
+ * This is totally arbitrary.
*/
i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
if (i < 48)
i = 48;
+ if (i > 256)
+ i = 256;
freepages.min = i;
- freepages.low = i + (i>>1);
- freepages.high = i + i;
+ freepages.low = i << 1;
+ freepages.high = freepages.low + i;
mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
p = mem_map + MAP_NR(end_mem);
start_mem = LONG_ALIGN((unsigned long) p);
--- linux/mm/swap.c.100 Sun May 3 01:17:29 1998
+++ linux/mm/swap.c Sun May 3 01:24:34 1998
@@ -44,8 +44,8 @@
*/
freepages_t freepages = {
48, /* freepages.min */
- 72, /* freepages.low */
- 96 /* freepages.high */
+ 96, /* freepages.low */
+ 144 /* freepages.high */
};

/* We track the number of pages currently being asynchronously swapped
@@ -76,4 +76,10 @@
10, /* minimum percent page cache */
30, /* borrow percent page cache */
75 /* maximum */
+};
+
+pager_daemon_t pager_daemon = {
+ 512, /* base number for calculating the number of tries */
+ SWAP_CLUSTER_MAX, /* minimum number of tries */
+ SWAP_CLUSTER_MAX, /* do swap I/O in clusters of this size */
};
--- linux/include/linux/dcache.h.100 Sun May 3 01:30:50 1998
+++ linux/include/linux/dcache.h Sun May 3 01:31:26 1998
@@ -131,7 +131,7 @@

/* dcache memory management */
extern int select_dcache(int, int);
-extern void shrink_dcache_memory(void);
+extern void shrink_dcache_memory(int, unsigned int);
extern void check_dcache_memory(void);
extern void free_inode_memory(int); /* defined in fs/inode.c */

--- linux/include/linux/sysctl.h.100 Sun May 3 01:18:20 1998
+++ linux/include/linux/sysctl.h Sun May 3 01:15:29 1998
@@ -84,7 +84,8 @@
VM_BDFLUSH, /* struct: Control buffer cache flushing */
VM_OVERCOMMIT_MEMORY, /* Turn off the virtual memory safety limit */
VM_BUFFERMEM, /* struct: Set buffer memory thresholds */
- VM_PAGECACHE /* struct: Set cache memory thresholds */
+ VM_PAGECACHE, /* struct: Set cache memory thresholds */
+ VM_PAGERDAEMON /* struct: Control kswapd behaviour */
};

--- linux/include/linux/swapctl.h.100 Sun May 3 01:18:26 1998
+++ linux/include/linux/swapctl.h Sun May 3 01:24:52 1998
@@ -50,6 +50,15 @@
typedef freepages_v1 freepages_t;
extern freepages_t freepages;

+typedef struct pager_daemon_v1
+{
+ unsigned int tries_base;
+ unsigned int tries_min;
+ unsigned int swap_cluster;
+} pager_daemon_v1;
+typedef pager_daemon_v1 pager_daemon_t;
+extern pager_daemon_t pager_daemon;
+
#define SC_VERSION 1
#define SC_MAX_VERSION 1

--- linux/Documentation/sysctl/vm.txt.100 Sun May 3 01:17:59 1998
+++ linux/Documentation/sysctl/vm.txt Sun May 3 01:26:06 1998
@@ -18,6 +18,7 @@
- bdflush
- buffermem
- freepages
+- kswapd
- overcommit_memory
- pagecache
- swapctl
@@ -112,9 +113,58 @@
This file contains the values in the struct freepages. That
struct contains three members: min, low and high.

-These variables are currently unused (?), but they're
-very likely to be abused for something else in the near
-future, so don't yet remove it from the source...
+Although the goal of the Linux memory management subsystem
+is to avoid fragmentation and make large chunks of free
+memory (so that we can hand out DMA buffers and such), there
+still are some page-based limits in the system, mainly to
+make sure we don't waste too much memory trying to get large
+free areas.
+
+The meaning of the numbers is:
+
+freepages.min When the number of free pages in the system
+ reaches this number, only the kernel can
+ allocate more memory.
+freepages.low If memory is too fragmented, the swapout
+ daemon is started, except when the number
+ of free pages is larger than freepages.low.
+freepages.high The swapping daemon exits when memory is
+ sufficiently defragmented, when the number
+ of free pages reaches freepages.high or when
+ it has tried the maximum number of times.
+
+==============================================================
+
+kswapd:
+
+Kswapd is the kernel swapout daemon. That is, kswapd is that
+piece of the kernel that frees memory when it gets fragmented
+or full. Since every system is different, you'll probably want
+some control over this piece of the system.
+
+The numbers in this page correspond to the numbers in the
+struct pager_daemon {tries_base, tries_min, swap_cluster
+}; The tries_base and swap_cluster probably have the
+largest influence on system performance.
+
+tries_base The maximum number of pages kswapd tries to
+ free in one round is calculated from this
+ number. Usually this number will be divided
+ by 4 or 8 (see mm/vmscan.c), so it isn't as
+ big as it looks.
+ When you need to increase the bandwidth to/from
+ swap, you'll want to increase this number.
+tries_min This is the minimum number of times kswapd
+ tries to free a page each time it is called.
+ Basically it's just there to make sure that
+ kswapd frees some pages even when it's being
+ called with minimum priority.
+swap_cluster This is the number of pages kswapd writes in
+ one turn. You want this large so that kswapd
+ does its I/O in large chunks and the disk
+ doesn't have to seek often, but you don't want
+ it to be too large since that would flood the
+ request queue.

==============================================================

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu

Next message: Chris Evans: "2.0.34-pre11b; 3c59x driver broken?"
Previous message: Andrea Arcangeli: "Re: New pre-patch (Re: [PATCH] kswapd fully sysctl tunable)"