Re: sched-idle and disk-priorities for 2.6.X

From: Rik van Riel
Date: Wed Jan 21 2004 - 14:50:58 EST


On Sun, 18 Jan 2004, Pavel Machek wrote:

> > > Is there effective way to limit RSS?
> >
> > Want me to port the RSS stuff from 2.4-rmap to 2.6 ?
>
> Well, if it allows me to limit memory for one task so that it does not
> make system unusable... yes, that would be great.

Here it is. Untested, except for whether it compiles cleanly ;)

Let me know how it works: whether the enforcement is aggressive
enough, whether I need to tweak things, etc.

--
"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it." - Brian W. Kernighan



===== include/linux/init_task.h 1.27 vs edited =====
--- 1.27/include/linux/init_task.h Mon Aug 18 22:46:23 2003
+++ edited/include/linux/init_task.h Tue Jan 20 17:34:40 2004
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H

#include <linux/file.h>
+#include <asm/resource.h>

#define INIT_FILES \
{ \
@@ -41,6 +42,7 @@
.page_table_lock = SPIN_LOCK_UNLOCKED, \
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ .rlimit_rss = RLIM_INFINITY \
}

#define INIT_SIGNALS(sig) { \
===== include/linux/sched.h 1.178 vs edited =====
--- 1.178/include/linux/sched.h Mon Jan 19 18:38:15 2004
+++ edited/include/linux/sched.h Tue Jan 20 17:32:56 2004
@@ -204,6 +204,7 @@
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
+ unsigned long rlimit_rss;
cpumask_t cpu_vm_mask;

unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
===== include/linux/swap.h 1.80 vs edited =====
--- 1.80/include/linux/swap.h Mon Jan 19 01:28:35 2004
+++ edited/include/linux/swap.h Tue Jan 20 18:16:28 2004
@@ -179,7 +179,7 @@

/* linux/mm/rmap.c */
#ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
struct pte_chain *));
void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
#else
-#define page_referenced(page) TestClearPageReferenced(page)
+#define page_referenced(page, _x) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
#endif /* CONFIG_MMU */

===== kernel/sys.c 1.69 vs edited =====
--- 1.69/kernel/sys.c Mon Jan 19 18:38:13 2004
+++ edited/kernel/sys.c Tue Jan 20 18:02:19 2004
@@ -1308,6 +1308,14 @@
if (retval)
return retval;

+ /* The rlimit is specified in bytes, convert to pages for mm. */
+ if (resource == RLIMIT_RSS && current->mm) {
+ unsigned long pages = RLIM_INFINITY;
+ if (new_rlim.rlim_cur != RLIM_INFINITY)
+ pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+ current->mm->rlimit_rss = pages;
+ }
+
*old_rlim = new_rlim;
return 0;
}
===== mm/rmap.c 1.34 vs edited =====
--- 1.34/mm/rmap.c Mon Jan 19 01:36:00 2004
+++ edited/mm/rmap.c Tue Jan 20 18:26:03 2004
@@ -104,6 +104,7 @@
/**
* page_referenced - test if the page was referenced
* @page: the page to test
+ * @rsslimit: set if the process(es) using the page is (are) over the RSS limit
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page.
@@ -112,8 +113,9 @@
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
*/
-int page_referenced(struct page * page)
+int page_referenced(struct page * page, int * rsslimit)
{
+ struct mm_struct * mm;
struct pte_chain *pc;
int referenced = 0;

@@ -127,10 +129,17 @@
pte_t *pte = rmap_ptep_map(page->pte.direct);
if (ptep_test_and_clear_young(pte))
referenced++;
+
+ mm = ptep_to_mm(pte);
+ if (mm->rss > mm->rlimit_rss)
+ *rsslimit = 1;
rmap_ptep_unmap(pte);
} else {
int nr_chains = 0;

+ /* Start with *rsslimit set; clear it if any task mapping the page is under its RSS limit. */
+ *rsslimit = 1;
+
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
int i;
@@ -142,6 +151,10 @@
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
+
+ mm = ptep_to_mm(p);
+ if (mm->rss < mm->rlimit_rss)
+ *rsslimit = 0;
rmap_ptep_unmap(p);
nr_chains++;
}
===== mm/vmscan.c 1.177 vs edited =====
--- 1.177/mm/vmscan.c Mon Jan 19 18:38:07 2004
+++ edited/mm/vmscan.c Wed Jan 21 14:34:44 2004
@@ -250,6 +250,7 @@
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
+ int over_rsslimit;
int ret = 0;

cond_resched();
@@ -278,10 +279,12 @@
goto keep_locked;

pte_chain_lock(page);
- referenced = page_referenced(page);
+ referenced = page_referenced(page, &over_rsslimit);
if (referenced && page_mapping_inuse(page)) {
/* In active use or really unfreeable. Activate it. */
pte_chain_unlock(page);
+ if (over_rsslimit)
+ goto keep_locked;
goto activate_locked;
}

@@ -597,6 +600,7 @@
long mapped_ratio;
long distress;
long swap_tendency;
+ int over_rsslimit;

lru_add_drain();
pgmoved = 0;
@@ -657,7 +661,7 @@
list_del(&page->lru);
if (page_mapped(page)) {
pte_chain_lock(page);
- if (page_mapped(page) && page_referenced(page)) {
+ if (page_mapped(page) && page_referenced(page, &over_rsslimit) && !over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/