Re: [PATCH] vmscan: evict use-once pages first (v2)

From: Wu Fengguang
Date: Sat May 02 2009 - 23:16:53 EST


On Fri, May 01, 2009 at 12:35:41PM -0700, Andrew Morton wrote:
> On Fri, 01 May 2009 10:05:53 -0400
> Rik van Riel <riel@xxxxxxxxxx> wrote:
>
> > Andrew Morton wrote:
> >
> > >> When we implement working set protection, we might as well
> > >> do it for frequently accessed unmapped pages too. There is
> > >> no reason to restrict this protection to mapped pages.
> > >
> > > Well. Except for empirical observation, which tells us that biasing
> > > reclaim to prefer to retain mapped memory produces a better result.
> >
> > That used to be the case because file-backed and
> > swap-backed pages shared the same set of LRUs,
> > while each following a different page reclaim
> > heuristic!
>
> No, I think it still _is_ the case. When reclaim is treating mapped
> and non-mapped pages equally, the end result sucks. Applications get
> all laggy and humans get irritated. It may be that the system was
> optimised from an overall throughput POV, but the result was
> *irritating*.
>
> Which led us to prefer to retain mapped pages. This had nothing at all
> to do with internal impementation details - it was a design objective
> based upon empirical observation of system behaviour.

Heartily Agreed. We shall try hard to protect the running applications.

Commit 7e9cd484204f(vmscan: fix pagecache reclaim referenced bit check)
tries to address scalability problem when every page get mapped and
referenced, so that logic(which lowed the priority of mapped pages)
could be enabled only on conditions like (priority < DEF_PRIORITY).

Or preferably we can explicitly protect the mapped executables,
as illustrated by this patch (a quick prototype).

Thanks,
Fengguang
---
include/linux/pagemap.h | 1 +
mm/mmap.c | 2 ++
mm/nommu.c | 2 ++
mm/vmscan.c | 37 +++++++++++++++++++++++++++++++++++--
4 files changed, 40 insertions(+), 2 deletions(-)

--- linux.orig/include/linux/pagemap.h
+++ linux/include/linux/pagemap.h
@@ -25,6 +25,7 @@ enum mapping_flags {
#ifdef CONFIG_UNEVICTABLE_LRU
AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
#endif
+ AS_EXEC = __GFP_BITS_SHIFT + 4, /* mapped PROT_EXEC somewhere */
};

static inline void mapping_set_error(struct address_space *mapping, int error)
--- linux.orig/mm/mmap.c
+++ linux/mm/mmap.c
@@ -1198,6 +1198,8 @@ munmap_back:
goto unmap_and_free_vma;
if (vm_flags & VM_EXECUTABLE)
added_exe_file_vma(mm);
+ if (vm_flags & VM_EXEC)
+ set_bit(AS_EXEC, &file->f_mapping->flags);
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
if (error)
--- linux.orig/mm/vmscan.c
+++ linux/mm/vmscan.c
@@ -1220,6 +1220,7 @@ static void shrink_active_list(unsigned
int pgdeactivate = 0;
unsigned long pgscanned;
LIST_HEAD(l_hold); /* The pages which were snipped off */
+ LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
struct pagevec pvec;
@@ -1259,8 +1260,15 @@ static void shrink_active_list(unsigned

/* page_referenced clears PageReferenced */
if (page_mapping_inuse(page) &&
- page_referenced(page, 0, sc->mem_cgroup))
+ page_referenced(page, 0, sc->mem_cgroup)) {
+ struct address_space *mapping = page_mapping(page);
+
pgmoved++;
+ if (mapping && test_bit(AS_EXEC, &mapping->flags)) {
+ list_add(&page->lru, &l_active);
+ continue;
+ }
+ }

list_add(&page->lru, &l_inactive);
}
@@ -1269,7 +1277,6 @@ static void shrink_active_list(unsigned
* Move the pages to the [file or anon] inactive list.
*/
pagevec_init(&pvec, 1);
- lru = LRU_BASE + file * LRU_FILE;

spin_lock_irq(&zone->lru_lock);
/*
@@ -1281,6 +1288,7 @@ static void shrink_active_list(unsigned
reclaim_stat->recent_rotated[!!file] += pgmoved;

pgmoved = 0;
+ lru = LRU_BASE + file * LRU_FILE;
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1305,6 +1313,31 @@ static void shrink_active_list(unsigned
}
__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
pgdeactivate += pgmoved;
+
+ pgmoved = 0;
+ lru = LRU_ACTIVE + file * LRU_FILE;
+ while (!list_empty(&l_active)) {
+ page = lru_to_page(&l_active);
+ prefetchw_prev_lru_page(page, &l_active, flags);
+ VM_BUG_ON(PageLRU(page));
+ SetPageLRU(page);
+ VM_BUG_ON(!PageActive(page));
+
+ list_move(&page->lru, &zone->lru[lru].list);
+ mem_cgroup_add_lru_list(page, lru);
+ pgmoved++;
+ if (!pagevec_add(&pvec, page)) {
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+ pgmoved = 0;
+ spin_unlock_irq(&zone->lru_lock);
+ if (buffer_heads_over_limit)
+ pagevec_strip(&pvec);
+ __pagevec_release(&pvec);
+ spin_lock_irq(&zone->lru_lock);
+ }
+ }
+ __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+
__count_zone_vm_events(PGREFILL, zone, pgscanned);
__count_vm_events(PGDEACTIVATE, pgdeactivate);
spin_unlock_irq(&zone->lru_lock);
--- linux.orig/mm/nommu.c
+++ linux/mm/nommu.c
@@ -1220,6 +1220,8 @@ unsigned long do_mmap_pgoff(struct file
added_exe_file_vma(current->mm);
vma->vm_mm = current->mm;
}
+ if (vm_flags & VM_EXEC)
+ set_bit(AS_EXEC, &file->f_mapping->flags);
}

down_write(&nommu_region_sem);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/