Re: nonlinear swapping w/o pte_chains [Re: VMA_MERGING_FIXUP and patch]

From: Andrea Arcangeli
Date: Wed Mar 24 2004 - 13:43:24 EST


On Wed, Mar 24, 2004 at 03:37:29PM +0100, Andrea Arcangeli wrote:
> cute, I agree we should recheck the young bit inside.

I did it (the below is incremental with 2.6.5-rc2-aa2); it looks OK, but
I can't see any difference in practice. Anyway, it makes sense, so I'll
keep it. I'm now re-running the regression tests for both linear and
non-linear heavy shm swap.
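
The point of checking pte_young before ptep_test_and_clear_young is only
to skip the atomic RMW (which dirties the cacheline) when the accessed
bit is already clear. A minimal userspace sketch of the pattern, with an
atomic word standing in for the pte and bit 0 for the young bit (names
made up, this is not the kernel code):

	#include <stdatomic.h>
	#include <stdio.h>

	static int test_and_clear_young_bit(atomic_uint *pte)
	{
		/* cheap read first: most ptes aren't young, so we
		 * usually avoid the atomic RMW entirely */
		if (!(atomic_load_explicit(pte, memory_order_relaxed) & 1))
			return 0;
		/* atomic clear; return what the bit was before */
		return atomic_fetch_and_explicit(pte, ~1u,
						 memory_order_relaxed) & 1;
	}

	int main(void)
	{
		atomic_uint pte = 1;	/* young bit set */
		printf("%d\n", test_and_clear_young_bit(&pte));	/* 1 */
		printf("%d\n", test_and_clear_young_bit(&pte));	/* 0 */
		return 0;
	}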

So I'm now going to merge the prio_tree; then I can take care of the
s/anon_vma_t/struct anon_vma/ filter and of the mprotect/mremap
vma merging (for file mappings too).

--- x/mm/objrmap.c.~1~ 2004-03-24 07:16:18.000000000 +0100
+++ x/mm/objrmap.c 2004-03-24 19:27:31.213416608 +0100
@@ -137,7 +137,7 @@ page_referenced_one(struct vm_area_struc

pte = find_pte(vma, page, NULL);
if (pte) {
- if (ptep_test_and_clear_young(pte))
+ if (pte_young(*pte) && ptep_test_and_clear_young(pte))
referenced++;
pte_unmap(pte);
}
@@ -442,6 +442,8 @@ try_to_unmap_nonlinear_pte(struct vm_are
page = pfn_to_page(pfn);
if (PageReserved(page))
continue;
+ if (pte_young(pte) && ptep_test_and_clear_young(ptep))
+ continue;
/*
* any other page in the nonlinear mapping will not wait
* on us since only one cpu can take the i_shared_sem
@@ -506,7 +508,7 @@ try_to_unmap_nonlinear(struct vm_area_st
* This function is strictly a helper function for try_to_unmap_inode.
*/
static int
-try_to_unmap_one(struct vm_area_struct *vma, struct page *page)
+try_to_unmap_one(struct vm_area_struct *vma, struct page *page, int *young)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
@@ -523,12 +525,21 @@ try_to_unmap_one(struct vm_area_struct *

if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
/*
- * All it matters is that the page won't go
- * away under us after we unlock.
+ * If this was a false positive generated by a
+ * failed trylock in the referenced pass, let's
+ * avoid paying the big cost of the nonlinear
+ * swap; we'd better be sure we have to pay that
+ * cost before running it.
*/
- page_map_unlock(page);
- try_to_unmap_nonlinear(vma);
- page_map_lock(page);
+ if (!*young) {
+ /*
+ * All that matters is that the page won't go
+ * away under us after we unlock.
+ */
+ page_map_unlock(page);
+ try_to_unmap_nonlinear(vma);
+ page_map_lock(page);
+ }
goto out;
}

@@ -536,10 +547,21 @@ try_to_unmap_one(struct vm_area_struct *
if (!pte)
goto out;

- unmap_pte_page(page, vma, address, pte);
+ /*
+ * We use trylocks in the "referenced" methods; if they fail,
+ * we let the VM go ahead with the unmapping to avoid lock
+ * congestion, so here we may be trying to unmap young
+ * ptes. If that happens we give up unmapping this page
+ * and clear all the other reference bits instead (basically
+ * downgrading to a page_referenced pass).
+ */
+ if ((!pte_young(*pte) || !ptep_test_and_clear_young(pte)) && !*young)
+ unmap_pte_page(page, vma, address, pte);
+ else
+ *young = 1;

pte_unmap(pte);
-out:
+ out:
spin_unlock(&mm->page_table_lock);
return ret;
}
@@ -561,7 +583,7 @@ try_to_unmap_inode(struct page *page)
{
struct address_space *mapping = page->mapping;
struct vm_area_struct *vma;
- int ret = SWAP_AGAIN;
+ int ret = SWAP_AGAIN, young = 0;

BUG_ON(PageSwapCache(page));

@@ -569,13 +591,13 @@ try_to_unmap_inode(struct page *page)
return ret;

list_for_each_entry(vma, &mapping->i_mmap, shared) {
- ret = try_to_unmap_one(vma, page);
+ ret = try_to_unmap_one(vma, page, &young);
if (ret == SWAP_FAIL || !page->mapcount)
goto out;
}

list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
- ret = try_to_unmap_one(vma, page);
+ ret = try_to_unmap_one(vma, page, &young);
if (ret == SWAP_FAIL || !page->mapcount)
goto out;
}
@@ -588,7 +610,7 @@ out:
static int
try_to_unmap_anon(struct page * page)
{
- int ret = SWAP_AGAIN;
+ int ret = SWAP_AGAIN, young = 0;
struct vm_area_struct * vma;
anon_vma_t * anon_vma = (anon_vma_t *) page->mapping;

@@ -598,7 +620,7 @@ try_to_unmap_anon(struct page * page)
spin_lock(&anon_vma->anon_vma_lock);
BUG_ON(list_empty(&anon_vma->anon_vma_head));
list_for_each_entry(vma, &anon_vma->anon_vma_head, anon_vma_node) {
- ret = try_to_unmap_one(vma, page);
+ ret = try_to_unmap_one(vma, page, &young);
if (ret == SWAP_FAIL || !page->mapcount)
break;
}

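In short, once one young pte is found the unmap pass degrades into a
reference-clearing pass for the rest of the vmas, and the expensive
try_to_unmap_nonlinear() is skipped for trylock false positives. A toy
userspace sketch of that control flow (all names hypothetical, not the
kernel code):

	#include <stdio.h>

	/* mirrors the patched try_to_unmap_one(): either unmap the
	 * pte or, once a young pte has been seen, only clear the
	 * remaining reference bits */
	static void unmap_one(int pte_is_young, int *young, int i)
	{
		if (pte_is_young || *young) {
			*young = 1;
			printf("pte %d: clear reference bit only\n", i);
		} else {
			printf("pte %d: unmap\n", i);
		}
	}

	int main(void)
	{
		int ptes[] = { 0, 0, 1, 0 };	/* third pte referenced */
		int young = 0, i;

		for (i = 0; i < 4; i++)
			unmap_one(ptes[i], &young, i);
		/* ptes 2 and 3 are left mapped: the pass degraded to a
		 * page_referenced-style pass as soon as pte 2 turned
		 * out to be young */
		return 0;
	}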