Re: new swap cache regime

Andrea Arcangeli (andrea@e-mind.com)
Sun, 27 Sep 1998 16:07:10 +0200 (CEST)


On Fri, 25 Sep 1998, Stephen C. Tweedie wrote:

>There is only one flaw I can see in this patch as it stands (and my own
>had exactly the same flaw). If we kill a process which refers to swap
>pages still in the swap cache, then we no longer free the swap pages if
>that process was the last user of the swap entry: we have to wait for
>shrink_mmap() to free the corresponding swap cache pages before the swap
>entries become reusable. That's not much of a problem in most cases,
>however, and I'm not sure it warrants the expense of dealing with
>(because we certainly cannot afford to make the normal exit(2) path
>longer).

I've tried to make swap_free more clever. That is to say, the O(n)
search in the swap cache is done only when exit(2) is run by a swapped
out process. So it won't hurt the performance of systems with lots of RAM
or the exit path of _not_ swapped out processes.

I don't know if the patch is perfectly stable though. It seems to run fine
here, but there's one thing that I still don't understand. Sometimes the
page is found in the swap cache but it has page->count == 0. This makes no
sense to me (I think that if the page is found in the page cache it should
always have page->count >= 1). These swap cache pages with page->count ==
0 all seem to be pages swap_freed() by zap_page_range after exit(2).

As I just said, it seems to work fine though.

With this patch applied, shrink_mmap() _only_ has to free the really useful
_caching_ swap cache pages that are not removed from the swap cache
anymore because some other process is sharing the same swap entry.

Comments?

Andrea[s] Arcangeli

diff -urN /home/andrea/devel/kernel-tree/linux-2.1.122/include/linux/swap.h linux/include/linux/swap.h
--- /home/andrea/devel/kernel-tree/linux-2.1.122/include/linux/swap.h Sat Sep 5 14:17:56 1998
+++ linux/include/linux/swap.h Sat Sep 26 18:50:41 1998
@@ -89,6 +89,8 @@
extern int swap_check_entry(unsigned long);
extern struct page * read_swap_cache_async(unsigned long, int);
#define read_swap_cache(entry) read_swap_cache_async(entry, 1);
+extern int FASTCALL(swap_count(unsigned long));
+extern void FASTCALL(try_to_free_last_swap_entry(unsigned long));
/*
* Make these inline later once they are working properly.
*/
@@ -146,14 +148,20 @@
*/
static inline int is_page_shared(struct page *page)
{
- int count = atomic_read(&page->count);
+ int count;
if (PageReserved(page))
return 1;
- if (page->inode == &swapper_inode)
- count--;
+ count = atomic_read(&page->count);
+ if (PageSwapCache(page))
+ {
+ /* PARANOID */
+ if (page->inode != &swapper_inode)
+ panic("swap cache page has wrong inode\n");
+ count += swap_count(page->offset) - 2;
+ }
if (PageFreeAfter(page))
count--;
- return (count > 1);
+ return count > 1;
}

#endif /* __KERNEL__*/
diff -urN /home/andrea/devel/kernel-tree/linux-2.1.122/mm/page_alloc.c linux/mm/page_alloc.c
--- /home/andrea/devel/kernel-tree/linux-2.1.122/mm/page_alloc.c Thu Sep 10 23:56:48 1998
+++ linux/mm/page_alloc.c Sat Sep 26 19:01:22 1998
@@ -163,9 +163,11 @@
free_pages_ok(page->map_nr, 0);
return;
}
+#if 0
if (PageSwapCache(page) && atomic_read(&page->count) == 1)
printk(KERN_WARNING "VM: Releasing swap cache page at %p",
__builtin_return_address(0));
+#endif
}

void free_pages(unsigned long addr, unsigned long order)
@@ -182,10 +184,12 @@
free_pages_ok(map_nr, order);
return;
}
+#if 0
if (PageSwapCache(map) && atomic_read(&map->count) == 1)
printk(KERN_WARNING
"VM: Releasing swap cache pages at %p",
__builtin_return_address(0));
+#endif
}
}

diff -urN /home/andrea/devel/kernel-tree/linux-2.1.122/mm/swap_state.c linux/mm/swap_state.c
--- /home/andrea/devel/kernel-tree/linux-2.1.122/mm/swap_state.c Thu Sep 10 23:56:48 1998
+++ linux/mm/swap_state.c Sun Sep 27 15:58:01 1998
@@ -143,6 +143,50 @@
goto out;
}

+int swap_count(unsigned long entry)
+{
+ struct swap_info_struct * p;
+ unsigned long offset, type;
+ int retval = 0;
+
+ if (!entry)
+ goto bad_entry;
+ type = SWP_TYPE(entry);
+ if (type & SHM_SWP_TYPE)
+ goto out;
+ if (type >= nr_swapfiles)
+ goto bad_file;
+ p = type + swap_info;
+ offset = SWP_OFFSET(entry);
+ if (offset >= p->max)
+ goto bad_offset;
+ if (!p->swap_map[offset])
+ goto bad_unused;
+ retval = p->swap_map[offset];
+#ifdef DEBUG_SWAP
+ printk("DebugVM: swap_count(entry %08lx, count %d)\n",
+ entry, retval);
+#endif
+out:
+ return retval;
+
+bad_entry:
+ printk(KERN_ERR "swap_count: null entry!\n");
+ goto out;
+bad_file:
+ printk(KERN_ERR
+ "swap_count: entry %08lx, nonexistent swap file!\n", entry);
+ goto out;
+bad_offset:
+ printk(KERN_ERR
+ "swap_count: entry %08lx, offset exceeds max!\n", entry);
+ goto out;
+bad_unused:
+ printk(KERN_ERR
+ "swap_count at %8p: entry %08lx, unused page!\n",
+ __builtin_return_address(0), entry);
+ goto out;
+}

static inline void remove_from_swap_cache(struct page *page)
{
@@ -155,6 +199,7 @@
printk ("VM: Removing swap cache page with wrong inode hash "
"on page %08lx\n", page_address(page));
}
+#if 0
/*
* This is a legal case, but warn about it.
*/
@@ -163,6 +208,7 @@
"VM: Removing page cache on unshared page %08lx\n",
page_address(page));
}
+#endif

#ifdef DEBUG_SWAP
printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
@@ -238,6 +284,30 @@
printk (KERN_ERR "VM: Found a non-swapper swap page!\n");
__free_page(found);
return 0;
+}
+
+void try_to_free_last_swap_entry(unsigned long entry)
+{
+ struct page * page = lookup_swap_cache(entry);
+ if (page)
+ {
+ /*
+ * The last reference in the swap_map[entry] is caused
+ * by this swap cache page.
+ *
+ * Decrease the page->count increased by __find_page().
+ * -arca
+ */
+ __free_page(page);
+ if (atomic_read(&page->count) == 1)
+ /*
+ * The page is resident in memory only because
+ * it' s in the swap cache so we can remove it
+ * because it can' t be useful anymore.
+ * -arca
+ */
+ delete_from_swap_cache(page);
+ }
}

/*
diff -urN /home/andrea/devel/kernel-tree/linux-2.1.122/mm/swapfile.c linux/mm/swapfile.c
--- /home/andrea/devel/kernel-tree/linux-2.1.122/mm/swapfile.c Thu Sep 10 23:56:48 1998
+++ linux/mm/swapfile.c Sun Sep 27 15:27:22 1998
@@ -144,10 +144,15 @@
p->highest_bit = offset;
if (!p->swap_map[offset])
goto bad_free;
- if (p->swap_map[offset] < SWAP_MAP_MAX) {
- if (!--p->swap_map[offset])
+ if (p->swap_map[offset] < SWAP_MAP_MAX)
+ switch(--p->swap_map[offset])
+ {
+ case 0:
nr_swap_pages++;
- }
+ break;
+ case 1:
+ try_to_free_last_swap_entry(entry);
+ }
#ifdef DEBUG_SWAP
printk("DebugVM: swap_free(entry %08lx, count now %d)\n",
entry, p->swap_map[offset]);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/