[PATCH 5/7] mm: Batch addition of pages to swap cache

From: Tim Chen
Date: Tue May 03 2016 - 17:02:53 EST


When a page is to be swapped, it needs to be added to the swap cache
and then removed after the paging has been completed.  A swap partition's
mapping tree lock is acquired for each anonymous page's addition to the
swap cache.

This patch creates new functions add_to_swap_batch and
__add_to_swap_cache_batch that allow multiple pages destined for the
same swap partition to be added to that swap partition's swap cache in
one acquisition of the mapping tree lock.  These functions extend the
original add_to_swap and __add_to_swap_cache. This reduces the contention
of the swap partition's mapping tree lock when we are actively reclaiming
memory and swapping pages.

Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
 include/linux/swap.h |   2 +
 mm/swap_state.c      | 248 +++++++++++++++++++++++++++++++++++++--------------
 mm/vmscan.c          |  19 ++--
 3 files changed, 196 insertions(+), 73 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index da6d994..cd06f2a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -373,6 +373,8 @@ extern unsigned long total_swapcache_pages(void);
Âextern void show_swap_cache_info(void);
Âextern int add_to_swap(struct page *, struct list_head *list,
 swp_entry_t *entry);
+extern void add_to_swap_batch(struct page *pages[], struct list_head *list,
+ swp_entry_t entries[], int ret_codes[], int nr);
Âextern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
Âextern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
Âextern void __delete_from_swap_cache(struct page *);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bad02c1..ce02024 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -72,49 +72,94 @@ void show_swap_cache_info(void)
 printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
Â}
Â
-/*
- * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
- * but sets SwapCache flag and private instead of mapping and index.
- */
-int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+void __add_to_swap_cache_batch(struct page *pages[], swp_entry_t entries[],
+ int ret[], int nr)
Â{
- int error;
+ int error, i;
 struct address_space *address_space;
+ struct address_space *prev;
+ struct page *page;
+ swp_entry_t entry;
Â
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageSwapCache(page), page);
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
+ prev = NULL;
+ address_space = NULL;
+ for (i = 0; i < nr; ++i) {
+ /* error at pre-processing stage, swap entry already released */
+ if (ret[i] == -ENOENT)
+ continue;
Â
- get_page(page);
- SetPageSwapCache(page);
- set_page_private(page, entry.val);
+ page = pages[i];
+ entry = entries[i];
Â
- address_space = swap_address_space(entry);
- spin_lock_irq(&address_space->tree_lock);
- error = radix_tree_insert(&address_space->page_tree,
- entry.val, page);
- if (likely(!error)) {
- address_space->nrpages++;
- __inc_zone_page_state(page, NR_FILE_PAGES);
- INC_CACHE_INFO(add_total);
- }
- spin_unlock_irq(&address_space->tree_lock);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(PageSwapCache(page), page);
+ VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
Â
- if (unlikely(error)) {
- /*
- Â* Only the context which have set SWAP_HAS_CACHE flag
- Â* would call add_to_swap_cache().
- Â* So add_to_swap_cache() doesn't returns -EEXIST.
- Â*/
- VM_BUG_ON(error == -EEXIST);
- set_page_private(page, 0UL);
- ClearPageSwapCache(page);
- put_page(page);
+ get_page(page);
+ SetPageSwapCache(page);
+ set_page_private(page, entry.val);
+
+ address_space = swap_address_space(entry);
+ if (prev != address_space) {
+ if (prev)
+ spin_unlock_irq(&prev->tree_lock);
+ spin_lock_irq(&address_space->tree_lock);
+ }
+ error = radix_tree_insert(&address_space->page_tree,
+ entry.val, page);
+ if (likely(!error)) {
+ address_space->nrpages++;
+ __inc_zone_page_state(page, NR_FILE_PAGES);
+ INC_CACHE_INFO(add_total);
+ }
+
+ if (unlikely(error)) {
+ spin_unlock_irq(&address_space->tree_lock);
+ address_space = NULL;
+ /*
+ Â* Only the context which have set SWAP_HAS_CACHE flag
+ Â* would call add_to_swap_cache().
+ Â* So add_to_swap_cache() doesn't returns -EEXIST.
+ Â*/
+ VM_BUG_ON(error == -EEXIST);
+ set_page_private(page, 0UL);
+ ClearPageSwapCache(page);
+ put_page(page);
+ }
+ prev = address_space;
+ ret[i] = error;
 }
+ if (address_space)
+ spin_unlock_irq(&address_space->tree_lock);
+}
Â
- return error;
+/*
+ * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * but sets SwapCache flag and private instead of mapping and index.
+ */
+int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+{
+ swp_entry_t entries[1];
+ struct page *pages[1];
+ int ret[1];
+
+ pages[0] = page;
+ entries[0] = entry;
+ __add_to_swap_cache_batch(pages, entries, ret, 1);
+ return ret[0];
Â}
Â
+void add_to_swap_cache_batch(struct page *pages[], swp_entry_t entries[],
+ gfp_t gfp_mask, int ret[], int nr)
+{
+ int error;
+
+ error = radix_tree_maybe_preload(gfp_mask);
+ if (!error) {
+ __add_to_swap_cache_batch(pages, entries, ret, nr);
+ radix_tree_preload_end();
+ }
+}
Â
Âint add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
Â{
@@ -151,6 +196,73 @@ void __delete_from_swap_cache(struct page *page)
 INC_CACHE_INFO(del_total);
Â}
Â
+void add_to_swap_batch(struct page *pages[], struct list_head *list,
+ swp_entry_t entries[], int ret_codes[], int nr)
+{
+ swp_entry_t *entry;
+ struct page *page;
+ int i;
+
+ for (i = 0; i < nr; ++i) {
+ entry = &entries[i];
+ page = pages[i];
+
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(!PageUptodate(page), page);
+
+ ret_codes[i] = 1;
+
+ if (!entry->val)
+ ret_codes[i] = -ENOENT;
+
+ if (mem_cgroup_try_charge_swap(page, *entry)) {
+ swapcache_free(*entry);
+ ret_codes[i] = 0;
+ }
+
+ if (unlikely(PageTransHuge(page)))
+ if (unlikely(split_huge_page_to_list(page, list))) {
+ swapcache_free(*entry);
+ ret_codes[i] = -ENOENT;
+ continue;
+ }
+ }
+
+ /*
+ Â* Radix-tree node allocations from PF_MEMALLOC contexts could
+ Â* completely exhaust the page allocator. __GFP_NOMEMALLOC
+ Â* stops emergency reserves from being allocated.
+ Â*
+ Â* TODO: this could cause a theoretical memory reclaim
+ Â* deadlock in the swap out path.
+ Â*/
+ /*
+ Â* Add it to the swap cache
+ Â*/
+ add_to_swap_cache_batch(pages, entries,
+ __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN,
+ ret_codes, nr);
+
+ for (i = 0; i < nr; ++i) {
+ entry = &entries[i];
+ page = pages[i];
+
+ if (!ret_codes[i]) {ÂÂÂÂ/* Success */
+ ret_codes[i] = 1;
+ continue;
+ } else {ÂÂÂÂÂÂÂÂ/* -ENOMEM radix-tree allocation failure */
+ /*
+ Â* add_to_swap_cache() doesn't return -EEXIST,
+ Â* so we can safely clear SWAP_HAS_CACHE flag.
+ Â*/
+ if (ret_codes[i] != -ENOENT)
+ swapcache_free(*entry);
+ ret_codes[i] = 0;
+ continue;
+ }
+ }
+}
+
Â/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
@@ -161,54 +273,56 @@ void __delete_from_swap_cache(struct page *page)
 */
Âint add_to_swap(struct page *page, struct list_head *list, swp_entry_t *entry)
Â{
- int err;
- swp_entry_t ent;
+ int ret[1];
+ swp_entry_t ent[1];
Â
 VM_BUG_ON_PAGE(!PageLocked(page), page);
 VM_BUG_ON_PAGE(!PageUptodate(page), page);
Â
 if (!entry) {
- ent = get_swap_page();
- entry = &ent;
+ ent[0] = get_swap_page();
+ entry = &ent[0];
 }
Â
 if (entry && !entry->val)
 return 0;
Â
- if (mem_cgroup_try_charge_swap(page, *entry)) {
- swapcache_free(*entry);
- return 0;
- }
+ add_to_swap_batch(&page, list, entry, ret, 1);
+ return ret[0];
+}
Â
- if (unlikely(PageTransHuge(page)))
- if (unlikely(split_huge_page_to_list(page, list))) {
- swapcache_free(*entry);
- return 0;
+void delete_from_swap_cache_batch(struct page pages[], int nr)
+{
+ struct page *page;
+ swp_entry_t entry;
+ struct address_space *address_space, *prev;
+ int i;
+
+ prev = NULL;
+ address_space = NULL;
+ for (i = 0; i < nr; ++i) {
+ page = &pages[i];
+ entry.val = page_private(page);
+
+ address_space = swap_address_space(entry);
+ if (address_space != prev) {
+ if (prev)
+ spin_unlock_irq(&prev->tree_lock);
+ spin_lock_irq(&address_space->tree_lock);
 }
+ __delete_from_swap_cache(page);
+ prev = address_space;
+ }
+ if (address_space)
+ spin_unlock_irq(&address_space->tree_lock);
Â
- /*
- Â* Radix-tree node allocations from PF_MEMALLOC contexts could
- Â* completely exhaust the page allocator. __GFP_NOMEMALLOC
- Â* stops emergency reserves from being allocated.
- Â*
- Â* TODO: this could cause a theoretical memory reclaim
- Â* deadlock in the swap out path.
- Â*/
- /*
- Â* Add it to the swap cache.
- Â*/
- err = add_to_swap_cache(page, *entry,
- __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
+ for (i = 0; i < nr; ++i) {
+ page = &pages[i];
+ entry.val = page_private(page);
Â
- if (!err) {
- return 1;
- } else { /* -ENOMEM radix-tree allocation failure */
- /*
- Â* add_to_swap_cache() doesn't return -EEXIST, so we can safely
- Â* clear SWAP_HAS_CACHE flag.
- Â*/
- swapcache_free(*entry);
- return 0;
+ /* can batch this */
+ swapcache_free(entry);
+ put_page(page);
 }
Â}
Â
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 310e2b2..fab61f1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1097,8 +1097,9 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,
 unsigned long nr_reclaimed = 0;
 enum pg_result pg_dispose;
 swp_entry_t swp_entries[SWAP_BATCH];
+ struct page *pages[SWAP_BATCH];
+ int m, i, k, ret[SWAP_BATCH];
 struct page *page;
- int m, i, k;
Â
 while (n > 0) {
 int swap_ret = SWAP_SUCCESS;
@@ -1117,13 +1118,19 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,
 page = lru_to_page(swap_pages);
Â
 list_del(&page->lru);
+ pages[i] = page;
+ }
Â
- /*
- * Anonymous process memory has backing store?
- * Try to allocate it some swap space here.
- */
+ /*
+ * Anonymous process memory has backing store?
+ * Try to allocate it some swap space here.
+ */
+ add_to_swap_batch(pages, page_list, swp_entries, ret, m);
+
+ for (i = 0; i < m; ++i) {
+ page = pages[i];
Â
- if (!add_to_swap(page, page_list, NULL)) {
+ if (!ret[i]) {
 pg_finish(page, PG_ACTIVATE_LOCKED, swap_ret,
 &nr_reclaimed, pgactivate,
 ret_pages, free_pages);
--Â
2.5.5