[PATCH 1/1] mm: thp: calculate the mapcount correctly for THP pages during WP faults

From: Andrea Arcangeli
Date: Thu Apr 28 2016 - 19:05:06 EST


This will provide full accuracy to the mapcount calculation in write
protect faults, so page pinning will not get broken by false positive
copy-on-writes.

total_mapcount() isn't the right calculation for reuse_swap_page(), so
this introduces page_trans_huge_mapcount(), which is effectively the
fully accurate equivalent of page_mapcount() for Transparent
Hugepages. Because of its higher runtime cost, we only use
page_trans_huge_mapcount() during COW faults, where the full accuracy
is strictly needed.

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
---
include/linux/mm.h | 5 +++++
include/linux/swap.h | 3 +--
mm/huge_memory.c | 44 ++++++++++++++++++++++++++++++++++++--------
mm/swapfile.c | 5 +----
4 files changed, 43 insertions(+), 14 deletions(-)
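
To illustrate the failure mode described above: the old reuse check in
do_huge_pmd_wp_page() required page_count() == 1, so any transient pin
(GUP, O_DIRECT, RDMA) forced a copy even when ours was the only
mapping. The following standalone userspace sketch (illustrative names
only, not kernel code) contrasts the two checks:

/* Illustrative userspace model only -- not kernel code. */
#include <stdio.h>
#include <stdbool.h>

struct page_model {
	int mapcount;	/* page table mappings of this page */
	int refcount;	/* mappings plus transient pins (GUP, O_DIRECT, RDMA) */
};

/* Old heuristic: only reuse if our mapping is also the sole reference. */
static bool can_reuse_old(const struct page_model *p)
{
	return p->mapcount == 1 && p->refcount == 1;
}

/* New rule: reuse whenever nobody else maps the page; a pin's
 * elevated refcount must not force a copy. */
static bool can_reuse_new(const struct page_model *p)
{
	return p->mapcount == 1;
}

int main(void)
{
	/* Mapped once, but pinned by an in-flight O_DIRECT read. */
	struct page_model pinned = { .mapcount = 1, .refcount = 2 };

	printf("old check: %s\n", can_reuse_old(&pinned) ? "reuse" : "copy");
	printf("new check: %s\n", can_reuse_new(&pinned) ? "reuse" : "copy");
	return 0;
}

The old check prints "copy" for the pinned page: the WP fault copies
it, and the pinned DMA target silently diverges from the page the task
keeps using. The new mapcount-only check prints "reuse".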

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8fb3604..c2026a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -501,11 +501,16 @@ static inline int page_mapcount(struct page *page)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int total_mapcount(struct page *page);
+int page_trans_huge_mapcount(struct page *page);
#else
static inline int total_mapcount(struct page *page)
{
	return page_mapcount(page);
}
+static inline int page_trans_huge_mapcount(struct page *page)
+{
+	return page_mapcount(page);
+}
#endif

static inline struct page *virt_to_head_page(const void *x)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2f6478f..905bf8e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -517,8 +517,7 @@ static inline int swp_swapcount(swp_entry_t entry)
	return 0;
}

-#define reuse_swap_page(page) \
-	(!PageTransCompound(page) && page_mapcount(page) == 1)
+#define reuse_swap_page(page) (page_trans_huge_mapcount(page) == 1)

static inline int try_to_free_swap(struct page *page)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 06bce0f..6a6d9c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1298,15 +1298,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
	/*
	 * We can only reuse the page if nobody else maps the huge page or it's
-	 * part. We can do it by checking page_mapcount() on each sub-page, but
-	 * it's expensive.
-	 * The cheaper way is to check page_count() to be equal 1: every
-	 * mapcount takes page reference reference, so this way we can
-	 * guarantee, that the PMD is the only mapping.
-	 * This can give false negative if somebody pinned the page, but that's
-	 * fine.
+	 * part.
	 */
-	if (page_mapcount(page) == 1 && page_count(page) == 1) {
+	if (page_trans_huge_mapcount(page) == 1) {
		pmd_t entry;
		entry = pmd_mkyoung(orig_pmd);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -3226,6 +3220,40 @@ int total_mapcount(struct page *page)
}

/*
+ * This calculates accurately how many mappings a transparent hugepage
+ * has (unlike page_mapcount() which isn't fully accurate). This full
+ * accuracy is primarily needed to know if copy-on-write faults can
+ * take over the page and change the mapping to read-write instead of
+ * copying it. This is different from total_mapcount() too: we must
+ * not count all mappings on the subpages individually, but instead we
+ * must check the highest mapcount any one of the subpages has.
+ *
+ * It would be entirely safe and even more correct to replace
+ * page_mapcount() with page_trans_huge_mapcount(), however we only
+ * use page_trans_huge_mapcount() in the copy-on-write faults where we
+ * need full accuracy to avoid breaking page pinning.
+ */
+int page_trans_huge_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = 0;
+	if (likely(!PageHuge(page))) {
+		for (i = 0; i < HPAGE_PMD_NR; i++)
+			ret = max(ret, atomic_read(&page[i]._mapcount) + 1);
+		if (PageDoubleMap(page))
+			ret -= 1;
+	}
+	ret += compound_mapcount(page);
+	return ret;
+}
+
+/*
* This function splits huge page into normal pages. @page can point to any
* subpage of huge page to split. Split doesn't change the position of @page.
*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 83874ec..984470a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -930,10 +930,7 @@ int reuse_swap_page(struct page *page)
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	if (unlikely(PageKsm(page)))
		return 0;
-	/* The page is part of THP and cannot be reused */
-	if (PageTransCompound(page))
-		return 0;
-	count = page_mapcount(page);
+	count = page_trans_huge_mapcount(page);
	if (count <= 1 && PageSwapCache(page)) {
		count += page_swapcount(page);
		if (count == 1 && !PageWriteback(page)) {
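
To make the max-versus-sum distinction concrete, here is a standalone
userspace model of the two counters (illustrative and simplified: the
real subpage counts are -1-based atomics, and the scenario assumes an
x86-64 2MB THP). A parent PTE-maps all 512 subpages after a PMD split
while a fork child still PMD-maps the whole page, so PageDoubleMap is
set:

/* Illustrative userspace model only -- not kernel code. */
#include <stdio.h>
#include <stdbool.h>

#define HPAGE_PMD_NR 512	/* subpages of a 2MB THP (x86-64, 4k pages) */

struct thp_model {
	int subpage_mapcount[HPAGE_PMD_NR];	/* stored as "_mapcount + 1" */
	int compound_mapcount;			/* PMD-level mappings */
	bool double_map;			/* PTE- and PMD-mapped at once */
};

/* Models page_trans_huge_mapcount(): the *highest* per-subpage count. */
static int trans_huge_mapcount(const struct thp_model *p)
{
	int i, ret = 0;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		if (p->subpage_mapcount[i] > ret)
			ret = p->subpage_mapcount[i];
	if (p->double_map)
		ret -= 1;	/* drop the extra DoubleMap accounting */
	return ret + p->compound_mapcount;
}

/* Models total_mapcount(): the *sum* of all mappings. */
static int total_mapcount_model(const struct thp_model *p)
{
	int i, ret = p->compound_mapcount;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		ret += p->subpage_mapcount[i];
	if (p->double_map)
		ret -= HPAGE_PMD_NR;
	return ret;
}

int main(void)
{
	/* Parent PTE-maps all 512 subpages, a fork child still PMD-maps
	 * the whole THP: each subpage count is 1 PTE + 1 DoubleMap extra. */
	struct thp_model p = { .compound_mapcount = 1, .double_map = true };
	int i;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		p.subpage_mapcount[i] = 2;

	printf("total_mapcount:           %d\n", total_mapcount_model(&p)); /* 513 */
	printf("page_trans_huge_mapcount: %d\n", trans_huge_mapcount(&p));  /* 2 */
	return 0;
}

total_mapcount() reports 513, the number of mappings in total, which
says nothing about per-subpage sharing; page_trans_huge_mapcount()
reports 2, the highest count on any one subpage, and that is the value
that must be 1 before a WP fault may hand the page back read-write.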