Re: [PATCH v2 1/5] mm: Implement folio_remove_rmap_range()

From: Matthew Wilcox
Date: Wed Aug 30 2023 - 14:46:20 EST


On Wed, Aug 30, 2023 at 10:50:07AM +0100, Ryan Roberts wrote:
> Like page_remove_rmap() but batch-removes the rmap for a range of pages
> belonging to a folio. This can provide a small speedup due to less
> manipulation of the various counters. But more crucially, if removing the
> rmap for all pages of a folio in a batch, there is no need to
> (spuriously) add it to the deferred split list, which saves significant
> cost when there is contention for the split queue lock.
>
> All contained pages are accounted using the order-0 folio (or base page)
> scheme.
>
> page_remove_rmap() is refactored so that it forwards to
> folio_remove_rmap_range() for !compound cases, and both functions now
> share a common epilogue function. The intention here is to avoid
> duplication of code.

What would you think of doing it like this instead? This probably doesn't
even compile and it's definitely not sanity checked; just trying to get
across an idea of the shape of this code. I think this is more like
what DavidH was asking for (but he's on holiday this week so won't be
able to confirm).
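
To recap the win described in the commit message above, in code terms:
today the unmap path makes one call per page, and each per-page call
that leaves the folio partially mapped can queue it for deferred split
(taking the split queue lock); a batched call can see the whole range
go away at once and skip the queueing. Roughly (illustration only,
names invented, untested):

	/* Today: one rmap call per page of the folio. */
	for (i = 0; i < nr; i++)
		page_remove_rmap(page + i, vma, false);

	/* Proposed: one batched call for the whole run; if it takes
	 * down the entire folio, no deferred split is queued. */
	folio_remove_rmap_range(folio, page, nr, vma);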


diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index a3825ce81102..d442d1e5425d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -202,6 +202,8 @@ void folio_add_file_rmap_range(struct folio *, struct page *, unsigned int nr,
struct vm_area_struct *, bool compound);
void page_remove_rmap(struct page *, struct vm_area_struct *,
bool compound);
+void folio_remove_rmap_range(struct folio *folio, struct page *page,
+ int pages, struct vm_area_struct *vma);

void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
diff --git a/mm/rmap.c b/mm/rmap.c
index ec7f8e6c9e48..2592be47452e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1380,24 +1380,26 @@ void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
}

/**
- * page_remove_rmap - take down pte mapping from a page
- * @page: page to remove mapping from
- * @vma: the vm area from which the mapping is removed
- * @compound: uncharge the page as compound or small page
+ * folio_remove_rmap_range - Take down PTE mappings from a range of pages.
+ * @folio: Folio containing all pages in range.
+ * @page: First page in range to unmap.
+ * @pages: Number of pages to unmap. -1 to unmap a PMD.
+ * @vma: The VM area containing the range.
*
- * The caller needs to hold the pte lock.
+ * All pages in the range must belong to the same VMA & folio.
+ *
+ * Context: Caller holds the pte lock.
*/
-void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
- bool compound)
+void folio_remove_rmap_range(struct folio *folio, struct page *page,
+ int pages, struct vm_area_struct *vma)
{
- struct folio *folio = page_folio(page);
atomic_t *mapped = &folio->_nr_pages_mapped;
+ int nr_unmapped = 0;
+ int nr_mapped = 0;
int nr = 0, nr_pmdmapped = 0;
bool last;
enum node_stat_item idx;

- VM_BUG_ON_PAGE(compound && !PageHead(page), page);
-
/* Hugetlb pages are not counted in NR_*MAPPED */
if (unlikely(folio_test_hugetlb(folio))) {
/* hugetlb pages are always mapped with pmds */
@@ -1405,14 +1407,28 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
return;
}

- /* Is page being unmapped by PTE? Is this its last map to be removed? */
- if (likely(!compound)) {
- last = atomic_add_negative(-1, &page->_mapcount);
- nr = last;
- if (last && folio_test_large(folio)) {
- nr = atomic_dec_return_relaxed(mapped);
- nr = (nr < COMPOUND_MAPPED);
+ /* Are we taking down PTE mappings? */
+ if (likely(pages > 0)) {
+ VM_WARN_ON_ONCE(page < folio_page(folio, 0) ||
+ page + pages > folio_page(folio,
+ folio_nr_pages(folio)));
+ while (pages) {
+ /* Is this the page's last map to be removed? */
+ last = atomic_add_negative(-1, &page->_mapcount);
+ if (last)
+ nr_unmapped++;
+ pages--;
+ page++;
}
+
+ /* Pages still mapped if folio mapped entirely */
+ if (folio_test_large(folio)) {
+ nr_mapped = atomic_sub_return_relaxed(nr_unmapped, mapped);
+ if (nr_mapped >= COMPOUND_MAPPED)
+ nr_unmapped = 0;
+ }
+ nr = nr_unmapped;
+
} else if (folio_test_pmd_mappable(folio)) {
/* That test is redundant: it's for safety or to optimize out */

@@ -1441,18 +1454,19 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
idx = NR_FILE_PMDMAPPED;
__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
}
+
if (nr) {
idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
__lruvec_stat_mod_folio(folio, idx, -nr);

/*
- * Queue anon THP for deferred split if at least one
- * page of the folio is unmapped and at least one page
- * is still mapped.
+ * Queue large anon folio for deferred split if at least one
+ * page of the folio is unmapped and at least one page is still
+ * mapped.
*/
- if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
- if (!compound || nr < nr_pmdmapped)
- deferred_split_folio(folio);
+ if (folio_test_large(folio) && folio_test_anon(folio) &&
+ nr_mapped)
+ deferred_split_folio(folio);
}

/*
@@ -1466,6 +1480,21 @@ void page_remove_rmap(struct page *page,
- munlock_vma_folio(folio, vma, compound);
+ /* "pages" was consumed by the PTE loop above; negative means PMD */
+ munlock_vma_folio(folio, vma, pages < 0);
}

+/**
+ * page_remove_rmap - take down pte mapping from a page
+ * @page: page to remove mapping from
+ * @vma: the vm area from which the mapping is removed
+ * @compound: uncharge the page as compound or small page
+ *
+ * The caller needs to hold the pte lock.
+ */
+void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
+ bool compound)
+{
+ folio_remove_rmap_range(page_folio(page), page, compound ? -1 : 1, vma);
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
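
With the wrapper in place, existing PMD and single-page callers don't
need to change, while a batching caller passes the run length directly.
For illustration (caller context invented, not part of the patch):

	/* PTE case: take down a run of nr pages within one folio. */
	folio_remove_rmap_range(folio, page, nr, vma);

	/* PMD case: unchanged; the wrapper forwards this as
	 * folio_remove_rmap_range(folio, page, -1, vma). */
	page_remove_rmap(page, vma, true);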