[RFC PATCH v2 8/9] mm: Handle COW PTE with reclaim algorithm

From: Chih-En Lin
Date: Tue Sep 27 2022 - 12:29:06 EST


To avoid the PFRA (page frame reclaiming algorithm) reclaiming pages that reside
in a COWed PTE table, break COW when using rmap to unmap the page from all processes.

Signed-off-by: Chih-En Lin <shiyn.lin@xxxxxxxxx>
---
include/linux/rmap.h | 2 ++
mm/page_vma_mapped.c | 5 +++++
mm/rmap.c | 2 +-
mm/swapfile.c | 1 +
mm/vmscan.c | 1 +
5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b89b4b86951f8..5c7e3bedc068b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -312,6 +312,8 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
#define PVMW_SYNC (1 << 0)
/* Look for migration entries rather than present PTEs */
#define PVMW_MIGRATION (1 << 1)
+/* Break COW PTE during the walking */
+#define PVMW_COW_PTE (1 << 2)

struct page_vma_mapped_walk {
unsigned long pfn;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 8e9e574d535aa..5008957bbe4a7 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -251,6 +251,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
step_forward(pvmw, PMD_SIZE);
continue;
}
+
+ /* TODO: Is breaking COW PTE here correct? */
+ if (pvmw->flags & PVMW_COW_PTE)
+ handle_cow_pte(vma, pvmw->pmd, pvmw->address, false);
+
if (!map_pte(pvmw))
goto next_pte;
this_pte:
diff --git a/mm/rmap.c b/mm/rmap.c
index 93d5a6f793d20..8f737cb44e48a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1477,7 +1477,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
- DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_COW_PTE);
pte_t pteval;
struct page *subpage;
bool anon_exclusive, ret = true;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1fdccd2f1422e..ef4d3d81a824b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1916,6 +1916,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
do {
cond_resched();
next = pmd_addr_end(addr, end);
+ handle_cow_pte(vma, pmd, addr, false);
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
continue;
ret = unuse_pte_range(vma, pmd, addr, next, type);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b2b1431352dcd..030fad3d310d9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1822,6 +1822,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
/*
* The folio is mapped into the page tables of one or more
* processes. Try to unmap it here.
+ * It will write to the page tables, break COW PTE here.
*/
if (folio_mapped(folio)) {
enum ttu_flags flags = TTU_BATCH_FLUSH;
--
2.37.3