[RFC PATCH 14/18] mm: use try_to_free_user_pte() in MADV_DONTNEED case

From: Qi Zheng
Date: Fri Apr 29 2022 - 09:38:07 EST


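Immediately after a successful MADV_DONTNEED operation, the
physical pages have been unmapped from their page table entries
(PTEs). This is a good time to call try_to_free_user_pte() to
try to free the PTE page table page.

To do this, add a free_pte argument that is passed from
unmap_single_vma() down to zap_pte_range(). zap_page_range()
and zap_page_range_single() pass true; unmap_vmas() and
__oom_reap_task_mm() pass false, so try_to_free_user_pte() is
not called on those paths.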

Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
---
mm/internal.h | 3 ++-
mm/memory.c | 43 +++++++++++++++++++++++++++++--------------
mm/oom_kill.c | 3 ++-
3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index cf16280ce132..f93a9170d2e3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -77,7 +77,8 @@ struct zap_details;
void unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
- struct zap_details *details);
+ struct zap_details *details,
+ bool free_pte);

void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
unsigned int order);
diff --git a/mm/memory.c b/mm/memory.c
index aa2bac561d5e..75a0e16a095a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1339,7 +1339,8 @@ static inline bool should_zap_page(struct zap_details *details, struct page *pag
static unsigned long zap_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ struct zap_details *details,
+ bool free_pte)
{
struct mm_struct *mm = tlb->mm;
int force_flush = 0;
@@ -1348,6 +1349,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pte_t *start_pte;
pte_t *pte;
swp_entry_t entry;
+ unsigned long start = addr;

tlb_change_page_size(tlb, PAGE_SIZE);
again:
@@ -1455,13 +1457,17 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
goto again;
}

+ if (free_pte)
+ try_to_free_user_pte(mm, pmd, start, true);
+
return addr;
}

static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ struct zap_details *details,
+ bool free_pte)
{
pmd_t *pmd;
unsigned long next;
@@ -1496,7 +1502,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
*/
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
goto next;
- next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+ next = zap_pte_range(tlb, vma, pmd, addr, next, details,
+ free_pte);
next:
cond_resched();
} while (pmd++, addr = next, addr != end);
@@ -1507,7 +1514,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, p4d_t *p4d,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ struct zap_details *details,
+ bool free_pte)
{
pud_t *pud;
unsigned long next;
@@ -1525,7 +1533,8 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
}
if (pud_none_or_clear_bad(pud))
continue;
- next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+ next = zap_pmd_range(tlb, vma, pud, addr, next, details,
+ free_pte);
next:
cond_resched();
} while (pud++, addr = next, addr != end);
@@ -1536,7 +1545,8 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ struct zap_details *details,
+ bool free_pte)
{
p4d_t *p4d;
unsigned long next;
@@ -1546,7 +1556,8 @@ static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d))
continue;
- next = zap_pud_range(tlb, vma, p4d, addr, next, details);
+ next = zap_pud_range(tlb, vma, p4d, addr, next, details,
+ free_pte);
} while (p4d++, addr = next, addr != end);

return addr;
@@ -1555,7 +1566,8 @@ static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
void unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ struct zap_details *details,
+ bool free_pte)
{
pgd_t *pgd;
unsigned long next;
@@ -1567,7 +1579,8 @@ void unmap_page_range(struct mmu_gather *tlb,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
- next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
+ next = zap_p4d_range(tlb, vma, pgd, addr, next, details,
+ free_pte);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
}
@@ -1576,7 +1589,8 @@ void unmap_page_range(struct mmu_gather *tlb,
static void unmap_single_vma(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr,
- struct zap_details *details)
+ struct zap_details *details,
+ bool free_pte)
{
unsigned long start = max(vma->vm_start, start_addr);
unsigned long end;
@@ -1612,7 +1626,8 @@ static void unmap_single_vma(struct mmu_gather *tlb,
i_mmap_unlock_write(vma->vm_file->f_mapping);
}
} else
- unmap_page_range(tlb, vma, start, end, details);
+ unmap_page_range(tlb, vma, start, end, details,
+ free_pte);
}
}

@@ -1644,7 +1659,7 @@ void unmap_vmas(struct mmu_gather *tlb,
start_addr, end_addr);
mmu_notifier_invalidate_range_start(&range);
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
- unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+ unmap_single_vma(tlb, vma, start_addr, end_addr, NULL, false);
mmu_notifier_invalidate_range_end(&range);
}

@@ -1669,7 +1684,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
- unmap_single_vma(&tlb, vma, start, range.end, NULL);
+ unmap_single_vma(&tlb, vma, start, range.end, NULL, true);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
@@ -1695,7 +1710,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
- unmap_single_vma(&tlb, vma, address, range.end, details);
+ unmap_single_vma(&tlb, vma, address, range.end, details, true);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7ec38194f8e1..c4c25a7add7b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -549,7 +549,8 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
ret = false;
continue;
}
- unmap_page_range(&tlb, vma, range.start, range.end, NULL);
+ unmap_page_range(&tlb, vma, range.start, range.end,
+ NULL, false);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
--
2.20.1