[PATCH v7 08/15] mm/hugetlb: Free the vmemmap pages associated with each hugetlb page

From: Muchun Song
Date: Mon Nov 30 2020 - 10:22:11 EST


When we allocate a hugetlb page from the buddy allocator, we should free
the unused vmemmap pages associated with it. We can do that in
prep_new_huge_page().
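
For example, with the default 2 MB hugetlb page size on x86_64 (4 KB base
pages, and assuming a 64-byte struct page), each 2 MB hugetlb page is
described by 512 struct pages, i.e. 512 * 64 bytes = 32 KB = 8 vmemmap
pages. Two of those pages are kept as the reserved vmemmap area, so the
remaining 6 pages (24 KB) can be freed for every 2 MB hugetlb page.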

Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
mm/hugetlb.c | 2 +
mm/hugetlb_vmemmap.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++
mm/hugetlb_vmemmap.h | 5 ++
3 files changed, 146 insertions(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 25f9e8e9fc4a..93dee37ceb6d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1498,6 +1498,8 @@ void free_huge_page(struct page *page)

static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
+ free_huge_page_vmemmap(h, page);
+
INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
set_hugetlb_cgroup(page, NULL);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index ad8fc61ea273..2c997b5de3b6 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -94,6 +94,7 @@
*/
#define pr_fmt(fmt) "HugeTLB vmemmap: " fmt

+#include <linux/bootmem_info.h>
#include "hugetlb_vmemmap.h"

/*
@@ -105,6 +106,144 @@
* these page frames. Therefore, we need to reserve two pages as vmemmap areas.
*/
#define RESERVE_VMEMMAP_NR 2U
+#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
+#define VMEMMAP_TAIL_PAGE_REUSE -1
+
+#ifndef VMEMMAP_HPAGE_SHIFT
+#define VMEMMAP_HPAGE_SHIFT HPAGE_SHIFT
+#endif
+#define VMEMMAP_HPAGE_ORDER (VMEMMAP_HPAGE_SHIFT - PAGE_SHIFT)
+#define VMEMMAP_HPAGE_NR (1 << VMEMMAP_HPAGE_ORDER)
+#define VMEMMAP_HPAGE_SIZE (1UL << VMEMMAP_HPAGE_SHIFT)
+#define VMEMMAP_HPAGE_MASK (~(VMEMMAP_HPAGE_SIZE - 1))
+
+#define vmemmap_hpage_addr_end(addr, end) \
+({ \
+ unsigned long __boundary; \
+ __boundary = ((addr) + VMEMMAP_HPAGE_SIZE) & VMEMMAP_HPAGE_MASK; \
+ (__boundary - 1 < (end) - 1) ? __boundary : (end); \
+})
+
+static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
+{
+ return h->nr_free_vmemmap_pages;
+}
+
+static inline unsigned int vmemmap_pages_per_hpage(struct hstate *h)
+{
+ return free_vmemmap_pages_per_hpage(h) + RESERVE_VMEMMAP_NR;
+}
+
+static inline unsigned long vmemmap_pages_size_per_hpage(struct hstate *h)
+{
+ return (unsigned long)vmemmap_pages_per_hpage(h) << PAGE_SHIFT;
+}
+
+/*
+ * Walk a vmemmap address down to the pmd entry that maps it.
+ */
+static pmd_t *vmemmap_to_pmd(unsigned long addr)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ return NULL;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ return NULL;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return NULL;
+
+ return pmd;
+}
+
+static void vmemmap_reuse_pte_range(struct page *reuse, pte_t *pte,
+ unsigned long start, unsigned long end,
+ struct list_head *vmemmap_pages)
+{
+ /*
+ * Map the tail pages read-only to catch illegal write
+ * operations to the tail pages.
+ */
+ pgprot_t pgprot = PAGE_KERNEL_RO;
+ pte_t entry = mk_pte(reuse, pgprot);
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE, pte++) {
+ struct page *page;
+
+ VM_BUG_ON(pte_none(*pte));
+
+ page = pte_page(*pte);
+ list_add(&page->lru, vmemmap_pages);
+
+ set_pte_at(&init_mm, addr, pte, entry);
+ }
+}
+
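+/*
+ * Remap the vmemmap range [start, end) so that every pte in it points at
+ * the page backing the pte just before @start, and collect the pages that
+ * previously backed the range on @vmemmap_pages so they can be freed.
+ */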
+static void vmemmap_remap_range(unsigned long start, unsigned long end,
+ struct list_head *vmemmap_pages)
+{
+ pmd_t *pmd;
+ unsigned long next, addr = start;
+ struct page *reuse = NULL;
+
+ VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
+ VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
+ VM_BUG_ON((start >> PUD_SHIFT) != (end >> PUD_SHIFT));
+
+ pmd = vmemmap_to_pmd(addr);
+ BUG_ON(!pmd);
+
+ do {
+ pte_t *pte = pte_offset_kernel(pmd, addr);
+
+ if (!reuse)
+ reuse = pte_page(pte[VMEMMAP_TAIL_PAGE_REUSE]);
+
+ next = vmemmap_hpage_addr_end(addr, end);
+ vmemmap_reuse_pte_range(reuse, pte, addr, next, vmemmap_pages);
+ } while (pmd++, addr = next, addr != end);
+
+ flush_tlb_kernel_range(start, end);
+}
+
+static inline void free_vmemmap_page_list(struct list_head *list)
+{
+ struct page *page, *next;
+
+ list_for_each_entry_safe(page, next, list, lru) {
+ list_del(&page->lru);
+ free_vmemmap_page(page);
+ }
+}
+
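+/*
+ * Free the unused vmemmap pages associated with the hugetlb page @head:
+ * everything beyond the reserved vmemmap area is remapped to a single
+ * reused vmemmap page and the pages that previously backed it are freed.
+ */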
+void free_huge_page_vmemmap(struct hstate *h, struct page *head)
+{
+ unsigned long start, end;
+ unsigned long vmemmap_addr = (unsigned long)head;
+ LIST_HEAD(vmemmap_pages);
+
+ if (!free_vmemmap_pages_per_hpage(h))
+ return;
+
+ start = vmemmap_addr + RESERVE_VMEMMAP_SIZE;
+ end = vmemmap_addr + vmemmap_pages_size_per_hpage(h);
+ vmemmap_remap_range(start, end, &vmemmap_pages);
+
+ free_vmemmap_page_list(&vmemmap_pages);
+}

void __init hugetlb_vmemmap_init(struct hstate *h)
{
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 40c0c7dfb60d..67113b67495f 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -12,9 +12,14 @@

#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
void __init hugetlb_vmemmap_init(struct hstate *h);
+void free_huge_page_vmemmap(struct hstate *h, struct page *head);
#else
static inline void hugetlb_vmemmap_init(struct hstate *h)
{
}
+
+static inline void free_huge_page_vmemmap(struct hstate *h, struct page *head)
+{
+}
#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
#endif /* _LINUX_HUGETLB_VMEMMAP_H */
--
2.11.0