[v3 4/4] mm: hugetlb: Skip initialization of gigantic tail struct pages if freed by HVO

From: Usama Arif
Date: Fri Aug 25 2023 - 07:19:57 EST


The new boot flow when it comes to initialization of gigantic pages
is as follows:
- At boot time, for a gigantic page during __alloc_bootmem_hugepage,
the region after the first struct page is marked as noinit.
- This results in only the first struct page to be
initialized in reserve_bootmem_region. As the tail struct pages are
not initialized at this point, there can be a significant saving
in boot time if HVO succeeds later on.
- Later on in the boot, HVO is attempted. If its successful, only the first
HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages
after the head struct page are initialized. If it is not successful,
then all of the tail struct pages are initialized.

Signed-off-by: Usama Arif <usama.arif@xxxxxxxxxxxxx>
---
mm/hugetlb.c | 52 +++++++++++++++++++++++++++++++++++---------
mm/hugetlb_vmemmap.h | 8 +++----
mm/internal.h | 3 +++
mm/mm_init.c | 2 +-
4 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6da626bfb52e..964f7a2b693e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1953,7 +1953,6 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)

static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
{
- hugetlb_vmemmap_optimize(h, &folio->page);
INIT_LIST_HEAD(&folio->lru);
folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
hugetlb_set_folio_subpool(folio, NULL);
@@ -2225,6 +2224,7 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
return NULL;
}
}
+ hugetlb_vmemmap_optimize(h, &folio->page);
prep_new_hugetlb_folio(h, folio, folio_nid(folio));

return folio;
@@ -2943,6 +2943,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
if (!new_folio)
return -ENOMEM;
+ hugetlb_vmemmap_optimize(h, &new_folio->page);
__prep_new_hugetlb_folio(h, new_folio);

retry:
@@ -3206,6 +3207,15 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
}

found:
+
+ /*
+ * Only initialize the head struct page in memmap_init_reserved_pages,
+ * rest of the struct pages will be initialized by the HugeTLB subsystem itself.
+ * The head struct page is used to get folio information by the HugeTLB
+ * subsystem like zone id and node id.
+ */
+ memblock_reserved_mark_noinit_vmemmap(virt_to_phys((void *)m + PAGE_SIZE),
+ huge_page_size(h) - PAGE_SIZE);
/* Put them into a private list first because mem_map is not up yet */
INIT_LIST_HEAD(&m->list);
list_add(&m->list, &huge_boot_pages);
@@ -3213,6 +3223,27 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
return 1;
}

+static void __init hugetlb_folio_init_vmemmap(struct hstate *h, struct folio *folio,
+ unsigned long nr_pages)
+{
+ enum zone_type zone = zone_idx(folio_zone(folio));
+ int nid = folio_nid(folio);
+ unsigned long head_pfn = folio_pfn(folio);
+ unsigned long pfn, end_pfn = head_pfn + nr_pages;
+
+ __folio_clear_reserved(folio);
+ __folio_set_head(folio);
+
+ for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ __init_single_page(page, pfn, zone, nid);
+ prep_compound_tail((struct page *)folio, pfn - head_pfn);
+ set_page_count(page, 0);
+ }
+ prep_compound_head((struct page *)folio, huge_page_order(h));
+}
+
/*
* Put bootmem huge pages into the standard lists after mem_map is up.
* Note: This only applies to gigantic (order > MAX_ORDER) pages.
@@ -3223,19 +3254,19 @@ static void __init gather_bootmem_prealloc(void)

list_for_each_entry(m, &huge_boot_pages, list) {
struct page *page = virt_to_page(m);
- struct folio *folio = page_folio(page);
+ struct folio *folio = (void *)page;
struct hstate *h = m->hstate;
+ unsigned long nr_pages = pages_per_huge_page(h);

VM_BUG_ON(!hstate_is_gigantic(h));
WARN_ON(folio_ref_count(folio) != 1);
- if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
- WARN_ON(folio_test_reserved(folio));
- prep_new_hugetlb_folio(h, folio, folio_nid(folio));
- free_huge_page(page); /* add to the hugepage allocator */
- } else {
- /* VERY unlikely inflated ref count on a tail page */
- free_gigantic_folio(folio, huge_page_order(h));
- }
+
+ hugetlb_vmemmap_optimize(h, &folio->page);
+ if (HPageVmemmapOptimized(&folio->page))
+ nr_pages = HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page);
+ hugetlb_folio_init_vmemmap(h, folio, nr_pages);
+ prep_new_hugetlb_folio(h, folio, folio_nid(folio));
+ free_huge_page(page); /* add to the hugepage allocator */

/*
* We need to restore the 'stolen' pages to totalram_pages
@@ -3656,6 +3687,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
else
prep_compound_page(subpage, target_hstate->order);
folio_change_private(inner_folio, NULL);
+ hugetlb_vmemmap_optimize(h, &folio->page);
prep_new_hugetlb_folio(target_hstate, inner_folio, nid);
free_huge_page(subpage);
}
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 25bd0e002431..d30aff8f3573 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -10,16 +10,16 @@
#define _LINUX_HUGETLB_VMEMMAP_H
#include <linux/hugetlb.h>

-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
-int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
-void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
-
/*
* Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
* Documentation/vm/vmemmap_dedup.rst.
*/
#define HUGETLB_VMEMMAP_RESERVE_SIZE PAGE_SIZE

+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
+void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
+
static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
return pages_per_huge_page(h) * sizeof(struct page);
diff --git a/mm/internal.h b/mm/internal.h
index a7d9e980429a..31b3d45f4609 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1102,4 +1102,7 @@ struct vma_prepare {
struct vm_area_struct *remove;
struct vm_area_struct *remove2;
};
+
+void __meminit __init_single_page(struct page *page, unsigned long pfn,
+ unsigned long zone, int nid);
#endif /* __MM_INTERNAL_H */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index a1963c3322af..3d4ab595ca7d 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -551,7 +551,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
node_states[N_MEMORY] = saved_node_state;
}

-static void __meminit __init_single_page(struct page *page, unsigned long pfn,
+void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid)
{
mm_zero_struct_page(page);
--
2.25.1