[PATCH V2] mm/page_alloc: Add alloc_contig_pages()

From: Anshuman Khandual
Date: Wed Oct 16 2019 - 07:02:29 EST


The HugeTLB helper alloc_gigantic_page() implements a fairly generic
allocation method: it scans over various zones looking for a large
contiguous pfn range and then tries to allocate it with
alloc_contig_range(). Other than deriving the requested order from
'struct hstate', there is nothing HugeTLB specific in there. This can be
made available for general use to allocate contiguous memory that could
not be allocated through the buddy allocator.

alloc_gigantic_page() has been split up, carving out the actual
allocation method, which is now made available through a new
alloc_contig_pages() helper wrapped under CONFIG_CONTIG_ALLOC. All
references to 'gigantic' have been replaced with the more generic term
'contig'. Pages allocated this way should be freed with
free_contig_range() or by calling __free_page() on each allocated page.
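
For illustration only (not part of this patch), a caller of the new
helper might look roughly like the sketch below. It assumes
CONFIG_CONTIG_ALLOC is enabled; demo_alloc_contig() is a made-up name,
and the choice of GFP_KERNEL and numa_mem_id() is just one possibility:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/topology.h>

/* Illustrative sketch: allocate 'nr' physically contiguous pages. */
static int demo_alloc_contig(unsigned long nr)
{
	struct page *page;

	/* Scan zones permitted by GFP_KERNEL, preferring the local node. */
	page = alloc_contig_pages(nr, GFP_KERNEL, numa_mem_id(), NULL);
	if (!page)
		return -ENOMEM;

	/* ... use the physically contiguous range [page, page + nr) ... */

	/* Give the whole range back; __free_page() per page works too. */
	free_contig_range(page_to_pfn(page), nr);
	return 0;
}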

Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Oscar Salvador <osalvador@xxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Pavel Tatashin <pavel.tatashin@xxxxxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Acked-by: Michal Hocko <mhocko@xxxxxxxx>
Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
This is based on https://patchwork.kernel.org/patch/11190213/

Changes in V2:

- Rephrased patch subject per David
- Fixed all typos per David
- s/order/contiguous

Changes from [V5,1/2] mm/hugetlb: Make alloc_gigantic_page()...

- alloc_contig_pages() takes nr_pages instead of order per Michal
- s/gigantic/contig on all related functions

 include/linux/gfp.h |  2 +
 mm/hugetlb.c        | 77 +----------------------------------
 mm/page_alloc.c     | 97 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 75 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fb07b503dc45..1a11d4857027 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -589,6 +589,8 @@ static inline bool pm_suspended_storage(void)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
			       unsigned migratetype, gfp_t gfp_mask);
+extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
+				       int nid, nodemask_t *nodemask);
 #endif
 void free_contig_range(unsigned long pfn, unsigned int nr_pages);

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 985ee15eb04b..a5c2c880af27 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1023,85 +1023,12 @@ static void free_gigantic_page(struct page *page, unsigned int order)
 }
 
 #ifdef CONFIG_CONTIG_ALLOC
-static int __alloc_gigantic_page(unsigned long start_pfn,
-				unsigned long nr_pages, gfp_t gfp_mask)
-{
-	unsigned long end_pfn = start_pfn + nr_pages;
-	return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
-				  gfp_mask);
-}
-
-static bool pfn_range_valid_gigantic(struct zone *z,
-			unsigned long start_pfn, unsigned long nr_pages)
-{
-	unsigned long i, end_pfn = start_pfn + nr_pages;
-	struct page *page;
-
-	for (i = start_pfn; i < end_pfn; i++) {
-		page = pfn_to_online_page(i);
-		if (!page)
-			return false;
-
-		if (page_zone(page) != z)
-			return false;
-
-		if (PageReserved(page))
-			return false;
-
-		if (page_count(page) > 0)
-			return false;
-
-		if (PageHuge(page))
-			return false;
-	}
-
-	return true;
-}
-
-static bool zone_spans_last_pfn(const struct zone *zone,
-			unsigned long start_pfn, unsigned long nr_pages)
-{
-	unsigned long last_pfn = start_pfn + nr_pages - 1;
-	return zone_spans_pfn(zone, last_pfn);
-}
-
 static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
-	unsigned int order = huge_page_order(h);
-	unsigned long nr_pages = 1 << order;
-	unsigned long ret, pfn, flags;
-	struct zonelist *zonelist;
-	struct zone *zone;
-	struct zoneref *z;
-
-	zonelist = node_zonelist(nid, gfp_mask);
-	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) {
-		spin_lock_irqsave(&zone->lock, flags);
+	unsigned long nr_pages = 1UL << huge_page_order(h);
 
-		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
-		while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
-			if (pfn_range_valid_gigantic(zone, pfn, nr_pages)) {
-				/*
-				 * We release the zone lock here because
-				 * alloc_contig_range() will also lock the zone
-				 * at some point. If there's an allocation
-				 * spinning on this lock, it may win the race
-				 * and cause alloc_contig_range() to fail...
-				 */
-				spin_unlock_irqrestore(&zone->lock, flags);
-				ret = __alloc_gigantic_page(pfn, nr_pages, gfp_mask);
-				if (!ret)
-					return pfn_to_page(pfn);
-				spin_lock_irqsave(&zone->lock, flags);
-			}
-			pfn += nr_pages;
-		}
-
-		spin_unlock_irqrestore(&zone->lock, flags);
-	}
-
-	return NULL;
+	return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd1dd0712624..880bcbc5380d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8499,6 +8499,103 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 				pfn_max_align_up(end), migratetype);
 	return ret;
 }
+
+static int __alloc_contig_pages(unsigned long start_pfn,
+				unsigned long nr_pages, gfp_t gfp_mask)
+{
+	unsigned long end_pfn = start_pfn + nr_pages;
+
+	return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
+				  gfp_mask);
+}
+
+static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
+				   unsigned long nr_pages)
+{
+	unsigned long i, end_pfn = start_pfn + nr_pages;
+	struct page *page;
+
+	for (i = start_pfn; i < end_pfn; i++) {
+		page = pfn_to_online_page(i);
+		if (!page)
+			return false;
+
+		if (page_zone(page) != z)
+			return false;
+
+		if (PageReserved(page))
+			return false;
+
+		if (page_count(page) > 0)
+			return false;
+
+		if (PageHuge(page))
+			return false;
+	}
+	return true;
+}
+
+static bool zone_spans_last_pfn(const struct zone *zone,
+				unsigned long start_pfn, unsigned long nr_pages)
+{
+	unsigned long last_pfn = start_pfn + nr_pages - 1;
+
+	return zone_spans_pfn(zone, last_pfn);
+}
+
+/**
+ * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
+ * @nr_pages: Number of contiguous pages to allocate
+ * @gfp_mask: GFP mask to limit search and used during compaction
+ * @nid: Target node
+ * @nodemask: Mask for other possible nodes
+ *
+ * This routine is a wrapper around alloc_contig_range(). It scans over zones
+ * on an applicable zonelist to find a contiguous pfn range which can then be
+ * tried for allocation with alloc_contig_range(). This routine is intended
+ * for allocation requests which can not be fulfilled with the buddy allocator.
+ *
+ * Allocated pages can be freed with free_contig_range() or by manually calling
+ * __free_page() on each allocated page.
+ *
+ * Return: pointer to contiguous pages on success, or NULL if not successful.
+ */
+struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
+				int nid, nodemask_t *nodemask)
+{
+	unsigned long ret, pfn, flags;
+	struct zonelist *zonelist;
+	struct zone *zone;
+	struct zoneref *z;
+
+	zonelist = node_zonelist(nid, gfp_mask);
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+					gfp_zone(gfp_mask), nodemask) {
+		spin_lock_irqsave(&zone->lock, flags);
+
+		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
+		while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
+			if (pfn_range_valid_contig(zone, pfn, nr_pages)) {
+				/*
+				 * We release the zone lock here because
+				 * alloc_contig_range() will also lock the zone
+				 * at some point. If there's an allocation
+				 * spinning on this lock, it may win the race
+				 * and cause alloc_contig_range() to fail...
+				 */
+				spin_unlock_irqrestore(&zone->lock, flags);
+				ret = __alloc_contig_pages(pfn, nr_pages,
+							   gfp_mask);
+				if (!ret)
+					return pfn_to_page(pfn);
+				spin_lock_irqsave(&zone->lock, flags);
+			}
+			pfn += nr_pages;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+	return NULL;
+}
 #endif /* CONFIG_CONTIG_ALLOC */
 
 void free_contig_range(unsigned long pfn, unsigned int nr_pages)
--
2.20.1