[RFC PATCH 23/26] mm: page_alloc: kill highatomic

From: Johannes Weiner
Date: Tue Apr 18 2023 - 15:15:38 EST


The highatomic reserves are blocks set aside specifically for
higher-order atomic allocations. Since watermarks are now required to
be met in pageblocks, this is no longer necessary.

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
include/linux/gfp.h | 2 -
include/linux/mmzone.h | 6 +-
mm/internal.h | 5 --
mm/page_alloc.c | 187 ++---------------------------------------
mm/vmstat.c | 1 -
5 files changed, 10 insertions(+), 191 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 65a78773dcca..78b5176d354e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -19,8 +19,6 @@ static inline int gfp_migratetype(const gfp_t gfp_flags)
BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE);
- BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >>
- GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC);

if (unlikely(page_group_by_mobility_disabled))
return MIGRATE_UNMOVABLE;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d1083ab81998..c705f2f7c829 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -44,8 +44,7 @@ enum migratetype {
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
- MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
- MIGRATE_FREE,
+ MIGRATE_FREE = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
/*
* MIGRATE_CMA migration type is designed to mimic the way
@@ -142,7 +141,6 @@ enum zone_stat_item {
NR_FREE_UNMOVABLE,
NR_FREE_MOVABLE,
NR_FREE_RECLAIMABLE,
- NR_FREE_HIGHATOMIC,
NR_FREE_FREE,
NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
@@ -713,8 +711,6 @@ struct zone {
unsigned long _watermark[NR_WMARK];
unsigned long watermark_boost;

- unsigned long nr_reserved_highatomic;
-
/*
* We don't know if the memory that we're going to allocate will be
* freeable or/and it will be released eventually, so to avoid totally
diff --git a/mm/internal.h b/mm/internal.h
index 5c76455f8042..24f43f5db88b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -778,11 +778,6 @@ extern const struct trace_print_flags pageflag_names[];
extern const struct trace_print_flags vmaflag_names[];
extern const struct trace_print_flags gfpflag_names[];

-static inline bool is_migrate_highatomic(enum migratetype migratetype)
-{
- return migratetype == MIGRATE_HIGHATOMIC;
-}
-
void setup_zone_pageset(struct zone *zone);

struct migration_target_control {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6f0bfc226c36..e8ae04feb1bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -379,7 +379,6 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
"Movable",
"Reclaimable",
- "HighAtomic",
"Free",
#ifdef CONFIG_CMA
"CMA",
@@ -1202,7 +1201,7 @@ static inline void __free_one_page(struct page *page,
* We want to prevent merge between freepages on pageblock
* without fallbacks and normal pageblock. Without this,
* pageblock isolation could cause incorrect freepage or CMA
- * accounting or HIGHATOMIC accounting.
+ * accounting.
*/
if (migratetype != buddy_mt
&& (!migratetype_is_mergeable(migratetype) ||
@@ -2797,13 +2796,6 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,

old_block_type = get_pageblock_migratetype(page);

- /*
- * This can happen due to races and we want to prevent broken
- * highatomic accounting.
- */
- if (is_migrate_highatomic(old_block_type))
- goto single_page;
-
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
change_pageblock_range(page, current_order, start_type);
@@ -2918,126 +2910,6 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
return -1;
}

-/*
- * Reserve a pageblock for exclusive use of high-order atomic allocations if
- * there are no empty page blocks that contain a page with a suitable order
- */
-static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
- unsigned int alloc_order)
-{
- int mt;
- unsigned long max_managed, flags;
-
- /*
- * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
- * Check is race-prone but harmless.
- */
- max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
- if (zone->nr_reserved_highatomic >= max_managed)
- return;
-
- spin_lock_irqsave(&zone->lock, flags);
-
- /* Recheck the nr_reserved_highatomic limit under the lock */
- if (zone->nr_reserved_highatomic >= max_managed)
- goto out_unlock;
-
- /* Yoink! */
- mt = get_pageblock_migratetype(page);
- /* Only reserve normal pageblocks (i.e., they can merge with others) */
- if (migratetype_is_mergeable(mt)) {
- zone->nr_reserved_highatomic += pageblock_nr_pages;
- set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
- move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC, NULL);
- }
-
-out_unlock:
- spin_unlock_irqrestore(&zone->lock, flags);
-}
-
-/*
- * Used when an allocation is about to fail under memory pressure. This
- * potentially hurts the reliability of high-order allocations when under
- * intense memory pressure but failed atomic allocations should be easier
- * to recover from than an OOM.
- *
- * If @force is true, try to unreserve a pageblock even though highatomic
- * pageblock is exhausted.
- */
-static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
- bool force)
-{
- struct zonelist *zonelist = ac->zonelist;
- unsigned long flags;
- struct zoneref *z;
- struct zone *zone;
- struct page *page;
- int order;
- bool ret;
-
- for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
- ac->nodemask) {
- /*
- * Preserve at least one pageblock unless memory pressure
- * is really high.
- */
- if (!force && zone->nr_reserved_highatomic <=
- pageblock_nr_pages)
- continue;
-
- spin_lock_irqsave(&zone->lock, flags);
- for (order = 0; order < MAX_ORDER; order++) {
- struct free_area *area = &(zone->free_area[order]);
- int mt;
-
- page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
- if (!page)
- continue;
-
- mt = get_pageblock_migratetype(page);
- /*
- * In page freeing path, migratetype change is racy so
- * we can counter several free pages in a pageblock
- * in this loop although we changed the pageblock type
- * from highatomic to ac->migratetype. So we should
- * adjust the count once.
- */
- if (is_migrate_highatomic(mt)) {
- /*
- * It should never happen but changes to
- * locking could inadvertently allow a per-cpu
- * drain to add pages to MIGRATE_HIGHATOMIC
- * while unreserving so be safe and watch for
- * underflows.
- */
- zone->nr_reserved_highatomic -= min(
- pageblock_nr_pages,
- zone->nr_reserved_highatomic);
- }
-
- /*
- * Convert to ac->migratetype and avoid the normal
- * pageblock stealing heuristics. Minimally, the caller
- * is doing the work and needs the pages. More
- * importantly, if the block was always converted to
- * MIGRATE_UNMOVABLE or another type then the number
- * of pageblocks that cannot be completely freed
- * may increase.
- */
- set_pageblock_migratetype(page, ac->migratetype);
- ret = move_freepages_block(zone, page, mt,
- ac->migratetype, NULL);
- if (ret) {
- spin_unlock_irqrestore(&zone->lock, flags);
- return ret;
- }
- }
- spin_unlock_irqrestore(&zone->lock, flags);
- }
-
- return false;
-}
-
/*
* Try finding a free buddy page on the fallback list and put it on the free
* list of requested migratetype, possibly along with other pages from the same
@@ -3510,18 +3382,11 @@ void free_unref_page(struct page *page, unsigned int order)

/*
* We only track unmovable, reclaimable and movable on pcp lists.
- * Place ISOLATE pages on the isolated list because they are being
- * offlined but treat HIGHATOMIC as movable pages so we can get those
- * areas back if necessary. Otherwise, we may have to free
- * excessively into the page allocator
*/
migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
- if (unlikely(is_migrate_isolate(migratetype) || migratetype == MIGRATE_FREE)) {
- free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
- return;
- }
- migratetype = MIGRATE_MOVABLE;
+ free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+ return;
}

zone = page_zone(page);
@@ -3740,24 +3605,11 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
unsigned long flags;

do {
- page = NULL;
spin_lock_irqsave(&zone->lock, flags);
- /*
- * order-0 request can reach here when the pcplist is skipped
- * due to non-CMA allocation context. HIGHATOMIC area is
- * reserved for high-order atomic allocation, so order-0
- * request should skip it.
- */
- if (order > 0 && alloc_flags & ALLOC_HARDER)
- page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
- if (!page) {
- page = __rmqueue(zone, order, migratetype, alloc_flags);
- if (!page) {
- spin_unlock_irqrestore(&zone->lock, flags);
- return NULL;
- }
- }
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
spin_unlock_irqrestore(&zone->lock, flags);
+ if (!page)
+ return NULL;
} while (check_new_pages(page, order));

__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -4003,8 +3855,6 @@ static long __zone_free_pages(struct zone *zone, int alloc_flags, bool safe)
* compaction is necessary to restore the watermarks.
*/
free_pages = page_state(zone, NR_FREE_FREE, safe);
- if (alloc_flags & (ALLOC_HARDER | ALLOC_OOM))
- free_pages += page_state(zone, NR_FREE_HIGHATOMIC, safe);
if (IS_ENABLED(CONFIG_CMA) && (alloc_flags & ALLOC_CMA))
free_pages += page_state(zone, NR_FREE_CMA_PAGES, safe);

@@ -4098,8 +3948,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
return true;
}
#endif
- if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
- return true;
}
return false;
}
@@ -4340,14 +4188,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);
-
- /*
- * If this is a high-order atomic allocation then check
- * if the pageblock should be reserved for the future
- */
- if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
- reserve_highatomic_pageblock(page, zone, order);
-
return page;
} else {
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -4844,7 +4684,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
* Shrink them and try again
*/
if (!page && !drained) {
- unreserve_highatomic_pageblock(ac, false);
drain_all_pages(NULL);
drained = true;
goto retry;
@@ -5013,10 +4852,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Make sure we converge to OOM if we cannot make any progress
* several times in the row.
*/
- if (*no_progress_loops > MAX_RECLAIM_RETRIES) {
- /* Before OOM, exhaust highatomic_reserve */
- return unreserve_highatomic_pageblock(ac, true);
- }
+ if (*no_progress_loops > MAX_RECLAIM_RETRIES)
+ return false;

/*
* Keep reclaiming pages while there is a chance this will lead
@@ -6129,7 +5966,6 @@ static void show_migration_types(unsigned char type)
[MIGRATE_UNMOVABLE] = 'U',
[MIGRATE_MOVABLE] = 'M',
[MIGRATE_RECLAIMABLE] = 'E',
- [MIGRATE_HIGHATOMIC] = 'H',
[MIGRATE_FREE] = 'F',
#ifdef CONFIG_CMA
[MIGRATE_CMA] = 'C',
@@ -6194,7 +6030,7 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
" sec_pagetables:%lu bounce:%lu\n"
" kernel_misc_reclaimable:%lu\n"
" free:%lu free_unmovable:%lu free_movable:%lu\n"
- " free_reclaimable:%lu free_highatomic:%lu free_free:%lu\n"
+ " free_reclaimable:%lu free_free:%lu\n"
" free_cma:%lu free_pcp:%lu\n",
global_node_page_state(NR_ACTIVE_ANON),
global_node_page_state(NR_INACTIVE_ANON),
@@ -6217,7 +6053,6 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
global_zone_page_state(NR_FREE_UNMOVABLE),
global_zone_page_state(NR_FREE_MOVABLE),
global_zone_page_state(NR_FREE_RECLAIMABLE),
- global_zone_page_state(NR_FREE_HIGHATOMIC),
global_zone_page_state(NR_FREE_FREE),
global_zone_page_state(NR_FREE_CMA_PAGES),
free_pcp);
@@ -6301,13 +6136,11 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
" free_unmovable:%lukB"
" free_movable:%lukB"
" free_reclaimable:%lukB"
- " free_highatomic:%lukB"
" free_free:%lukB"
" boost:%lukB"
" min:%lukB"
" low:%lukB"
" high:%lukB"
- " reserved_highatomic:%luKB"
" active_anon:%lukB"
" inactive_anon:%lukB"
" active_file:%lukB"
@@ -6327,13 +6160,11 @@ void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
K(zone_page_state(zone, NR_FREE_UNMOVABLE)),
K(zone_page_state(zone, NR_FREE_MOVABLE)),
K(zone_page_state(zone, NR_FREE_RECLAIMABLE)),
- K(zone_page_state(zone, NR_FREE_HIGHATOMIC)),
K(zone_page_state(zone, NR_FREE_FREE)),
K(zone->watermark_boost),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
- K(zone->nr_reserved_highatomic),
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c8b8e6e259da..a2f7b41564df 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1171,7 +1171,6 @@ const char * const vmstat_text[] = {
"nr_free_unmovable",
"nr_free_movable",
"nr_free_reclaimable",
- "nr_free_highatomic",
"nr_free_free",
"nr_zone_inactive_anon",
"nr_zone_active_anon",
--
2.39.2