[RFC PATCH 07/26] mm: page_alloc: move capture_control to the page allocator

From: Johannes Weiner
Date: Tue Apr 18 2023 - 15:14:27 EST


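Compaction capturing is really a component of the page allocator.
Later patches will also disconnect the allocation request size from the
compaction size, so move the setup of the capturing parameters to the
"request domain", i.e. the page allocator: the capture_control is now
declared and published through current->capture_control in
__alloc_pages_direct_compact(), while compact_zone_order() only attaches
and detaches its compact_control via capc->cc, which task_capc() checks
before capturing. No functional change.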

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
include/linux/compaction.h | 3 ++-
mm/compaction.c | 33 ++++++++++-----------------------
mm/page_alloc.c | 25 ++++++++++++++++++++++---
3 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 52a9ff65faee..06eeb2e25833 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -56,6 +56,7 @@ enum compact_result {
};

struct alloc_context; /* in mm/internal.h */
+struct capture_control; /* in mm/internal.h */

/*
* Number of free order-0 pages that should be available above given watermark
@@ -94,7 +95,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, unsigned int alloc_flags,
const struct alloc_context *ac, enum compact_priority prio,
- struct page **page);
+ struct capture_control *capc);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, int highest_zoneidx);
diff --git a/mm/compaction.c b/mm/compaction.c
index 84db84e8fd3a..a2280001eea3 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2510,7 +2510,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
static enum compact_result compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum compact_priority prio,
unsigned int alloc_flags, int highest_zoneidx,
- struct page **capture)
+ struct capture_control *capc)
{
enum compact_result ret;
struct compact_control cc = {
@@ -2527,38 +2527,25 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
};
- struct capture_control capc = {
- .cc = &cc,
- .page = NULL,
- };

- /*
- * Make sure the structs are really initialized before we expose the
- * capture control, in case we are interrupted and the interrupt handler
- * frees a page.
- */
+ /* See the comment in __alloc_pages_direct_compact() */
barrier();
- WRITE_ONCE(current->capture_control, &capc);
+ WRITE_ONCE(capc->cc, &cc);

- ret = compact_zone(&cc, &capc);
+ ret = compact_zone(&cc, capc);
+
+ WRITE_ONCE(capc->cc, NULL);

VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));

- /*
- * Make sure we hide capture control first before we read the captured
- * page pointer, otherwise an interrupt could free and capture a page
- * and we would leak it.
- */
- WRITE_ONCE(current->capture_control, NULL);
- *capture = READ_ONCE(capc.page);
/*
* Technically, it is also possible that compaction is skipped but
* the page is still captured out of luck(IRQ came and freed the page).
* Returning COMPACT_SUCCESS in such cases helps in properly accounting
* the COMPACT[STALL|FAIL] when compaction is skipped.
*/
- if (*capture)
+ if (capc->page)
ret = COMPACT_SUCCESS;

return ret;
@@ -2573,13 +2560,13 @@ int sysctl_extfrag_threshold = 500;
* @alloc_flags: The allocation flags of the current allocation
* @ac: The context of current allocation
* @prio: Determines how hard direct compaction should try to succeed
- * @capture: Pointer to free page created by compaction will be stored here
+ * @capc: The context for capturing pages during freeing
*
* This is the main entry point for direct page compaction.
*/
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
- enum compact_priority prio, struct page **capture)
+ enum compact_priority prio, struct capture_control *capc)
{
int may_perform_io = (__force int)(gfp_mask & __GFP_IO);
struct zoneref *z;
@@ -2607,7 +2594,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
}

status = compact_zone_order(zone, order, gfp_mask, prio,
- alloc_flags, ac->highest_zoneidx, capture);
+ alloc_flags, ac->highest_zoneidx, capc);
rc = max(status, rc);

/* The allocation should succeed, stop compacting */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b9366c002334..4d20513c83be 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -944,7 +944,7 @@ static inline struct capture_control *task_capc(struct zone *zone)
{
struct capture_control *capc = current->capture_control;

- return unlikely(capc) &&
+ return unlikely(capc && capc->cc) &&
!(current->flags & PF_KTHREAD) &&
!capc->page &&
capc->cc->zone == zone ? capc : NULL;
@@ -4480,22 +4480,41 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct page *page = NULL;
unsigned long pflags;
unsigned int noreclaim_flag;
+ struct capture_control capc = {
+ .page = NULL,
+ };

if (!order)
return NULL;

+ /*
+ * Make sure the structs are really initialized before we expose the
+ * capture control, in case we are interrupted and the interrupt handler
+ * frees a page.
+ */
+ barrier();
+ WRITE_ONCE(current->capture_control, &capc);
+
psi_memstall_enter(&pflags);
delayacct_compact_start();
noreclaim_flag = memalloc_noreclaim_save();

*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
- prio, &page);
+ prio, &capc);

memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
delayacct_compact_end();

- if (*compact_result == COMPACT_SKIPPED)
+ /*
+ * Make sure we hide capture control first before we read the captured
+ * page pointer, otherwise an interrupt could free and capture a page
+ * and we would leak it.
+ */
+ WRITE_ONCE(current->capture_control, NULL);
+ page = READ_ONCE(capc.page);
+
+ if (!page && *compact_result == COMPACT_SKIPPED)
return NULL;
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
--
2.39.2