[PATCH] mm/page_alloc: avoid deadlocks for &pagesets.lock

From: Desmond Cheong Zhi Xi
Date: Wed Jul 07 2021 - 07:13:38 EST


Syzbot reports a number of potential deadlocks for &pagesets.lock. It
seems that this new lock is being used as both an inner and outer
lock, which makes it prone to creating circular dependencies.

For example, one such call trace goes as follows:
  __alloc_pages_bulk()
    local_lock_irqsave(&pagesets.lock, flags) <---- outer lock here
    prep_new_page():
      post_alloc_hook():
        set_page_owner():
          __set_page_owner():
            save_stack():
              stack_depot_save():
                alloc_pages():
                  alloc_page_interleave():
                    __alloc_pages():
                      get_page_from_freelist():
                        rmqueue():
                          rmqueue_pcplist():
                            local_lock_irqsave(&pagesets.lock, flags);
*** DEADLOCK ***

The common culprit for the lockdep splats seems to be the call to
local_lock_irqsave(&pagesets.lock, flags) inside
__alloc_pages_bulk(). &pagesets.lock becomes an outer lock if it's
held during the call to prep_new_page().

As the local lock only needs to protect the per-cpu pageset (PCP)
structure, adjust the locking in __alloc_pages_bulk() so that
&pagesets.lock is held just around the PCP list access and the call to
__rmqueue_pcplist(), and is released before prep_new_page() runs.

Fixes: dbbee9d5cd83 ("mm/page_alloc: convert per-cpu list protection to local_lock")
Reported-and-tested-by: syzbot+127fd7828d6eeb611703@xxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@xxxxxxxxx>
---
mm/page_alloc.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0817d88383d5..0e005b1a60e3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5288,10 +5288,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		goto failed;
 
 	/* Attempt the batch allocation */
-	local_lock_irqsave(&pagesets.lock, flags);
-	pcp = this_cpu_ptr(zone->per_cpu_pageset);
-	pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
-
 	while (nr_populated < nr_pages) {
 
 		/* Skip existing pages */
@@ -5300,12 +5296,16 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			continue;
 		}
 
+		local_lock_irqsave(&pagesets.lock, flags);
+		pcp = this_cpu_ptr(zone->per_cpu_pageset);
+		pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
 		page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
 								pcp, pcp_list);
+		local_unlock_irqrestore(&pagesets.lock, flags);
 		if (unlikely(!page)) {
 			/* Try and get at least one page */
 			if (!nr_populated)
-				goto failed_irq;
+				goto failed;
 			break;
 		}
 		nr_account++;
@@ -5318,16 +5318,11 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
-	local_unlock_irqrestore(&pagesets.lock, flags);
-
 	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
 	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
 
 	return nr_populated;
 
-failed_irq:
-	local_unlock_irqrestore(&pagesets.lock, flags);
-
 failed:
 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
 	if (page) {
--
2.25.1