[PATCH v5 13/35] mm, slub: do initial checks in ___slab_alloc() with irqs enabled

From: Vlastimil Babka
Date: Mon Aug 23 2021 - 11:00:10 EST


As another step of shortening irq disabled sections in ___slab_alloc(), delay
disabling irqs until we pass the initial checks if there is a cached percpu
slab and it's suitable for our allocation.

Now we have to recheck c->page after actually disabling irqs as an allocation
in irq handler might have replaced it.

Because we call pfmemalloc_match() as one of the checks, we might hit
VM_BUG_ON_PAGE(!PageSlab(page)) in PageSlabPfmemalloc in case we get
interrupted and the page is freed. Thus introduce a pfmemalloc_match_unsafe()
variant that lacks the PageSlab check.

Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
Acked-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
---
include/linux/page-flags.h | 9 +++++++
mm/slub.c | 54 +++++++++++++++++++++++++++++++-------
2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5922031ffab6..7fda4fb85bdc 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -815,6 +815,15 @@ static inline int PageSlabPfmemalloc(struct page *page)
return PageActive(page);
}

+/*
+ * A version of PageSlabPfmemalloc() for opportunistic checks where the page
+ * might have been freed under us and not be a PageSlab anymore.
+ */
+static inline int __PageSlabPfmemalloc(struct page *page)
+{
+ return PageActive(page);
+}
+
static inline void SetPageSlabPfmemalloc(struct page *page)
{
VM_BUG_ON_PAGE(!PageSlab(page), page);
diff --git a/mm/slub.c b/mm/slub.c
index 31f946e03823..fcc38638c645 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2606,6 +2606,19 @@ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
return true;
}

+/*
+ * A variant of pfmemalloc_match() that tests page flags without asserting
+ * PageSlab. Intended for opportunistic checks before taking a lock and
+ * rechecking that nobody else freed the page under us.
+ */
+static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags)
+{
+ if (unlikely(__PageSlabPfmemalloc(page)))
+ return gfp_pfmemalloc_allowed(gfpflags);
+
+ return true;
+}
+
/*
* Check the page->freelist of a page and either transfer the freelist to the
* per cpu freelist or deactivate the page.
@@ -2668,8 +2681,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,

stat(s, ALLOC_SLOWPATH);

- local_irq_save(flags);
- page = c->page;
+reread_page:
+
+ page = READ_ONCE(c->page);
if (!page) {
/*
* if the node is not online or has no normal memory, just
@@ -2678,6 +2692,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
if (unlikely(node != NUMA_NO_NODE &&
!node_isset(node, slab_nodes)))
node = NUMA_NO_NODE;
+ local_irq_save(flags);
+ if (unlikely(c->page)) {
+ local_irq_restore(flags);
+ goto reread_page;
+ }
goto new_slab;
}
redo:
@@ -2692,8 +2711,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto redo;
} else {
stat(s, ALLOC_NODE_MISMATCH);
- deactivate_slab(s, page, c->freelist, c);
- goto new_slab;
+ goto deactivate_slab;
}
}

@@ -2702,12 +2720,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* PFMEMALLOC but right now, we are losing the pfmemalloc
* information when the page leaves the per-cpu allocator
*/
- if (unlikely(!pfmemalloc_match(page, gfpflags))) {
- deactivate_slab(s, page, c->freelist, c);
- goto new_slab;
- }
+ if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags)))
+ goto deactivate_slab;

- /* must check again c->freelist in case of cpu migration or IRQ */
+ /* must check again c->page in case IRQ handler changed it */
+ local_irq_save(flags);
+ if (unlikely(page != c->page)) {
+ local_irq_restore(flags);
+ goto reread_page;
+ }
freelist = c->freelist;
if (freelist)
goto load_freelist;
@@ -2723,6 +2744,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
stat(s, ALLOC_REFILL);

load_freelist:
+
+ lockdep_assert_irqs_disabled();
+
/*
* freelist is pointing to the list of objects to be used.
* page is pointing to the page from which the objects are obtained.
@@ -2734,11 +2758,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
local_irq_restore(flags);
return freelist;

+deactivate_slab:
+
+ local_irq_save(flags);
+ if (page != c->page) {
+ local_irq_restore(flags);
+ goto reread_page;
+ }
+ deactivate_slab(s, page, c->freelist, c);
+
new_slab:

+ lockdep_assert_irqs_disabled();
+
if (slub_percpu_partial(c)) {
page = c->page = slub_percpu_partial(c);
slub_set_percpu_partial(c, page);
+ local_irq_restore(flags);
stat(s, CPU_PARTIAL_ALLOC);
goto redo;
}
--
2.32.0