[PATCH] mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY

From: Ben Widawsky
Date: Thu Aug 05 2021 - 23:01:11 EST


Implement the missing huge page allocation functionality while obeying
the preferred node semantics of MPOL_PREFERRED_MANY. As with general
page allocation, a fallback mechanism is used: the preferred nodes are
tried first, and if no page can be found there, the allocation falls
back to all nodes. For the surplus (overcommit) path, the first pass
over the preferred nodes also drops __GFP_DIRECT_RECLAIM and
__GFP_NOFAIL and adds __GFP_NOWARN, so a miss falls through quickly
and quietly to the all-nodes attempt.
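
For illustration only (this snippet is not part of the patch), the new
behaviour can be exercised from userspace roughly as sketched below.
The MPOL_PREFERRED_MANY define (assumed here to be the uapi value 5
added earlier in this series), the file name, the node numbers and the
reserved hugetlb pool are all assumptions of the example:

  /* Build with: gcc -o pref_many pref_many.c -lnuma */
  #define _GNU_SOURCE
  #include <stdio.h>
  #include <numaif.h>     /* mbind() */
  #include <sys/mman.h>

  #ifndef MPOL_PREFERRED_MANY
  #define MPOL_PREFERRED_MANY 5
  #endif

  int main(void)
  {
          unsigned long nodes = (1UL << 0) | (1UL << 1); /* prefer nodes 0-1 */
          size_t len = 2UL << 20;                        /* one 2MB huge page */
          char *p;

          p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
          if (p == MAP_FAILED) {
                  perror("mmap");
                  return 1;
          }

          /* Install the policy on the VMA; no huge page is allocated yet. */
          if (mbind(p, len, MPOL_PREFERRED_MANY, &nodes, sizeof(nodes) * 8, 0)) {
                  perror("mbind");
                  return 1;
          }

          p[0] = 1;       /* fault in the huge page */
          return 0;
  }

The mbind() call only installs the policy; it is the later write fault
that reaches dequeue_huge_page_vma() (or, when the pool has to be
grown, alloc_buddy_huge_page_with_mpol()) changed by this patch.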

To avoid adding too many "#ifdef CONFIG_NUMA" checks, add a helper function
in mempolicy.h to check whether a mempolicy is MPOL_PREFERRED_MANY.
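
A call site can then test the policy without any #ifdef, as in the
hugetlb changes below (sketch only):

	if (mpol_is_preferred_many(mpol)) {
		/* first pass restricted to the preferred nodes */
	}

With CONFIG_NUMA=n the stub returns a constant false, so the compiler
simply drops such branches.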

[akpm: fix a compile issue when merging with another hugetlb patch]
[Thanks to the 0day bot for catching the !CONFIG_NUMA build issue]
[Michal Hocko: suggested removing the #ifdef CONFIG_NUMA check]
Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@xxxxxxxxx
Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@xxxxxxxxx
Suggested-by: Michal Hocko <mhocko@xxxxxxxx>
Signed-off-by: Ben Widawsky <ben.widawsky@xxxxxxxxx>
Co-developed-by: Feng Tang <feng.tang@xxxxxxxxx>
Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx>
---
include/linux/mempolicy.h | 12 ++++++++++++
mm/hugetlb.c              | 28 ++++++++++++++++++++++++----
2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0117e1e..60d5e6c 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -187,6 +187,12 @@ extern void mpol_put_task_policy(struct task_struct *);

extern bool numa_demotion_enabled;

+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+ return (pol->mode == MPOL_PREFERRED_MANY);
+}
+
+
#else

struct mempolicy {};
@@ -297,5 +303,11 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
}

#define numa_demotion_enabled false
+
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+ return false;
+}
+
#endif /* CONFIG_NUMA */
#endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 95714fb..75ea8bc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1145,7 +1145,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
unsigned long address, int avoid_reserve,
long chg)
{
- struct page *page;
+ struct page *page = NULL;
struct mempolicy *mpol;
gfp_t gfp_mask;
nodemask_t *nodemask;
@@ -1166,7 +1166,17 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,

gfp_mask = htlb_alloc_mask(h);
nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+ if (mpol_is_preferred_many(mpol)) {
+ page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+ /* Fallback to all nodes if page==NULL */
+ nodemask = NULL;
+ }
+
+ if (!page)
+ page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
SetHPageRestoreReserve(page);
h->resv_huge_pages--;
@@ -2147,9 +2157,19 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
nodemask_t *nodemask;

nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
- page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
- mpol_cond_put(mpol);
+ if (mpol_is_preferred_many(mpol)) {
+ gfp_t gfp = gfp_mask | __GFP_NOWARN;

+ gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+ page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false);
+
+ /* Fallback to all nodes if page==NULL */
+ nodemask = NULL;
+ }
+
+ if (!page)
+ page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
+ mpol_cond_put(mpol);
return page;
}

--
2.7.4