Re: [PATCH v2 06/12] mm/hugetlb: make hugetlb migration target allocation APIs CMA aware

From: Michal Hocko
Date: Tue Jun 09 2020 - 09:53:33 EST


On Wed 27-05-20 15:44:57, Joonsoo Kim wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
>
> There is a user who does not want to use CMA memory for migration. Until
> now, this has been handled on the caller side, but that is not optimal
> since the caller has only limited information. This patch implements it
> on the callee side to get a better result.

I do not follow this changelog and honestly do not see an improvement.
skip_cma in the alloc_control sounds like a hack to me. I can now see
why your earlier patch started to OR in the given gfp_mask; if anything,
that change should be folded into this patch. But even then I do not like
a partial gfp_mask (__GFP_NOWARN on its own really has a GFP_NOWAIT-like
semantic).
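
To make the mask point concrete, here is a rough sketch of what I would
expect new_non_cma_page() to pass down (illustration only; I am assuming
gfp_mask there is still the GFP_USER | __GFP_NOWARN value computed
earlier in that function):

	struct alloc_control ac = {
		.nid = nid,
		.nmask = NULL,
		/*
		 * Hand down the caller's complete mask and only clear
		 * __GFP_MOVABLE to keep the page allocator away from
		 * CMA pageblocks, rather than a bare __GFP_NOWARN.
		 */
		.gfp_mask = gfp_mask & ~__GFP_MOVABLE,
		.skip_cma = true,	/* free-list dequeue side */
	};

	return alloc_huge_page_nodemask(h, &ac);

That keeps the full allocation context visible at the call site instead
of having the callee reconstruct it from a single flag.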

> Acked-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> ---
> include/linux/hugetlb.h | 2 --
> mm/gup.c | 9 +++------
> mm/hugetlb.c | 21 +++++++++++++++++----
> mm/internal.h | 1 +
> 4 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index f482563..3d05f7d 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -503,8 +503,6 @@ struct huge_bootmem_page {
> struct hstate *hstate;
> };
>
> -struct page *alloc_migrate_huge_page(struct hstate *h,
> - struct alloc_control *ac);
> struct page *alloc_huge_page_nodemask(struct hstate *h,
> struct alloc_control *ac);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> diff --git a/mm/gup.c b/mm/gup.c
> index 6b78f11..87eca79 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1617,14 +1617,11 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
> struct alloc_control ac = {
> .nid = nid,
> .nmask = NULL,
> - .gfp_mask = gfp_mask,
> + .gfp_mask = __GFP_NOWARN,
> + .skip_cma = true,
> };
>
> - /*
> - * We don't want to dequeue from the pool because pool pages will
> - * mostly be from the CMA region.
> - */
> - return alloc_migrate_huge_page(h, &ac);
> + return alloc_huge_page_nodemask(h, &ac);
> }
>
> if (PageTransHuge(page)) {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 8132985..e465582 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1033,13 +1033,19 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
> h->free_huge_pages_node[nid]++;
> }
>
> -static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
> +static struct page *dequeue_huge_page_node_exact(struct hstate *h,
> + int nid, bool skip_cma)
> {
> struct page *page;
>
> - list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
> + list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> + if (skip_cma && is_migrate_cma_page(page))
> + continue;
> +
> if (!PageHWPoison(page))
> break;
> + }
> +
> /*
> * if 'non-isolated free hugepage' not found on the list,
> * the allocation fails.
> @@ -1080,7 +1086,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> continue;
> node = zone_to_nid(zone);
>
> - page = dequeue_huge_page_node_exact(h, node);
> + page = dequeue_huge_page_node_exact(h, node, ac->skip_cma);
> if (page)
> return page;
> }
> @@ -1937,7 +1943,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
> return page;
> }
>
> -struct page *alloc_migrate_huge_page(struct hstate *h,
> +static struct page *alloc_migrate_huge_page(struct hstate *h,
> struct alloc_control *ac)
> {
> struct page *page;
> @@ -1999,6 +2005,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
> }
> spin_unlock(&hugetlb_lock);
>
> + /*
> + * clearing __GFP_MOVABLE flag ensure that allocated page
> + * will not come from CMA area
> + */
> + if (ac->skip_cma)
> + ac->gfp_mask &= ~__GFP_MOVABLE;
> +
> return alloc_migrate_huge_page(h, ac);
> }
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 6e613ce..159cfd6 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -618,6 +618,7 @@ struct alloc_control {
> int nid; /* preferred node id */
> nodemask_t *nmask;
> gfp_t gfp_mask;
> + bool skip_cma;
> };
>
> #endif /* __MM_INTERNAL_H */
> --
> 2.7.4
>

--
Michal Hocko
SUSE Labs