Re: [Resend RFC PATCH] mm: introduce __GFP_TRACKLEAK to track in-kernel allocation

From: Andrew Morton
Date: Fri Sep 02 2022 - 14:58:46 EST


Cc willy for page-flags changes.

On Fri, 2 Sep 2022 18:59:07 +0800 "zhaoyang.huang" <zhaoyang.huang@xxxxxxxxxx> wrote:

> From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
>
> Kthread and drivers could fetch memory via alloc_pages directly which make them
> hard to debug when leaking. Solve this by introducing __GFP_TRACELEAK and reuse
> kmemleak mechanism which unified most of kernel cosuming pages into kmemleak.
>
> ...
>

cc wi
> index 2d2ccae..081ab54 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -68,6 +68,7 @@
> #else
> #define ___GFP_NOLOCKDEP 0
> #endif
> +#define ___GFP_TRACKLEAK 0x10000000u
> /* If the above are modified, __GFP_BITS_SHIFT may need updating */
>
> /*
> @@ -259,12 +260,13 @@
> #define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
> #define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON)
> #define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
> +#define __GFP_TRACKLEAK ((__force gfp_t)___GFP_TRACKLEAK)
>
> /* Disable lockdep for GFP context tracking */
> #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
>
> /* Room for N __GFP_FOO bits */
> -#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP))
> +#define __GFP_BITS_SHIFT (28 + IS_ENABLED(CONFIG_LOCKDEP))
> #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
>
> /**
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index e66f7aa..ef0f814 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -942,6 +942,7 @@ static inline bool is_page_hwpoison(struct page *page)
> #define PG_offline 0x00000100
> #define PG_table 0x00000200
> #define PG_guard 0x00000400
> +#define PG_trackleak 0x00000800
>
> #define PageType(page, flag) \
> ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
> @@ -1012,6 +1013,8 @@ static inline int page_has_type(struct page *page)
> */
> PAGE_TYPE_OPS(Guard, guard)
>
> +PAGE_TYPE_OPS(Trackleak, trackleak)

We'd want this to evaluate to zero at compile time if
CONFIG_HAVE_DEBUG_KMEMLEAK=n

> extern bool is_free_buddy_page(struct page *page);
>
> PAGEFLAG(Isolated, isolated, PF_ANY);
> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
> index 422f28f..a182f5d 100644
> --- a/mm/kmemleak.c
> +++ b/mm/kmemleak.c
> @@ -1471,7 +1471,7 @@ static void kmemleak_scan(void)
> if (page_zone(page) != zone)
> continue;
> /* only scan if page is in use */
> - if (page_count(page) == 0 || PageReserved(page))
> + if (page_count(page) == 0)

Please changelog this alteration.

> continue;
> scan_block(page, page + 1, NULL);
> if (!(pfn & 63))
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e008a3d..d8995c6 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1361,6 +1361,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
> page->mapping = NULL;
> if (memcg_kmem_enabled() && PageMemcgKmem(page))
> __memcg_kmem_uncharge_page(page, order);
> + if (PageTrackleak(page))
> + kmemleak_free(page);
> if (check_free)
> bad += check_free_page(page);
> if (bad)
> @@ -5444,6 +5446,10 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
> __free_pages(page, order);
> page = NULL;
> }
> + if (gfp & __GFP_TRACKLEAK) {

And we'd want __GFP_TRACKLEAK to evaluate to zero at compile time if
CONFIG_HAVE_DEBUG_KMEMLEAK=n.

> + kmemleak_alloc(page_address(page), PAGE_SIZE << order, 1, gfp & ~__GFP_TRACKLEAK);
> + __SetPageTrackleak(page);
> + }
>
> trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
>
> --
> 1.9.1