Re: [RFC PATCH 4/4] [PATCH 4/4] mm: add tracing for VMA merges

From: Liam Howlett
Date: Fri Feb 18 2022 - 14:59:01 EST


* Jakub Matěna <matenajakub@xxxxxxxxx> [220218 07:21]:
> Adds trace support for vma_merge to measure successful and unsuccessful
> merges of two VMAs with distinct anon_vmas and also trace support for
> merges made possible by update of page offset made possible by a previous
> patch in this series.
>
> Signed-off-by: Jakub Matěna <matenajakub@xxxxxxxxx>
> ---
> include/trace/events/mmap.h | 55 ++++++++++++++++++++++++++++++++
> mm/internal.h | 11 +++++++
> mm/mmap.c | 63 ++++++++++++++++++++-----------------
> 3 files changed, 100 insertions(+), 29 deletions(-)
>
> diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
> index 4661f7ba07c0..9f6439e2ed2d 100644
> --- a/include/trace/events/mmap.h
> +++ b/include/trace/events/mmap.h
> @@ -6,6 +6,7 @@
> #define _TRACE_MMAP_H
>
> #include <linux/tracepoint.h>
> +#include <../mm/internal.h>
>
> TRACE_EVENT(vm_unmapped_area,
>
> @@ -42,6 +43,60 @@ TRACE_EVENT(vm_unmapped_area,
> __entry->low_limit, __entry->high_limit, __entry->align_mask,
> __entry->align_offset)
> );
> +
> +TRACE_EVENT(vm_av_merge,
> +
> + TP_PROTO(int merged, enum vma_merge_res merge_prev, enum vma_merge_res merge_next, enum vma_merge_res merge_both),
> +
> + TP_ARGS(merged, merge_prev, merge_next, merge_both),
> +
> + TP_STRUCT__entry(
> + __field(int, merged)
> + __field(enum vma_merge_res, predecessor_different_av)
> + __field(enum vma_merge_res, successor_different_av)
> + __field(enum vma_merge_res, predecessor_with_successor_different_av)
> + __field(int, diff_count)
> + __field(int, failed_count)
> + ),
> +
> + TP_fast_assign(
> + __entry->merged = merged == 0;
> + __entry->predecessor_different_av = merge_prev;
> + __entry->successor_different_av = merge_next;
> + __entry->predecessor_with_successor_different_av = merge_both;
> + __entry->diff_count = (merge_prev == AV_MERGE_DIFFERENT)
> + + (merge_next == AV_MERGE_DIFFERENT) + (merge_both == AV_MERGE_DIFFERENT);
> + __entry->failed_count = (merge_prev == AV_MERGE_FAILED)
> + + (merge_next == AV_MERGE_FAILED) + (merge_both == AV_MERGE_FAILED);
> + ),
> +
> + TP_printk("merged=%d predecessor=%d successor=%d predecessor_with_successor=%d diff_count=%d failed_count=%d\n",
> + __entry->merged,
> + __entry->predecessor_different_av, __entry->successor_different_av,
> + __entry->predecessor_with_successor_different_av,
> + __entry->diff_count, __entry->failed_count)
> +
> +);
> +
> +TRACE_EVENT(vm_pgoff_merge,
> +
> + TP_PROTO(struct vm_area_struct *vma, bool anon_pgoff_updated),
> +
> + TP_ARGS(vma, anon_pgoff_updated),
> +
> + TP_STRUCT__entry(
> + __field(bool, faulted)
> + __field(bool, updated)
> + ),
> +
> + TP_fast_assign(
> + __entry->faulted = vma->anon_vma;
> + __entry->updated = anon_pgoff_updated;
> + ),
> +
> + TP_printk("faulted=%d updated=%d\n",
> + __entry->faulted, __entry->updated)
> +);
> #endif
>
> /* This part must be outside protection */
> diff --git a/mm/internal.h b/mm/internal.h
> index d80300392a19..b3e482175518 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -34,6 +34,17 @@ struct folio_batch;
> /* Do not use these with a slab allocator */
> #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>
> +/*
> + * Following values indicate reason for merge success or failure.
> + */
> +enum vma_merge_res {
> + MERGE_FAILED,
> + AV_MERGE_FAILED,
> + AV_MERGE_SAME,
> + MERGE_OK = AV_MERGE_SAME,
> + AV_MERGE_DIFFERENT,
> +};
> +
> void page_writeback_init(void);
>
> static inline void *folio_raw_mapping(struct folio *folio)
> diff --git a/mm/mmap.c b/mm/mmap.c
> index ed91d0cd2111..10c76c6a3288 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1064,21 +1064,21 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
> */
> if ((!anon_vma1 || !anon_vma2) && (!vma ||
> list_is_singular(&vma->anon_vma_chain)))
> - return 1;
> + return AV_MERGE_SAME;
> if (anon_vma1 == anon_vma2)
> - return 1;
> + return AV_MERGE_SAME;
> /*
> * Different anon_vma but not shared by several processes
> */
> else if ((anon_vma1 && anon_vma2) &&
> (anon_vma1 == anon_vma1->root)
> && (rbt_no_children(anon_vma1)))
> - return 1;
> + return AV_MERGE_DIFFERENT;
> /*
> * Different anon_vma and shared -> unmergeable
> */
> else
> - return 0;
> + return AV_MERGE_FAILED;
> }
>
> /*
> @@ -1099,12 +1099,10 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
> struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> const char *anon_name)
> {
> - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
> - is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name))
> if (vma->vm_pgoff == vm_pgoff)
> - return 1;
> - }
> - return 0;
> + return is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma);
> + return MERGE_FAILED;
> }
>
> /*
> @@ -1121,14 +1119,13 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
> struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> const char *anon_name)
> {
> - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
> - is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name)) {
> pgoff_t vm_pglen;
> vm_pglen = vma_pages(vma);
> if (vma->vm_pgoff + vm_pglen == vm_pgoff)
> - return 1;
> + return is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma);
> }
> - return 0;
> + return MERGE_FAILED;
> }
>
> /*
> @@ -1185,9 +1182,14 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
> pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
> struct vm_area_struct *area, *next;
> int err;
> - int merge_prev = 0;
> - int merge_both = 0;
> - int merge_next = 0;
> + /*
> + * Following three variables are used to store values
> + * indicating wheather this VMA and its anon_vma can
> + * be merged and also the type of failure or success.
> + */
> + enum vma_merge_res merge_prev = MERGE_FAILED;
> + enum vma_merge_res merge_both = MERGE_FAILED;
> + enum vma_merge_res merge_next = MERGE_FAILED;
>
> /*
> * We later require that vma->vm_flags == vm_flags,
> @@ -1210,38 +1212,39 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
> * Can we merge predecessor?
> */
> if (prev && prev->vm_end == addr &&
> - mpol_equal(vma_policy(prev), policy) &&
> - can_vma_merge_after(prev, vm_flags,
> + mpol_equal(vma_policy(prev), policy)) {
> + merge_prev = can_vma_merge_after(prev, vm_flags,
> anon_vma, file, pgoff,
> - vm_userfaultfd_ctx, anon_name)) {
> - merge_prev = true;
> + vm_userfaultfd_ctx, anon_name);
> }
> +
> /*
> * Can we merge successor?
> */
> if (next && end == next->vm_start &&
> - mpol_equal(policy, vma_policy(next)) &&
> - can_vma_merge_before(next, vm_flags,
> + mpol_equal(policy, vma_policy(next))) {
> + merge_next = can_vma_merge_before(next, vm_flags,
> anon_vma, file, pgoff+pglen,
> - vm_userfaultfd_ctx, anon_name)) {
> - merge_next = true;
> + vm_userfaultfd_ctx, anon_name);
> }
> +
> /*
> * Can we merge both predecessor and successor?
> */
> - if (merge_prev && merge_next)
> + if (merge_prev >= MERGE_OK && merge_next >= MERGE_OK) {

What happened to making vma_merge easier to read? What does > MERGE_OK
mean? I guess this answers why booleans were not used, but I don't like
it. Couldn't you just log the err/success and the value of
merge_prev/merge_next? It's not like the code tries more than one way
of merging on failure..

> merge_both = is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL);
> + }
>
> - if (merge_both) { /* cases 1, 6 */
> + if (merge_both >= MERGE_OK) { /* cases 1, 6 */
> err = __vma_adjust(prev, prev->vm_start,
> next->vm_end, prev->vm_pgoff, NULL,
> prev);
> area = prev;
> - } else if (merge_prev) { /* cases 2, 5, 7 */
> + } else if (merge_prev >= MERGE_OK) { /* cases 2, 5, 7 */
> err = __vma_adjust(prev, prev->vm_start,
> end, prev->vm_pgoff, NULL, prev);
> area = prev;
> - } else if (merge_next) {
> + } else if (merge_next >= MERGE_OK) {
> if (prev && addr < prev->vm_end) /* case 4 */
> err = __vma_adjust(prev, prev->vm_start,
> addr, prev->vm_pgoff, NULL, next);
> @@ -1252,7 +1255,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
> } else {
> err = -1;
> }
> -
> + trace_vm_av_merge(err, merge_prev, merge_next, merge_both);
> /*
> * Cannot merge with predecessor or successor or error in __vma_adjust?
> */
> @@ -3359,6 +3362,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
> /*
> * Source vma may have been merged into new_vma
> */
> + trace_vm_pgoff_merge(vma, anon_pgoff_updated);
> +
> if (unlikely(vma_start >= new_vma->vm_start &&
> vma_start < new_vma->vm_end)) {
> /*
> --
> 2.34.1
>