Re: linux-next: manual merge of the folio tree with the maple tree

From: Mark Brown
Date: Wed Feb 23 2022 - 21:03:09 EST


On Thu, Feb 24, 2022 at 01:16:53AM +0000, broonie@xxxxxxxxxx wrote:

> I have absolutely no confidence in this resolution.

I'm pretty sure that the resolution is bogus and that I've broken boot (at
least judging from some quick testing). I haven't confirmed that this merge
is the cause, though; I'll take a closer look tomorrow. Any help from
someone who's looked at this code before would be much appreciated.

> diff --cc mm/mmap.c
> index 1b3600152f6f5,64b5985b5295c..0000000000000
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@@ -2286,32 -2780,38 +2286,37 @@@ int split_vma(struct mm_struct *mm, str
> return __split_vma(mm, vma, addr, new_below);
> }
>
> -/* Munmap is split into 2 main parts -- this part which finds
> +/*
> + * do_mas_align_munmap() - munmap the aligned region from @start to @end.
> + * @mas: The maple_state, ideally set up to alter the correct tree location.
> + * @vma: The starting vm_area_struct
> + * @mm: The mm_struct
> + * @start: The aligned start address to munmap.
> + * @end: The aligned end address to munmap.
> + * @uf: The userfaultfd list_head
> + * @downgrade: Set to true to attempt a write downgrade of the mmap_sem
> + *
> + * If @downgrade is true, check return code for potential release of the lock.
> ++ *
> ++ * Munmap is split into 2 main parts -- this part which finds
> + * what needs doing, and the areas themselves, which do the
> + * work. This now handles partial unmappings.
> + * Jeremy Fitzhardinge <jeremy@xxxxxxxx>
> */
> -int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
> - struct list_head *uf, bool downgrade)
> -{
> - unsigned long end;
> - struct vm_area_struct *vma, *prev, *last;
> -
> - if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
> - return -EINVAL;
> -
> - len = PAGE_ALIGN(len);
> - end = start + len;
> - if (len == 0)
> - return -EINVAL;
> -
> - /*
> - * arch_unmap() might do unmaps itself. It must be called
> - * and finish any rbtree manipulation before this code
> - * runs and also starts to manipulate the rbtree.
> - */
> - arch_unmap(mm, start, end);
> -
> - /* Find the first overlapping VMA where start < vma->vm_end */
> - vma = find_vma_intersection(mm, start, end);
> - if (!vma)
> - return 0;
> - prev = vma->vm_prev;
> -
> +static int
> +do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
> + struct mm_struct *mm, unsigned long start,
> + unsigned long end, struct list_head *uf, bool downgrade)
> +{
> + struct vm_area_struct *prev, *next;
> + struct maple_tree mt_detach;
> + int count = 0;
> + MA_STATE(mas_detach, &mt_detach, start, end - 1);
> + mt_init_flags(&mt_detach, MM_MT_FLAGS);
> + mt_set_external_lock(&mt_detach, &mm->mmap_lock);
> +
> + mas->last = end - 1;
> + prev = next = NULL;
> /*
> * If we need to split any vma, do it now to save pain later.
> *
> @@@ -3107,13 -3127,8 +3112,12 @@@ void exit_mmap(struct mm_struct *mm
> * Nothing can be holding mm->mmap_lock here and the above call
> * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
> * __oom_reap_task_mm() will not block.
> + *
> + * This needs to be done before unlocking the VMAs in the loop
> + * below which clears VM_LOCKED, otherwise the oom reaper cannot
> + * reliably test it.
> */
> (void)__oom_reap_task_mm(mm);
> -
> set_bit(MMF_OOM_SKIP, &mm->flags);
> }
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 2ddf1caf90488..d469555df0a21 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2373,10 +2373,8 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
> BUG_ON(next->vm_start > end);
> #endif
> vma_mas_store(next, &mas_detach);
> - if (next->vm_flags & VM_LOCKED) {
> + if (next->vm_flags & VM_LOCKED)
> mm->locked_vm -= vma_pages(next);
> - munlock_vma_pages_all(next);
> - }
> }
>
> next = mas_find(mas, ULONG_MAX);
> @@ -2906,10 +2904,8 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
> }
>
> unmap_pages = vma_pages(&unmap);
> - if (vma->vm_flags & VM_LOCKED) {
> + if (vma->vm_flags & VM_LOCKED)
> mm->locked_vm -= unmap_pages;
> - munlock_vma_pages_range(&unmap, newbrk, oldbrk);
> - }
>
> next = mas_next(mas, ULONG_MAX);
> mmap_write_downgrade(mm);
> @@ -3130,10 +3126,8 @@ void exit_mmap(struct mm_struct *mm)
> rwsem_acquire(&mm->mmap_lock.dep_map, 0, 0, _THIS_IP_);
> if (mm->locked_vm) {
> mas_for_each(&mas, vma, ULONG_MAX) {
> - if (vma->vm_flags & VM_LOCKED) {
> + if (vma->vm_flags & VM_LOCKED)
> mm->locked_vm -= vma_pages(vma);
> - munlock_vma_pages_all(vma);
> - }
> }
> mas_set(&mas, 0);
> }
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 2ddf1caf90488..d469555df0a21 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2373,10 +2373,8 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
> BUG_ON(next->vm_start > end);
> #endif
> vma_mas_store(next, &mas_detach);
> - if (next->vm_flags & VM_LOCKED) {
> + if (next->vm_flags & VM_LOCKED)
> mm->locked_vm -= vma_pages(next);
> - munlock_vma_pages_all(next);
> - }
> }
>
> next = mas_find(mas, ULONG_MAX);
> @@ -2906,10 +2904,8 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
> }
>
> unmap_pages = vma_pages(&unmap);
> - if (vma->vm_flags & VM_LOCKED) {
> + if (vma->vm_flags & VM_LOCKED)
> mm->locked_vm -= unmap_pages;
> - munlock_vma_pages_range(&unmap, newbrk, oldbrk);
> - }
>
> next = mas_next(mas, ULONG_MAX);
> mmap_write_downgrade(mm);
> @@ -3130,10 +3126,8 @@ void exit_mmap(struct mm_struct *mm)
> rwsem_acquire(&mm->mmap_lock.dep_map, 0, 0, _THIS_IP_);
> if (mm->locked_vm) {
> mas_for_each(&mas, vma, ULONG_MAX) {
> - if (vma->vm_flags & VM_LOCKED) {
> + if (vma->vm_flags & VM_LOCKED)
> mm->locked_vm -= vma_pages(vma);
> - munlock_vma_pages_all(vma);
> - }
> }
> mas_set(&mas, 0);
> }

Attachment: signature.asc
Description: PGP signature