Re: [PATCH v2 2/7] mm: Extend copy_vma()

From: Kirill A. Shutemov
Date: Tue May 21 2019 - 04:20:55 EST


On Mon, May 20, 2019 at 05:00:12PM +0300, Kirill Tkhai wrote:
> This prepares the function to copy a vma between
> two processes. Two new arguments are introduced.

This kind of change requires a lot more explanation in the commit message,
describing all possible corner cases.

For instance, I would really like to see a story on why the logic around
need_rmap_locks is safe after the change.

>
> Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
> ---
> include/linux/mm.h | 4 ++--
> mm/mmap.c | 33 ++++++++++++++++++++++++---------
> mm/mremap.c | 4 ++--
> 3 files changed, 28 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0e8834ac32b7..afe07e4a76f8 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2329,8 +2329,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
> struct rb_node **, struct rb_node *);
> extern void unlink_file_vma(struct vm_area_struct *);
> extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
> - unsigned long addr, unsigned long len, pgoff_t pgoff,
> - bool *need_rmap_locks);
> + struct mm_struct *, unsigned long addr, unsigned long len,
> + pgoff_t pgoff, bool *need_rmap_locks, bool clear_flags_ctx);
> extern void exit_mmap(struct mm_struct *);
>
> static inline int check_data_rlimit(unsigned long rlim,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 57803a0a3a5c..99778e724ad1 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3195,19 +3195,21 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
> }
>
> /*
> - * Copy the vma structure to a new location in the same mm,
> - * prior to moving page table entries, to effect an mremap move.
> + * Copy the vma structure to new location in the same vma
> + * prior to moving page table entries, to effect an mremap move;
> */
> struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
> - unsigned long addr, unsigned long len, pgoff_t pgoff,
> - bool *need_rmap_locks)
> + struct mm_struct *mm, unsigned long addr,
> + unsigned long len, pgoff_t pgoff,
> + bool *need_rmap_locks, bool clear_flags_ctx)
> {
> struct vm_area_struct *vma = *vmap;
> unsigned long vma_start = vma->vm_start;
> - struct mm_struct *mm = vma->vm_mm;
> + struct vm_userfaultfd_ctx uctx;
> struct vm_area_struct *new_vma, *prev;
> struct rb_node **rb_link, *rb_parent;
> bool faulted_in_anon_vma = true;
> + unsigned long flags;
>
> /*
> * If anonymous vma has not yet been faulted, update new pgoff
> @@ -3220,15 +3222,25 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>
> if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
> return NULL; /* should never get here */
> - new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
> - vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
> - vma->vm_userfaultfd_ctx);
> +
> + uctx = vma->vm_userfaultfd_ctx;
> + flags = vma->vm_flags;
> + if (clear_flags_ctx) {
> + uctx = NULL_VM_UFFD_CTX;
> + flags &= ~(VM_UFFD_MISSING | VM_UFFD_WP | VM_MERGEABLE |
> + VM_LOCKED | VM_LOCKONFAULT | VM_WIPEONFORK |
> + VM_DONTCOPY);
> + }

Why is the new logic required? No justification given.

> +
> + new_vma = vma_merge(mm, prev, addr, addr + len, flags, vma->anon_vma,
> + vma->vm_file, pgoff, vma_policy(vma), uctx);
> if (new_vma) {
> /*
> * Source vma may have been merged into new_vma
> */
> if (unlikely(vma_start >= new_vma->vm_start &&
> - vma_start < new_vma->vm_end)) {
> + vma_start < new_vma->vm_end) &&
> + vma->vm_mm == mm) {

How can vma_merge() succeed if vma->vm_mm != mm?

> /*
> * The only way we can get a vma_merge with
> * self during an mremap is if the vma hasn't
> @@ -3249,6 +3261,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
> new_vma = vm_area_dup(vma);
> if (!new_vma)
> goto out;
> + new_vma->vm_mm = mm;
> + new_vma->vm_flags = flags;
> + new_vma->vm_userfaultfd_ctx = uctx;
> new_vma->vm_start = addr;
> new_vma->vm_end = addr + len;
> new_vma->vm_pgoff = pgoff;
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 37b5b2ad91be..9a96cfc28675 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -352,8 +352,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
> return err;
>
> new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
> - new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
> - &need_rmap_locks);
> + new_vma = copy_vma(&vma, mm, new_addr, new_len, new_pgoff,
> + &need_rmap_locks, false);
> if (!new_vma)
> return -ENOMEM;
>
>