Re: [PATCH v3 1/1] mm: fix use-after-free when anon vma name is used after vma is freed

From: Suren Baghdasaryan
Date: Tue Feb 15 2022 - 12:36:52 EST


On Tue, Feb 15, 2022 at 8:00 AM Michal Hocko <mhocko@xxxxxxxx> wrote:
>
> On Thu 10-02-22 17:30:32, Suren Baghdasaryan wrote:
> [...]
> > +struct anon_vma_name *vma_anon_name_get(struct vm_area_struct *vma)
> > +{
> > + if (!has_vma_anon_name(vma))
> > + return NULL;
> > +
> > + mmap_assert_locked(vma->vm_mm);
> > +
> > + kref_get(&vma->anon_name->kref);
> > + return vma->anon_name;
> > +}
> > +
> > +void vma_anon_name_put(struct anon_vma_name *anon_name)
> > +{
> > + if (anon_name)
> > + kref_put(&anon_name->kref, vma_anon_name_free);
> > +}
> > +
> > void dup_vma_anon_name(struct vm_area_struct *orig_vma,
> > struct vm_area_struct *new_vma)
> > {
> > @@ -126,33 +146,34 @@ void free_vma_anon_name(struct vm_area_struct *vma)
> > }
> >
> > /* mmap_lock should be write-locked */
> > -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
> > +static int replace_vma_anon_name(struct vm_area_struct *vma,
> > + struct anon_vma_name *anon_name)
> > {
> > - const char *anon_name;
> > + const char *orig_name;
> >
> > - if (!name) {
> > + if (!anon_name) {
> > free_vma_anon_name(vma);
> > return 0;
> > }
> >
> > - anon_name = vma_anon_name(vma);
> > - if (anon_name) {
> > + orig_name = vma_anon_name(vma);
> > + if (orig_name) {
> > /* Same name, nothing to do here */
> > - if (!strcmp(name, anon_name))
> > + if (!strcmp(anon_name->name, orig_name))
> > return 0;
> >
> > free_vma_anon_name(vma);
> > }
> > - vma->anon_name = anon_vma_name_alloc(name);
> > - if (!vma->anon_name)
> > - return -ENOMEM;
> > + kref_get(&anon_name->kref);
> > + vma->anon_name = anon_name;
>
> I really have to say that this is still confusing and potentially error
> prone. I haven't really dived deeply in the early break out from strcmp
> because I find your mixing strings and their referenced pointers rather
> confusing in principle. I still do not see why you are avoiding a
> relatively straighforward pattern.
>
> All you need is a shared pointer for your string and share
> it as much as possible, no? Sharing means that you elevate the reference
> counter whenever you assign to a vma and unshare when you are replacing
> the shared pointer by a different one or when vma is freed. In other
> words something like the following (pls note I have only compile tested
> it and it very likely needs tweaks here and there I just wanted to point
> the idea):

Thanks for looking into this! Let me digest the proposal and will get
back with questions or a refactoring patch.

>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 213cc569b192..689971696864 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2626,7 +2626,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
> extern struct vm_area_struct *vma_merge(struct mm_struct *,
> struct vm_area_struct *prev, unsigned long addr, unsigned long end,
> unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
> - struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
> + struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *);
> extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
> extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
> unsigned long addr, int new_below);
> @@ -3372,11 +3372,11 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
>
> #ifdef CONFIG_ANON_VMA_NAME
> int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> - unsigned long len_in, const char *name);
> + unsigned long len_in, struct anon_vma_name *name);
> #else
> static inline int
> madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> - unsigned long len_in, const char *name) {
> + unsigned long len_in, struct anon_vma_name *name) {
> return 0;
> }
> #endif
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index b725839dfe71..cff619f762d0 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -145,45 +145,48 @@ static __always_inline void del_page_from_lru_list(struct page *page,
> */
> extern const char *vma_anon_name(struct vm_area_struct *vma);
>
> -/*
> - * mmap_lock should be read-locked for orig_vma->vm_mm.
> - * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
> - * isolated.
> - */
> -extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
> - struct vm_area_struct *new_vma);
> +static inline void anon_vma_name_get(struct anon_vma_name *name)
> +{
> + if (!name)
> + return;
>
> -/*
> - * mmap_lock should be write-locked or vma should have been isolated under
> - * write-locked mmap_lock protection.
> - */
> -extern void free_vma_anon_name(struct vm_area_struct *vma);
> + kref_get(&name->kref);
> +}
>
> -/* mmap_lock should be read-locked */
> -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
> - const char *name)
> +void vma_anon_name_free(struct kref *kref);
> +static inline void anon_vma_name_put(struct anon_vma_name *name)
> {
> - const char *vma_name = vma_anon_name(vma);
> + if (!name)
> + return;
>
> - /* either both NULL, or pointers to same string */
> - if (vma_name == name)
> + kref_put(&name->kref, vma_anon_name_free);
> +}
> +
> +static inline bool anon_vma_name_eq(struct anon_vma_name *name1, struct anon_vma_name *name2)
> +{
> + if (name1 == name2)
> return true;
>
> - return name && vma_name && !strcmp(name, vma_name);
> + return name1 && name2 && !strcmp(name1->name, name2->name);
> +}
> +
> +static inline void free_vma_anon_name(struct vm_area_struct *vma)
> +{
> + anon_vma_name_put(vma->anon_name);
> }
> +
> #else /* CONFIG_ANON_VMA_NAME */
> static inline const char *vma_anon_name(struct vm_area_struct *vma)
> {
> return NULL;
> }
> -static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
> - struct vm_area_struct *new_vma) {}
> -static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
> -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
> - const char *name)
> +static inline bool anon_vma_name_eq(struct anon_vma_name *name1, struct anon_vma_name *name2)
> {
> return true;
> }
> +static inline void free_vma_anon_name(struct vm_area_struct *vma)
> +{
> +}
> #endif /* CONFIG_ANON_VMA_NAME */
>
> static inline void init_tlb_flush_pending(struct mm_struct *mm)
> diff --git a/kernel/sys.c b/kernel/sys.c
> index ecc4cf019242..662a0c8b4ceb 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2278,7 +2278,8 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
> {
> struct mm_struct *mm = current->mm;
> const char __user *uname;
> - char *name, *pch;
> + char *uname, *pch;
> + struct anon_vma_name *vma_name = NULL;
> int error;
>
> switch (opt) {
> @@ -2296,15 +2297,17 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
> return -EINVAL;
> }
> }
> - } else {
> - /* Reset the name */
> - name = NULL;
> +
> + /* anon_vma has its own copy */
> + vma_name = anon_vma_name_alloc(name);
> + kfree(name);
> + if (!vma_name)
> + return PTR_ERR(vma_name);
> }
>
> mmap_write_lock(mm);
> - error = madvise_set_anon_name(mm, addr, size, name);
> + error = madvise_set_anon_name(mm, addr, size, vma_name);
> mmap_write_unlock(mm);
> - kfree(name);
> break;
> default:
> error = -EINVAL;
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 5604064df464..f2f8065f67c1 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -65,7 +65,7 @@ static int madvise_need_mmap_write(int behavior)
> }
>
> #ifdef CONFIG_ANON_VMA_NAME
> -static struct anon_vma_name *anon_vma_name_alloc(const char *name)
> +struct anon_vma_name *anon_vma_name_alloc(const char *name)
> {
> struct anon_vma_name *anon_name;
> size_t count;
> @@ -81,7 +81,7 @@ static struct anon_vma_name *anon_vma_name_alloc(const char *name)
> return anon_name;
> }
>
> -static void vma_anon_name_free(struct kref *kref)
> +void vma_anon_name_free(struct kref *kref)
> {
> struct anon_vma_name *anon_name =
> container_of(kref, struct anon_vma_name, kref);
> @@ -103,54 +103,25 @@ const char *vma_anon_name(struct vm_area_struct *vma)
> return vma->anon_name->name;
> }
>
> -void dup_vma_anon_name(struct vm_area_struct *orig_vma,
> - struct vm_area_struct *new_vma)
> -{
> - if (!has_vma_anon_name(orig_vma))
> - return;
> -
> - kref_get(&orig_vma->anon_name->kref);
> - new_vma->anon_name = orig_vma->anon_name;
> -}
> -
> -void free_vma_anon_name(struct vm_area_struct *vma)
> -{
> - struct anon_vma_name *anon_name;
> -
> - if (!has_vma_anon_name(vma))
> - return;
> -
> - anon_name = vma->anon_name;
> - vma->anon_name = NULL;
> - kref_put(&anon_name->kref, vma_anon_name_free);
> -}
> -
> /* mmap_lock should be write-locked */
> -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
> +static int replace_vma_anon_name(struct vm_area_struct *vma, struct anon_vma_name *name)
> {
> - const char *anon_name;
> -
> if (!name) {
> - free_vma_anon_name(vma);
> + anon_vma_name_put(vma->anon_name);
> + vma->anon_name = NULL;
> return 0;
> }
>
> - anon_name = vma_anon_name(vma);
> - if (anon_name) {
> - /* Same name, nothing to do here */
> - if (!strcmp(name, anon_name))
> - return 0;
> -
> - free_vma_anon_name(vma);
> - }
> - vma->anon_name = anon_vma_name_alloc(name);
> - if (!vma->anon_name)
> - return -ENOMEM;
> + if (anon_vma_name_eq(vma->anon_name, name))
> + return 0;
>
> + anon_vma_name_put(vma->anon_name);
> + anon_vma_name_get(name);
> + vma->anon_name = name;
> return 0;
> }
> #else /* CONFIG_ANON_VMA_NAME */
> -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
> +static int replace_vma_anon_name(struct vm_area_struct *vma, struct anon_vma_name *name)
> {
> if (name)
> return -EINVAL;
> @@ -165,13 +136,13 @@ static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
> static int madvise_update_vma(struct vm_area_struct *vma,
> struct vm_area_struct **prev, unsigned long start,
> unsigned long end, unsigned long new_flags,
> - const char *name)
> + struct anon_vma_name *name)
> {
> struct mm_struct *mm = vma->vm_mm;
> int error;
> pgoff_t pgoff;
>
> - if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
> + if (new_flags == vma->vm_flags && anon_vma_name_eq(vma->anon_name, name)) {
> *prev = vma;
> return 0;
> }
> @@ -1041,7 +1012,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
> }
>
> error = madvise_update_vma(vma, prev, start, end, new_flags,
> - vma_anon_name(vma));
> + vma->anon_name);
>
> out:
> /*
> @@ -1234,7 +1205,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
> return -EBADF;
>
> error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
> - (const char *)name);
> + (struct anon_vma_name *)name);
>
> /*
> * madvise() returns EAGAIN if kernel resources, such as
> @@ -1246,7 +1217,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
> }
>
> int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> - unsigned long len_in, const char *name)
> + unsigned long len_in, struct anon_vma_name *name)
> {
> unsigned long end;
> unsigned long len;
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 8f584eddd305..e992755e8ffb 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -512,7 +512,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
> pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
> *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
> vma->vm_file, pgoff, vma_policy(vma),
> - vma->vm_userfaultfd_ctx, vma_anon_name(vma));
> + vma->vm_userfaultfd_ctx, vma->anon_name);
> if (*prev) {
> vma = *prev;
> goto success;
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 1e8fdb0b51ed..c3b2e73d9c9a 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1031,7 +1031,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
> static inline int is_mergeable_vma(struct vm_area_struct *vma,
> struct file *file, unsigned long vm_flags,
> struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> - const char *anon_name)
> + struct anon_vma_name *anon_name)
> {
> /*
> * VM_SOFTDIRTY should not prevent from VMA merging, if we
> @@ -1049,7 +1049,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
> return 0;
> if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
> return 0;
> - if (!is_same_vma_anon_name(vma, anon_name))
> + if (!anon_vma_name_eq(vma->anon_name, anon_name))
> return 0;
> return 1;
> }
> @@ -1084,7 +1084,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
> struct anon_vma *anon_vma, struct file *file,
> pgoff_t vm_pgoff,
> struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> - const char *anon_name)
> + struct anon_vma_name *anon_name)
> {
> if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
> is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> @@ -1106,7 +1106,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
> struct anon_vma *anon_vma, struct file *file,
> pgoff_t vm_pgoff,
> struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> - const char *anon_name)
> + struct anon_vma_name *anon_name)
> {
> if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
> is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> @@ -1167,7 +1167,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
> struct anon_vma *anon_vma, struct file *file,
> pgoff_t pgoff, struct mempolicy *policy,
> struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> - const char *anon_name)
> + struct anon_vma_name *anon_name)
> {
> pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
> struct vm_area_struct *area, *next;
> @@ -3255,7 +3255,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
> return NULL; /* should never get here */
> new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
> vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
> - vma->vm_userfaultfd_ctx, vma_anon_name(vma));
> + vma->vm_userfaultfd_ctx, vma->anon_name);
> if (new_vma) {
> /*
> * Source vma may have been merged into new_vma
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 0138dfcdb1d8..dae529d24fa3 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -464,7 +464,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
> pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
> *pprev = vma_merge(mm, *pprev, start, end, newflags,
> vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
> - vma->vm_userfaultfd_ctx, vma_anon_name(vma));
> + vma->vm_userfaultfd_ctx, vma->anon_name);
> if (*pprev) {
> vma = *pprev;
> VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
> --
> Michal Hocko
> SUSE Labs