Re: [PATCH] proc: revert /proc/*/cmdline rewrite

From: Eric W. Biederman
Date: Sun Jul 14 2019 - 00:16:53 EST


Alexey Dobriyan <adobriyan@xxxxxxxxx> writes:

> /proc/*/cmdline continues to cause problems:
>
> https://lkml.org/lkml/2019/4/5/825
> Subject get_mm_cmdline and userspace (Perl) changing argv0
>
> https://marc.info/?l=linux-kernel&m=156294831308786&w=4
> [PATCH] proc: Fix uninitialized byte read in get_mm_cmdline()
>
> This patch reverts implementation to the last known good state.
> Revert
>
> commit f5b65348fd77839b50e79bc0a5e536832ea52d8d
> proc: fix missing final NUL in get_mm_cmdline() rewrite
>
> commit 5ab8271899658042fabc5ae7e6a99066a210bc0e
> fs/proc: simplify and clarify get_mm_cmdline() function
>
> Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>

Given that this fixes a regression and a bug.

Acked-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>

> ---
>
> Cc lists
>
> fs/proc/base.c | 198 +++++++++++++++++++++++++++++++++------------------------
> 1 file changed, 118 insertions(+), 80 deletions(-)
>
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -210,16 +210,24 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
> }
>
> static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
> - size_t count, loff_t *ppos)
> + size_t _count, loff_t *pos)
> {
> + unsigned long count = _count;
> unsigned long arg_start, arg_end, env_start, env_end;
> - unsigned long pos, len;
> + unsigned long len1, len2, len;
> + unsigned long p;
> char *page;
> + ssize_t rv;
> + char c;
>
> /* Check if process spawned far enough to have cmdline. */
> if (!mm->env_end)
> return 0;
>
> + page = (char *)__get_free_page(GFP_KERNEL);
> + if (!page)
> + return -ENOMEM;
> +
> spin_lock(&mm->arg_lock);
> arg_start = mm->arg_start;
> arg_end = mm->arg_end;
> @@ -227,96 +235,126 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
> env_end = mm->env_end;
> spin_unlock(&mm->arg_lock);
>
> - if (arg_start >= arg_end)
> - return 0;
> + BUG_ON(arg_start > arg_end);
> + BUG_ON(env_start > env_end);
> +
> + len1 = arg_end - arg_start;
> + len2 = env_end - env_start;
>
> + /* Empty ARGV. */
> + if (len1 == 0) {
> + rv = 0;
> + goto out_free_page;
> + }
> /*
> - * We have traditionally allowed the user to re-write
> - * the argument strings and overflow the end result
> - * into the environment section. But only do that if
> - * the environment area is contiguous to the arguments.
> + * Inherently racy -- command line shares address space
> + * with code and data.
> */
> - if (env_start != arg_end || env_start >= env_end)
> - env_start = env_end = arg_end;
> -
> - /* .. and limit it to a maximum of one page of slop */
> - if (env_end >= arg_end + PAGE_SIZE)
> - env_end = arg_end + PAGE_SIZE - 1;
> -
> - /* We're not going to care if "*ppos" has high bits set */
> - pos = arg_start + *ppos;
> -
> - /* .. but we do check the result is in the proper range */
> - if (pos < arg_start || pos >= env_end)
> - return 0;
> -
> - /* .. and we never go past env_end */
> - if (env_end - pos < count)
> - count = env_end - pos;
> -
> - page = (char *)__get_free_page(GFP_KERNEL);
> - if (!page)
> - return -ENOMEM;
> -
> - len = 0;
> - while (count) {
> - int got;
> - size_t size = min_t(size_t, PAGE_SIZE, count);
> - long offset;
> + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
> + if (rv <= 0)
> + goto out_free_page;
> +
> + rv = 0;
> +
> + if (c == '\0') {
> + /* Command line (set of strings) occupies whole ARGV. */
> + if (len1 <= *pos)
> + goto out_free_page;
> +
> + p = arg_start + *pos;
> + len = len1 - *pos;
> + while (count > 0 && len > 0) {
> + unsigned int _count;
> + int nr_read;
> +
> + _count = min3(count, len, PAGE_SIZE);
> + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
> + if (nr_read < 0)
> + rv = nr_read;
> + if (nr_read <= 0)
> + goto out_free_page;
> +
> + if (copy_to_user(buf, page, nr_read)) {
> + rv = -EFAULT;
> + goto out_free_page;
> + }
>
> + p += nr_read;
> + len -= nr_read;
> + buf += nr_read;
> + count -= nr_read;
> + rv += nr_read;
> + }
> + } else {
> /*
> - * Are we already starting past the official end?
> - * We always include the last byte that is *supposed*
> - * to be NUL
> + * Command line (1 string) occupies ARGV and
> + * extends into ENVP.
> */
> - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
> -
> - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
> - if (got <= offset)
> - break;
> - got -= offset;
> -
> - /* Don't walk past a NUL character once you hit arg_end */
> - if (pos + got >= arg_end) {
> - int n = 0;
> -
> - /*
> - * If we started before 'arg_end' but ended up
> - * at or after it, we start the NUL character
> - * check at arg_end-1 (where we expect the normal
> - * EOF to be).
> - *
> - * NOTE! This is smaller than 'got', because
> - * pos + got >= arg_end
> - */
> - if (pos < arg_end)
> - n = arg_end - pos - 1;
> -
> - /* Cut off at first NUL after 'n' */
> - got = n + strnlen(page+n, offset+got-n);
> - if (got < offset)
> - break;
> - got -= offset;
> -
> - /* Include the NUL if it existed */
> - if (got < size)
> - got++;
> + struct {
> + unsigned long p;
> + unsigned long len;
> + } cmdline[2] = {
> + { .p = arg_start, .len = len1 },
> + { .p = env_start, .len = len2 },
> + };
> + loff_t pos1 = *pos;
> + unsigned int i;
> +
> + i = 0;
> + while (i < 2 && pos1 >= cmdline[i].len) {
> + pos1 -= cmdline[i].len;
> + i++;
> }
> + while (i < 2) {
> + p = cmdline[i].p + pos1;
> + len = cmdline[i].len - pos1;
> + while (count > 0 && len > 0) {
> + unsigned int _count, l;
> + int nr_read;
> + bool final;
> +
> + _count = min3(count, len, PAGE_SIZE);
> + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
> + if (nr_read < 0)
> + rv = nr_read;
> + if (nr_read <= 0)
> + goto out_free_page;
> +
> + /*
> + * Command line can be shorter than whole ARGV
> + * even if last "marker" byte says it is not.
> + */
> + final = false;
> + l = strnlen(page, nr_read);
> + if (l < nr_read) {
> + nr_read = l;
> + final = true;
> + }
> +
> + if (copy_to_user(buf, page, nr_read)) {
> + rv = -EFAULT;
> + goto out_free_page;
> + }
> +
> + p += nr_read;
> + len -= nr_read;
> + buf += nr_read;
> + count -= nr_read;
> + rv += nr_read;
> +
> + if (final)
> + goto out_free_page;
> + }
>
> - got -= copy_to_user(buf, page+offset, got);
> - if (unlikely(!got)) {
> - if (!len)
> - len = -EFAULT;
> - break;
> + /* Only first chunk can be read partially. */
> + pos1 = 0;
> + i++;
> }
> - pos += got;
> - buf += got;
> - len += got;
> - count -= got;
> }
>
> +out_free_page:
> free_page((unsigned long)page);
> - return len;
> + return rv;
> }
>
> static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,