Re: linux-next: manual merge of the userns tree with the vfs tree

From: Stephen Rothwell
Date: Mon Aug 06 2018 - 03:51:16 EST


Hi all,

On Wed, 20 Jun 2018 12:39:05 +1000 Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx> wrote:
>
> Today's linux-next merge of the userns tree got a conflict in:
>
> fs/proc/inode.c
> fs/proc/root.c
>
> between commit:
>
> 0223e0999be2 ("procfs: Move proc_fill_super() to fs/proc/root.c")
> 83cd45075c36 ("proc: Add fs_context support to procfs")
>
> from the vfs tree and commit:
>
> cc8cda3af2ba ("proc: Simplify and fix proc by removing the kernel mount")
> 9303f5f81bd4 ("proc: Change proc_parse_options to return an errno value")
> 04035aa33a12 ("proc: Don't change mount options on remount failure.")
>
> from the userns tree.
>
> I effectively reverted 9303f5f81bd4 and 04035aa33a12 since (I think)
> they are subsumed by the vfs tree changes.
>
> I fixed it up (I think I got it mostly right this time - see below) and
> can carry the fix as necessary. This is now fixed as far as linux-next
> is concerned, but any non-trivial conflicts should be mentioned to your
> upstream maintainer when your tree is submitted for merging. You may
> also want to consider cooperating with the maintainer of the
> conflicting tree to minimise any particularly complex conflicts.
>
> --
> Cheers,
> Stephen Rothwell
>
> diff --cc fs/proc/root.c
> index efbdc08a3c86,3dceff6cd121..91e9b417bc6d
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@@ -80,71 -78,21 +80,74 @@@ static int proc_parse_option(struct fs_
> return 0;
> }
>
> -int proc_remount(struct super_block *sb, int *flags, char *data)
> +static void proc_set_options(struct super_block *s,
> + struct fs_context *fc,
> + struct pid_namespace *pid_ns,
> + struct user_namespace *user_ns)
> {
> - struct pid_namespace *pid = sb->s_fs_info;
> - struct proc_mount_options opts = {
> - .pid_gid = pid->pid_gid,
> - .hide_pid = pid->hide_pid,
> - };
> + struct proc_fs_context *ctx = fc->fs_private;
> +
> + if (ctx->mask & (1 << Opt_gid))
> + pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
> + if (ctx->mask & (1 << Opt_hidepid))
> + pid_ns->hide_pid = ctx->hidepid;
> +}
> +
> +static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> +{
> + struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
> + struct inode *root_inode;
> int ret;
>
> - sync_filesystem(sb);
> - ret = proc_parse_options(data, &opts);
> - if (ret)
> + proc_set_options(s, fc, pid_ns, current_user_ns());
> +
> + /* User space would break if executables or devices appear on proc */
> + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
> + s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
> + s->s_blocksize = 1024;
> + s->s_blocksize_bits = 10;
> + s->s_magic = PROC_SUPER_MAGIC;
> + s->s_op = &proc_sops;
> + s->s_time_gran = 1;
> +
> + /*
> + * procfs isn't actually a stacking filesystem; however, there is
> + * too much magic going on inside it to permit stacking things on
> + * top of it
> + */
> + s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
> +
> + pde_get(&proc_root);
> + root_inode = proc_get_inode(s, &proc_root);
> + if (!root_inode) {
> + pr_err("proc_fill_super: get root inode failed\n");
> + return -ENOMEM;
> + }
> +
> + s->s_root = d_make_root(root_inode);
> + if (!s->s_root) {
> + pr_err("proc_fill_super: allocate dentry failed\n");
> + return -ENOMEM;
> + }
> +
> + ret = proc_setup_self(s);
> + if (ret) {
> return ret;
> - pid->pid_gid = opts.pid_gid;
> - pid->hide_pid = opts.hide_pid;
> + }
> - return proc_setup_thread_self(s);
> ++ ret = proc_setup_thread_self(s);
> ++
> ++ rcu_assign_pointer(pid_ns->proc_super, s);
> ++ return ret;
> +}
> +
> +int proc_reconfigure(struct super_block *sb, struct fs_context *fc)
> +{
> + struct pid_namespace *pid = sb->s_fs_info;
> +
> + sync_filesystem(sb);
> +
> + if (fc)
> + proc_set_options(sb, fc, pid, current_user_ns());
> return 0;
> }
>
> @@@ -288,44 -208,23 +292,22 @@@ struct proc_dir_entry proc_root =
> .proc_fops = &proc_root_operations,
> .parent = &proc_root,
> .subdir = RB_ROOT,
> - .name = proc_root.inline_name,
> - .inline_name = "/proc",
> + .name = "/proc",
> };
>
> - int pid_ns_prepare_proc(struct pid_namespace *ns)
> + #if defined(CONFIG_SYSCTL_SYSCALL) || defined(CONFIG_MCONSOLE)
> + struct file *file_open_proc(const char *pathname, int flags, umode_t mode)
> {
> - struct proc_fs_context *ctx;
> - struct fs_context *fc;
> struct vfsmount *mnt;
> - int ret;
> -
> - fc = vfs_new_fs_context(&proc_fs_type, NULL, 0,
> - FS_CONTEXT_FOR_KERNEL_MOUNT);
> - if (IS_ERR(fc))
> - return PTR_ERR(fc);
> -
> - ctx = fc->fs_private;
> - if (ctx->pid_ns != ns) {
> - put_pid_ns(ctx->pid_ns);
> - get_pid_ns(ns);
> - ctx->pid_ns = ns;
> - }
> + struct file *file;
>
> - ret = vfs_get_tree(fc);
> - if (ret < 0) {
> - put_fs_context(fc);
> - return ret;
> - }
> -
> - mnt = vfs_create_mount(fc, 0);
> - put_fs_context(fc);
> + mnt = kern_mount(&proc_fs_type);
> if (IS_ERR(mnt))
> - return PTR_ERR(mnt);
> + return ERR_CAST(mnt);
>
> - ns->proc_mnt = mnt;
> - return 0;
> - }
> + file = file_open_root(mnt->mnt_root, mnt, pathname, flags, mode);
> + kern_unmount(mnt);
>
> - void pid_ns_release_proc(struct pid_namespace *ns)
> - {
> - kern_unmount(ns->proc_mnt);
> + return file;
> }
> + #endif

Are there any comments on this resolution? I just had to do it all
again due to slight changes in the vfs tree. What are you guys going
to tell Linus when he comes to merge this?

--
Cheers,
Stephen Rothwell

Attachment: pgpZS11YJHBy2.pgp
Description: OpenPGP digital signature