Re: [RFC v2 PATCH 3/8] fs: Treat foreign mounts as nosuid

From: Serge Hallyn
Date: Wed May 04 2016 - 19:19:26 EST


Quoting Djalal Harouni (tixxdz@xxxxxxxxx):
> If a process gets access to a mount from a different user
> namespace, that process should not be able to take advantage of
> setuid files or selinux entrypoints from that filesystem. Prevent
> this by treating mounts from other mount namespaces and those not
> owned by current_user_ns() or an ancestor as nosuid.
>
> This patch was just adapted from the original one that was written
> by Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html

I'm not sure that this makes sense given what you're doing. In the
case of Seth's set, a filesystem is mounted specifically (and privately)
in a user namespace. We don't want for instance the initial user ns
to find a link to a setuid-root exploit left in the container-mounted
filesystem.

But you are having a parent user namespace mount the fs so that its
children can all access the fs, uid-shifted for convenience. Not
allowing the child namespaces to make use of setuid-root does not
seem applicable here.

> Signed-off-by: Djalal Harouni <tixxdz@xxxxxxxxxx>
> ---
> fs/exec.c | 2 +-
> fs/namespace.c | 15 +++++++++++++++
> include/linux/mount.h | 1 +
> include/linux/user_namespace.h | 8 ++++++++
> kernel/user_namespace.c | 13 +++++++++++++
> security/commoncap.c | 2 +-
> security/selinux/hooks.c | 2 +-
> 7 files changed, 40 insertions(+), 3 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index c4010b8..706088d 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1391,7 +1391,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
> bprm->cred->euid = current_euid();
> bprm->cred->egid = current_egid();
>
> - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
> + if (!mnt_may_suid(bprm->file->f_path.mnt))
> return;
>
> if (task_no_new_privs(current))
> diff --git a/fs/namespace.c b/fs/namespace.c
> index de02b39..a8820fb 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3374,6 +3374,21 @@ found:
> return visible;
> }
>
> +bool mnt_may_suid(struct vfsmount *mnt)
> +{
> + struct mount *m = real_mount(mnt);
> +
> + /*
> + * Foreign mounts (accessed via fchdir or through /proc
> + * symlinks) are always treated as if they are nosuid. This
> + * prevents namespaces from trusting potentially unsafe
> + * suid/sgid bits, file caps, or security labels that originate
> + * in other namespaces.
> + */
> + return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(m) &&
> + in_userns(current_user_ns(), m->mnt_ns->user_ns);
> +}
> +
> static struct ns_common *mntns_get(struct task_struct *task)
> {
> struct ns_common *ns = NULL;
> diff --git a/include/linux/mount.h b/include/linux/mount.h
> index f822c3c..54a594d 100644
> --- a/include/linux/mount.h
> +++ b/include/linux/mount.h
> @@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt);
> extern struct vfsmount *mntget(struct vfsmount *mnt);
> extern struct vfsmount *mnt_clone_internal(struct path *path);
> extern int __mnt_is_readonly(struct vfsmount *mnt);
> +extern bool mnt_may_suid(struct vfsmount *mnt);
>
> struct path;
> extern struct vfsmount *clone_private_mount(struct path *path);
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 8297e5b..a43faa7 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -72,6 +72,8 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t,
> extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
> extern int proc_setgroups_show(struct seq_file *m, void *v);
> extern bool userns_may_setgroups(const struct user_namespace *ns);
> +extern bool in_userns(const struct user_namespace *ns,
> + const struct user_namespace *target_ns);
> #else
>
> static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
> @@ -100,6 +102,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns)
> {
> return true;
> }
> +
> +static inline bool in_userns(const struct user_namespace *ns,
> + const struct user_namespace *target_ns)
> +{
> + return true;
> +}
> #endif
>
> #endif /* _LINUX_USER_H */
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 9bafc21..9a496a8 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -938,6 +938,19 @@ bool userns_may_setgroups(const struct user_namespace *ns)
> return allowed;
> }
>
> +/*
> + * Returns true if @ns is the same namespace as or a descendant of
> + * @target_ns.
> + */
> +bool in_userns(const struct user_namespace *ns,
> + const struct user_namespace *target_ns)
> +{
> + for (; ns; ns = ns->parent) {
> + if (ns == target_ns)
> + return true;
> + }
> +}
> +
> static inline struct user_namespace *to_user_ns(struct ns_common *ns)
> {
> return container_of(ns, struct user_namespace, ns);
> diff --git a/security/commoncap.c b/security/commoncap.c
> index 48071ed..6c082d2 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -453,7 +453,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
> if (!file_caps_enabled)
> return 0;
>
> - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
> + if (!mnt_may_suid(bprm->file->f_path.mnt))
> return 0;
>
> rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 912deee..1350167 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -2234,7 +2234,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm,
> const struct task_security_struct *new_tsec)
> {
> int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS);
> - int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID);
> + int nosuid = !mnt_may_suid(bprm->file->f_path.mnt);
> int rc;
>
> if (!nnp && !nosuid)
> --
> 2.5.5
>