Re: [PATCH 01/13] devpts: Teach /dev/ptmx to find the associated devpts via path lookup

From: Al Viro
Date: Mon Apr 04 2016 - 22:56:00 EST


On Mon, Apr 04, 2016 at 08:29:17PM -0500, Eric W. Biederman wrote:

> +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
> +static int devpts_path_ptmx(struct file *filp)
> +{
> + struct pts_fs_info *fsi;
> + struct path root, path;
> + struct dentry *old;
> + int err = -ENOENT;
> + int ret;
> +
> + /* Can the pts filesystem be found with a path walk? */
> + path = filp->f_path;
> + path_get(&path);
> + get_fs_root(current->fs, &root);
> + ret = path_parent(&root, &path);
> + path_put(&root);
> + if (ret != 1)
> + goto fail;

That, I take it, is a lookup for .. and buggering off if it fails *or* if
we had been in caller's root or something that overmount it? Not that the
latter had been possible - root is a directory and can be overmounted only
by another such, and we are called from ->open() of a device node.

> + /* Remember the result of this permission check for later */
> + ret = inode_permission(path.dentry->d_inode, MAY_EXEC);
> + if (path_pts(&path))
> + goto fail;

Egads, man - you've just introduced a special function for looking up
something named "pts" in a given directory!

The reason not to use kern_path() would be what, the fact that it doesn't
allow starting at given location? So let's make a variant that would - and
rather than bothering with RCU, just go for something like (completely
untested)

/* on success overwrite *path with the result of walk; do _not_ drop the
reference to old contents - let the caller arrange that */
int kern_path_relative(struct path *path, const char *s, int flags)
{
int err;
struct nameidata nd = {.path = *path};
struct filename *name;

if (!*s || *s == '/' || flags & (LOOKUP_ROOT | LOOKUP_RCU))
return -EINVAL;

name = getname_kernel(s);
if (IS_ERR(name))
return PTR_ERR(name);

set_nameidata(&nd, AT_FDCWD, name);

nd.last_type = LAST_ROOT;
nd.flags = flags | LOOKUP_REVAL | LOOKUP_JUMPED | LOOKUP_PARENT;
nd.m_seq = read_seqbegin(&mount_lock);
path_get(&nd.path);
nd.inode = nd.path.dentry->d_inode;

while (!(err = link_path_walk(s, &nd))
&& ((err = lookup_last(&nd)) > 0)) {
s = trailing_symlink(&nd);
if (IS_ERR(s)) {
err = PTR_ERR(s);
break;
}
}
if (!err)
err = complete_walk(&nd);

if (!err && flags & LOOKUP_DIRECTORY)
if (!d_can_lookup(nd.path.dentry))
err = -ENOTDIR;
if (!err) {
*path = nd.path;
nd.path.mnt = NULL;
nd.path.dentry = NULL;
}
terminate_walk(&nd);
restore_nameidata();
putname(name);
return err;
}

and use it as

path = filp->f_path;
err = kern_path_relative(&path, "../pts", LOOKUP_DIRECTORY);
if (err)
return err;
/* from here on we need to path_put() it */
if (path.mnt->mnt_sb->s_magic != DEVPTS_SUPER_MAGIC)
goto fail;
/* must be its root; no other directories on that puppy */

> + fsi = DEVPTS_SB(path.mnt->mnt_sb);
> +
> + /* Get out if the path walk resulted in the default devpts instance */
> + if (devpts_mnt->mnt_sb == path.mnt->mnt_sb)
> + goto fail;
> +
> + /* Don't allow bypassing the existing /dev/pts/ptmx permission check */

err = inode_permission(path.dentry->d_inode, MAY_EXEC);
if (err)
goto fail;
err = inode_permission(fsi->ptmx_dentry->d_inode,
ACC_MODE(filp->f_flags));
if (err)
goto fail;

> + /* Advance path to the ptmx dentry */
> + old = path.dentry;
> + path.dentry = dget(fsi->ptmx_dentry);
> + dput(old);
> +
> + /* Make it look like /dev/pts/ptmx was opened */
> + err = update_file_path(filp, &path);
> + if (err)
> + goto fail;
> +
> + return 0;
> +fail:
> + path_put(&path);
> + return err;
> +}
> +#else
> +static inline int devpts_path_ptmx(struct file *filp)
> +{
> + return -ENOENT;
> +}
> +#endif
> +
> +struct inode *devpts_ptmx(struct inode *inode, struct file *filp)
> +{
> + int err;
> + if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
> + return inode;
> +
> + err = devpts_path_ptmx(filp);
> + if (err == 0)
> + return filp->f_inode;
> + if (err != -ENOENT)
> + return ERR_PTR(err);
> +
> + return inode;
> +}

Umm... I'm not sure it makes for good calling conventions - the caller can
do inode = file_inode(filp) just as well, so why not simply return 0 or -E...?
"return inode;" cases become simply return 0...

> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1415,29 +1415,41 @@ static void follow_mount(struct path *path)
> }
> }
>
> -static int follow_dotdot(struct nameidata *nd)
> +int path_parent(struct path *root, struct path *path)

Please, don't.

> +#ifdef CONFIG_UNIX98_PTYS
> +int path_pts(struct path *path)

Fuck, no.

> index 17cb6b1dab75..e1ed78fa474b 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -679,6 +679,24 @@ int open_check_o_direct(struct file *f)
> return 0;
> }
>
> +int update_file_path(struct file *filp, struct path *path)
> +{
> + /* Only valid during f_op->open, and even in open use very carefully */
> + struct path old;
> + struct inode *inode;
> +
> + if (filp->f_mode & FMODE_WRITER)
> + return -EINVAL;

That really needs to be commented.

> + old = filp->f_path;
> + inode = path->dentry->d_inode;
> + filp->f_path = *path;
> + filp->f_inode = inode;
> + filp->f_mapping = inode->i_mapping;
> + path_put(&old);
> + return 0;
> +}

> +
> static int do_dentry_open(struct file *f,
> struct inode *inode,
> int (*open)(struct inode *, struct file *),
> @@ -736,6 +754,7 @@ static int do_dentry_open(struct file *f,
> error = open(inode, f);
> if (error)
> goto cleanup_all;
> + inode = f->f_inode;
> }
> if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
> i_readcount_inc(inode);

BTW, have you looked through the callers of dentry_open()? It can hit that
case as well...