[RFC v2 PATCH 6/8] VFS:userns: shift UID/GID to on-disk view before any write to disk

From: Djalal Harouni
Date: Wed May 04 2016 - 10:30:34 EST


If both the mount namespace and the mount point support UID/GID shifts,
then during inode creation or during a chown call on an inode, make sure
that the kuid and kgid that will be used to set inode->{i_uid|i_gid} are
in the on-disk view.

Perform the shift to the on-disk view during inode initialization or
during notify_change() calls. Usually in these cases the inode's uid/gid
would receive a kuid and kgid that are valid in the context of the caller
and its view inside the global init_user_ns user namespace. They always
come either from current_fsuid() or from the attr->ia_uid of the struct
iattr.

inode->{i_uid|i_gid} on-disk writes inside user_ns_X
----------------------------------------------------

Without this Patch:
------------------------------------------------------------
user_ns_X uid | init_user_ns uid | inode->i_uid on-disk
------------------------------------------------------------
0 | 1000000 | 1000000
------------------------------------------------------------
999 | 1000999 | 1000999
------------------------------------------------------------
1000 | 1001000 | 1001000
------------------------------------------------------------

inode->{i_uid|i_gid} always end up with global kuid/kgid of the caller
in the init_user_ns.

With this patch:
------------------------------------------------------------
user_ns_X uid | init_user_ns uid | inode->i_uid on-disk
------------------------------------------------------------
0 | 1000000 | 0
------------------------------------------------------------
999 | 1000999 | 999
------------------------------------------------------------
1000 | 1001000 | 1000
------------------------------------------------------------

inode->{i_uid|i_gid} will have the values of the uid_t and gid_t that
are shown inside the user namespace of the caller.

Of course this works only on mounts that support VFS UID/GID shift and
are inside a mount namespace that also supports the above. The shift into
on-disk is done inside notify_change() to give inode_change_ok() a chance
to perform its permission checks. At the same time we adapt
inode_change_ok() and make the necessary translation, when it is needed,
from virtual to on-disk and vice versa.

The approach is to always keep inode->{i_uid|i_gid} set to the on-disk
values, even in memory. The virtual translation is only done when needed
for permission checks or stat() calls.

Signed-off-by: Dongsu Park <dongsu@xxxxxxxxxxxx>
Signed-off-by: Djalal Harouni <tixxdz@xxxxxxxxxx>
---
fs/attr.c | 44 +++++++++++++++++++++++++++++++++-----------
fs/inode.c | 4 ++--
2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index 25b24d0..c476257 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -47,26 +47,38 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
return 0;

/* Make sure a caller can chown. */
- if ((ia_valid & ATTR_UID) &&
- (!uid_eq(current_fsuid(), inode->i_uid) ||
- !uid_eq(attr->ia_uid, inode->i_uid)) &&
- !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
- return -EPERM;
+ if (ia_valid & ATTR_UID) {
+ /* Shift to virtual if necessary */
+ kuid_t i_uid = vfs_shift_i_uid_to_virtual(inode);
+
+ if ((!uid_eq(current_fsuid(), i_uid) ||
+ !uid_eq(attr->ia_uid, inode->i_uid)) &&
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ return -EPERM;
+ }

/* Make sure caller can chgrp. */
- if ((ia_valid & ATTR_GID) &&
- (!uid_eq(current_fsuid(), inode->i_uid) ||
- (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+ if (ia_valid & ATTR_GID) {
+ /* Shift to virtual if necessary */
+ kuid_t i_uid = vfs_shift_i_uid_to_virtual(inode);
+ /* Shift it back to virtual if necessary */
+ kgid_t ia_gid = vfs_kgid_disk_to_virtual(inode, attr->ia_gid);
+
+ if ((!uid_eq(current_fsuid(), i_uid) ||
+ (!in_group_p(ia_gid) &&
+ !gid_eq(attr->ia_gid, inode->i_gid))) &&
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
+ }

/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
if (!inode_owner_or_capable(inode))
return -EPERM;
/* Also check the setgid bit! */
- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- inode->i_gid) &&
+ if (!in_group_p((ia_valid & ATTR_GID) ?
+ vfs_kgid_disk_to_virtual(inode, attr->ia_gid) :
+ vfs_shift_i_gid_to_virtual(inode)) &&
!capable_wrt_inode_uidgid(inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -209,6 +221,16 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
inode->i_flags &= ~S_NOSEC;
}

+ /*
+ * Shift, if necessary, the UID and GID that are meant to be written
+ * into the inode's uid/gid to the on-disk view. Do that as early as
+ * possible.
+ */
+ if ((ia_valid & ATTR_UID))
+ attr->ia_uid = vfs_shift_kuid_to_disk(inode, attr->ia_uid);
+ if ((ia_valid & ATTR_GID))
+ attr->ia_gid = vfs_shift_kgid_to_disk(inode, attr->ia_gid);
+
now = current_fs_time(inode->i_sb);

attr->ia_ctime = now;
diff --git a/fs/inode.c b/fs/inode.c
index 07daf5f..e6ee56a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1940,13 +1940,13 @@ EXPORT_SYMBOL(init_special_inode);
void inode_init_owner(struct inode *inode, const struct inode *dir,
umode_t mode)
{
- inode->i_uid = current_fsuid();
+ inode->i_uid = vfs_shift_kuid_to_disk(inode, current_fsuid());
if (dir && dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current_fsgid();
+ inode->i_gid = vfs_shift_kgid_to_disk(inode, current_fsgid());
inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
--
2.5.5