[PATCH 13/16] fs: use vfs interfaces for capabilities xattrs

From: Seth Forshee (DigitalOcean)
Date: Wed Nov 29 2023 - 16:51:44 EST


Now that all the plumbing is in place, switch over to using the new
inode operations to get/set fs caps. This pushes all mapping of ids into
the caller's user ns to above the vfs_*() level, making this consistent
with other vfs_*() interfaces.

cap_convert_nscap() is updated to operate on a struct vfs_caps, which it
modifies in place, and moved to be called from the new code path for
setting fscaps. This means that use of vfs_setxattr() will no longer remap
ids in fscaps xattrs, but all code which used vfs_setxattr() for fscaps
xattrs has been converted to the new interfaces.

Removing the mapping of fscaps rootids from vfs_getxattr() is more
involved and will be addressed in a later commit.

Signed-off-by: Seth Forshee (DigitalOcean) <sforshee@xxxxxxxxxx>
---
fs/xattr.c | 49 ++++++++++++++++++++++++----
include/linux/capability.h | 2 +-
security/commoncap.c | 79 +++++++++++++++-------------------------------
3 files changed, 69 insertions(+), 61 deletions(-)

diff --git a/fs/xattr.c b/fs/xattr.c
index f60ef2a79dfa..372644b15457 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -540,13 +540,6 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
const void *orig_value = value;
int error;

- if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(idmap, dentry, &value, size);
- if (error < 0)
- return error;
- size = error;
- }
-
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
@@ -857,6 +850,24 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
return do_set_acl(idmap, dentry, ctx->kname->name,
ctx->kvalue, ctx->size);

+ if (strcmp(ctx->kname->name, XATTR_NAME_CAPS) == 0) {
+ struct vfs_caps caps;
+ int ret;
+
+ /*
+ * rootid is already in the mount idmap, so pass nop_mnt_idmap
+ * so that it won't be mapped.
+ */
+ ret = vfs_caps_from_xattr(&nop_mnt_idmap, current_user_ns(),
+ &caps, ctx->kvalue, ctx->size);
+ if (ret)
+ return ret;
+ ret = cap_convert_nscap(idmap, dentry, &caps);
+ if (ret)
+ return ret;
+ return vfs_set_fscaps(idmap, dentry, &caps, ctx->flags);
+ }
+
return vfs_setxattr(idmap, dentry, ctx->kname->name,
ctx->kvalue, ctx->size, ctx->flags);
}
@@ -955,6 +966,27 @@ do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
ssize_t error;
char *kname = ctx->kname->name;

+ if (strcmp(kname, XATTR_NAME_CAPS) == 0) {
+ struct vfs_caps caps;
+ struct vfs_ns_cap_data data;
+ int ret;
+
+ ret = vfs_get_fscaps(idmap, d, &caps);
+ if (ret)
+ return ret;
+ /*
+ * rootid is already in the mount idmap, so pass nop_mnt_idmap
+ * so that it won't be mapped.
+ */
+ ret = vfs_caps_to_user_xattr(&nop_mnt_idmap, current_user_ns(),
+ &caps, &data, ctx->size);
+ if (ret < 0)
+ return ret;
+ if (ctx->size && copy_to_user(ctx->value, &data, ret))
+ return -EFAULT;
+ return ret;
+ }
+
if (ctx->size) {
if (ctx->size > XATTR_SIZE_MAX)
ctx->size = XATTR_SIZE_MAX;
@@ -1145,6 +1177,9 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d,
if (is_posix_acl_xattr(kname))
return vfs_remove_acl(idmap, d, kname);

+ if (strcmp(kname, XATTR_NAME_CAPS) == 0)
+ return vfs_remove_fscaps(idmap, d);
+
return vfs_removexattr(idmap, d, kname);
}

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c0bd9447685b..563f084e9453 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -229,6 +229,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
struct vfs_caps *cpu_caps);

int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
- const void **ivalue, size_t size);
+ struct vfs_caps *caps);

#endif /* !_LINUX_CAPABILITY_H */
diff --git a/security/commoncap.c b/security/commoncap.c
index c645330f83a0..bd95b806af2f 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -484,27 +484,21 @@ int cap_inode_getsecurity(struct mnt_idmap *idmap,
}

/**
- * rootid_from_xattr - translate root uid of vfs caps
+ * rootid_from_vfs_caps - translate root uid of vfs caps
*
- * @value: vfs caps value which may be modified by this function
- * @size: size of @ivalue
+ * @caps: vfs caps to read the rootid from; not modified by this function
* @task_ns: user namespace of the caller
+ *
+ * Return the rootid from a v3 fs cap, or the id of root in the task's user
+ * namespace for v1 and v2 fs caps.
*/
-static vfsuid_t rootid_from_xattr(const void *value, size_t size,
- struct user_namespace *task_ns)
+static vfsuid_t rootid_from_vfs_caps(const struct vfs_caps *caps,
+ struct user_namespace *task_ns)
{
- const struct vfs_ns_cap_data *nscap = value;
- uid_t rootid = 0;
-
- if (size == XATTR_CAPS_SZ_3)
- rootid = le32_to_cpu(nscap->rootid);
-
- return VFSUIDT_INIT(make_kuid(task_ns, rootid));
-}
+ if ((caps->magic_etc & VFS_CAP_REVISION_MASK) == VFS_CAP_REVISION_3)
+ return caps->rootid;

-static bool validheader(size_t size, const struct vfs_cap_data *cap)
-{
- return is_v2header(size, cap) || is_v3header(size, cap);
+ return VFSUIDT_INIT(make_kuid(task_ns, 0));
}

/**
@@ -512,11 +506,10 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
*
* @idmap: idmap of the mount the inode was found from
* @dentry: used to retrieve inode to check permissions on
- * @ivalue: vfs caps value which may be modified by this function
- * @size: size of @ivalue
+ * @caps: vfs caps which may be modified by this function
*
- * User requested a write of security.capability. If needed, update the
- * xattr to change from v2 to v3, or to fixup the v3 rootid.
+ * User requested a write of security.capability. Check permissions, and if
+ * needed, update the xattr to change from v2 to v3.
*
* If the inode has been found through an idmapped mount the idmap of
* the vfsmount must be passed through @idmap. This function will then
@@ -524,59 +517,39 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
* permissions. On non-idmapped mounts or if permission checking is to be
* performed on the raw inode simply pass @nop_mnt_idmap.
*
- * Return: On success, return the new size; on error, return < 0.
+ * Return: On success, return 0; on error, return < 0.
*/
int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
- const void **ivalue, size_t size)
+ struct vfs_caps *caps)
{
- struct vfs_ns_cap_data *nscap;
- uid_t nsrootid;
- const struct vfs_cap_data *cap = *ivalue;
- __u32 magic, nsmagic;
struct inode *inode = d_backing_inode(dentry);
struct user_namespace *task_ns = current_user_ns(),
*fs_ns = inode->i_sb->s_user_ns;
- kuid_t rootid;
vfsuid_t vfsrootid;
- size_t newsize;
+ __u32 revision;

- if (!*ivalue)
- return -EINVAL;
- if (!validheader(size, cap))
+ revision = sansflags(caps->magic_etc);
+ if (revision != VFS_CAP_REVISION_2 && revision != VFS_CAP_REVISION_3)
return -EINVAL;
if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
return -EPERM;
- if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap))
+ if (revision == VFS_CAP_REVISION_2 && (idmap == &nop_mnt_idmap))
if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
/* user is privileged, just write the v2 */
- return size;
+ return 0;

- vfsrootid = rootid_from_xattr(*ivalue, size, task_ns);
+ vfsrootid = rootid_from_vfs_caps(caps, task_ns);
if (!vfsuid_valid(vfsrootid))
return -EINVAL;

- rootid = from_vfsuid(idmap, fs_ns, vfsrootid);
- if (!uid_valid(rootid))
+ if (!vfsuid_has_fsmapping(idmap, fs_ns, vfsrootid))
return -EINVAL;

- nsrootid = from_kuid(fs_ns, rootid);
- if (nsrootid == -1)
- return -EINVAL;
+ caps->rootid = vfsrootid;
+ caps->magic_etc = VFS_CAP_REVISION_3 |
+ (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE);

- newsize = sizeof(struct vfs_ns_cap_data);
- nscap = kmalloc(newsize, GFP_ATOMIC);
- if (!nscap)
- return -ENOMEM;
- nscap->rootid = cpu_to_le32(nsrootid);
- nsmagic = VFS_CAP_REVISION_3;
- magic = le32_to_cpu(cap->magic_etc);
- if (magic & VFS_CAP_FLAGS_EFFECTIVE)
- nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
- nscap->magic_etc = cpu_to_le32(nsmagic);
- memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
-
- *ivalue = nscap;
- return newsize;
+ return 0;
}

/*

--
2.43.0