[PATCH] !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!

From: Christian Brauner
Date: Mon Dec 06 2021 - 08:08:28 EST


---
include/linux/securityfs.h | 20 +++++
include/linux/user_namespace.h | 1 +
kernel/user_namespace.c | 3 +
security/inode.c | 129 ++++++++++++++++++++++++++++++--
security/integrity/ima/ima.h | 1 +
security/integrity/ima/ima_fs.c | 20 ++++-
6 files changed, 165 insertions(+), 9 deletions(-)
create mode 100644 include/linux/securityfs.h

diff --git a/include/linux/securityfs.h b/include/linux/securityfs.h
new file mode 100644
index 000000000000..2e973be160b1
--- /dev/null
+++ b/include/linux/securityfs.h
@@ -0,0 +1,20 @@
+#ifndef __LINUX_SECURITYFS_H
+#define __LINUX_SECURITYFS_H
+
+struct vfsmount;
+
+#ifdef CONFIG_SECURITYFS
+
+/*
+ * Allocated once per user_ns the first time securityfs is mounted. Can be
+ * used to stash securityfs relevant state that absolutely needs to survive
+ * super_block destruction on last umount.
+ */
+struct securityfs_info {
+ // pointer to relevant ima stuff or instance?
+ // pointer to relevant apparmor stuff or instance?
+ // pointer to relevant selinux stuff or instance?
+};
+#endif /* CONFIG_SECURITYFS */
+
+#endif /* ! __LINUX_SECURITYFS_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6b8bd060d8c4..42676f5bcd43 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -103,6 +103,7 @@ struct user_namespace {
#ifdef CONFIG_IMA
struct ima_namespace *ima_ns;
#endif
+ struct securityfs_info *securityfs_info;
#ifdef CONFIG_SECURITYFS
struct vfsmount *securityfs_mount;
bool securityfs_notifier_sent;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c26885343b19..d65b20d8a90b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -211,6 +211,9 @@ static void free_user_ns(struct work_struct *work)
}
#ifdef CONFIG_IMA
put_ima_ns(ns->ima_ns);
+#endif
+#ifdef CONFIG_SECURITYFS
+ kfree(ns->securityfs_info);
#endif
retire_userns_sysctls(ns);
key_free_user_ns(ns);
diff --git a/security/inode.c b/security/inode.c
index 62ab4630dc31..1c3b2797367d 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -66,18 +66,57 @@ static void securityfs_free_context(struct fs_context *fc)
mntput(ns->securityfs_mount);
}

+
+/*
+ * This is really just a helper we would need in case we wanted to retrieve
+ * securityfs_info independent of the super_block. If that's not needed, then
+ * you can as well remove the smp_load_acquire() and the associated
+ * smp_store_release().
+ */
+struct securitfs_info *to_securityfs_info(struct user_namespace *user_ns)
+{
+
+ return smp_load_acquire(&user_ns->securityfs_info);
+}
+
static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ int err;
static const struct tree_descr files[] = {{""}};
- int error;
-
- error = simple_fill_super(sb, SECURITYFS_MAGIC, files);
- if (error)
- return error;
+ struct user_namespace *user_ns = sb->s_user_ns;
+ struct securityfs_info *securityfs_info;
+
+ /*
+ * Allocate a new securityfs_info instance for this userns.
+ * While multiple superblocks can exist they are keyed by userns in
+ * s_fs_info for user_ns. Hence, the vfs guarantees that
+ * securityfs_fill_super() is called exactly once whenever a
+ * securityfs superblock for a userns is created. This in turn lets us
+ * conclude that when a securityfs superblock is created for the first
+ * time for a userns there's no one racing us. Therefore we don't need
+ * any barriers when we dereference securityfs_info.
+ */
+ securityfs_info = user_ns->securityfs_info;
+ if (!securityfs_info) {
+ securityfs_info = kzalloc(sizeof(struct securityfs_info), GFP_KERNEL);
+ if (!securityfs_info)
+ return -ENOMEM;
+
+ // TODO: Initialize securityfs_info
+
+ /*
+ * Pairs with smp_load_acquire() in to_securityfs_info().
+ *
+ * Please see the commment there.
+ */
+ smp_store_release(&user_ns->securityfs_info, securityfs_info);
+ }

- sb->s_op = &securityfs_super_operations;
+ err = simple_fill_super(sb, SECURITYFS_MAGIC, files);
+ if (!err)
+ sb->s_op = &securityfs_super_operations;

- return 0;
+ return ima_fs_ns_late_init(sb);
}

static int securityfs_get_tree(struct fs_context *fc)
@@ -237,6 +276,82 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
return dentry;
}

+struct dentry *securityfs_create_dentry_ns(struct super_block *sb,
+ const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops,
+ const struct inode_operations *iops)
+{
+ struct dentry *dentry;
+ struct inode *dir, *inode;
+ int error;
+ struct user_namespace *ns = sb->s_user_ns;
+
+ if (!(mode & S_IFMT))
+ mode = (mode & S_IALLUGO) | S_IFREG;
+
+ pr_debug("securityfs: creating file '%s', ns=%u\n",name, ns->ns.inum);
+
+ if (ns == &init_user_ns) {
+ error = simple_pin_fs(&fs_type, &ns->securityfs_mount,
+ &securityfs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+ }
+
+ /* You really just require to always pass the parent? */
+ if (!parent)
+ parent = sb->s_root;
+
+ dir = d_inode(parent);
+
+ inode_lock(dir);
+ dentry = lookup_one_len(name, parent, strlen(name));
+ if (IS_ERR(dentry))
+ goto out;
+
+ if (d_really_is_positive(dentry)) {
+ error = -EEXIST;
+ goto out1;
+ }
+
+ inode = new_inode(dir->i_sb);
+ if (!inode) {
+ error = -ENOMEM;
+ goto out1;
+ }
+
+ inode->i_ino = get_next_ino();
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_private = data;
+ if (S_ISDIR(mode)) {
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ inc_nlink(inode);
+ inc_nlink(dir);
+ } else if (S_ISLNK(mode)) {
+ inode->i_op = iops ? iops : &simple_symlink_inode_operations;
+ inode->i_link = data;
+ } else {
+ inode->i_fop = fops;
+ }
+ d_instantiate(dentry, inode);
+ dget(dentry);
+ inode_unlock(dir);
+ return dentry;
+
+out1:
+ dput(dentry);
+ dentry = ERR_PTR(error);
+out:
+ inode_unlock(dir);
+ if (ns == &init_user_ns)
+ simple_release_fs(&ns->securityfs_mount,
+ &securityfs_mount_count);
+ return dentry;
+}
+
/**
* securityfs_create_file - create a file in the securityfs filesystem
*
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 12b7df65a5ff..806f19215052 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -140,6 +140,7 @@ struct ns_status {
int ima_init(void);
int ima_fs_init(void);
void ima_fs_ns_free(void);
+int ima_fs_ns_late_init(struct super_block *sb);
int ima_add_template_entry(struct ima_namespace *ns,
struct ima_template_entry *entry, int violation,
const char *op, struct inode *inode,
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 26f26e8756a8..4b25912db448 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -500,11 +500,27 @@ static void ima_fs_ns_free_dentries(struct ima_namespace *ns)
/* Function to populeate namespace SecurityFS once user namespace
* has been configured.
*/
-static int ima_fs_ns_late_init(struct user_namespace *user_ns)
+int ima_fs_ns_late_init(struct super_block *sb)
{
- struct ima_namespace *ns = user_ns->ima_ns;
+ /*
+ * We know that s_user_ns === ima_ns->user_ns.
+ *
+ * In other words, here we can go from superblock to relevant
+ * namespaces never from namespace to superblock. Ideally we try to
+ * avoid going from namespace to superblock.
+ */
+ struct ima_namespace *ns = sb->s_user_ns->ima_ns;
struct dentry *parent;

+
+ // TODO:
+ //
+ // Port this to use new helpers that take a super_block as argument.
+ //
+ // This allows us to get rid of any vfsmount dependencies.
+ //
+ // Probably should also be renamed to something better.
+
/* already initialized? */
if (ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR])
return 0;
--
2.30.2