[RFC PATCH 1/2] fs: allow filesystems to directly pass an existing struct file

From: Enrico Weigelt, metux IT consult
Date: Tue Jun 29 2021 - 10:44:31 EST


In some scenarios, file systems might want to pass an already opened
struct file instance on an open() call, instead of opening a new one.

This enables techniques similar to the well-known file descriptor
passing via Unix domain sockets, but for plain open() calls.

Signed-off-by: Enrico Weigelt, metux IT consult <info@xxxxxxxxx>
---
fs/Kconfig | 3 +++
fs/internal.h | 6 ++++++
fs/namei.c | 2 +-
fs/open.c | 42 +++++++++++++++++++++++++++++++++++++++++-
include/linux/fs.h | 9 +++++++++
5 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 141a856c50e7..b8b7a77b656c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -357,4 +357,7 @@ source "fs/unicode/Kconfig"
config IO_WQ
bool

+config FS_BOXED_FILE
+ bool
+
endmenu
diff --git a/fs/internal.h b/fs/internal.h
index 6aeae7ef3380..e5e9cf038a24 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -142,6 +142,12 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int chown_common(const struct path *path, uid_t user, gid_t group);
extern int vfs_open(const struct path *, struct file *);

+#ifdef CONFIG_FS_BOXED_FILE
+extern struct file *unbox_file(struct file *);
+#else /* !CONFIG_FS_BOXED_FILE */
+static inline struct file *unbox_file(struct file *f) { return f; }
+#endif /* CONFIG_FS_BOXED_FILE */
+
/*
* inode.c
*/
diff --git a/fs/namei.c b/fs/namei.c
index 79b0ff9b151e..b186d2d75b63 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3496,7 +3496,7 @@ static struct file *path_openat(struct nameidata *nd,
}
if (likely(!error)) {
if (likely(file->f_mode & FMODE_OPENED))
- return file;
+ return unbox_file(file);
WARN_ON(1);
error = -EINVAL;
}
diff --git a/fs/open.c b/fs/open.c
index e53af13b5835..88daf09ffeb4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -769,6 +769,46 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}

+#ifdef CONFIG_FS_BOXED_FILE
+/*
+ * Finish up an open procedure before returning the file to the caller.
+ * NULL and ERR_PTR values, and files without f->boxed_file set, are returned
+ * unchanged. Otherwise f is dropped via fput() and f->boxed_file is returned.
+ *
+ * This function is only supposed to be called by functions like dentry_open()
+ * and path_openat() that allocate a new struct file and finally pass it to
+ * vfs_open() - the struct file should not have been used in any way in the
+ * meantime, or unpleasant things may happen.
+ */
+struct file *unbox_file(struct file *f)
+{
+ struct file *boxed;
+
+ if (unlikely(!f))
+ return NULL;
+
+ if (IS_ERR(f))
+ return f;
+
+ if (likely(!f->boxed_file))
+ return f;
+
+ /* The fs stored another struct file in f->boxed_file that should be
+ directly passed to our callers instead of the one that had been newly
+ created for the open procedure.
+
+ The boxed file is expected to be already ref'ed, so we keep its refcount.
+ Since the upper file (f) has just been opened and not been accessed any
+ further, we can just call fput() on it.
+ */
+
+ boxed = f->boxed_file;
+ fput(f);
+
+ return boxed;
+}
+#endif /* CONFIG_FS_BOXED_FILE */
+
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *))
@@ -959,7 +999,7 @@ struct file *dentry_open(const struct path *path, int flags,
f = ERR_PTR(error);
}
}
- return f;
+ return unbox_file(f);
}
EXPORT_SYMBOL(dentry_open);

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..a778c5c057ab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -955,6 +955,15 @@ struct file {
struct address_space *f_mapping;
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
+
+#ifdef CONFIG_FS_BOXED_FILE
+ /* Only for file systems that want to pass an *existing* file to the
+ caller of open() instead of the newly created one. The semantics are
+ similar to passing an fd via a unix socket, but happen via an
+ open() call instead.
+ */
+ struct file *boxed_file;
+#endif
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */

--
2.20.1