[PATCH v3 4/4] add listmount(2) syscall

From: Miklos Szeredi
Date: Thu Sep 28 2023 - 09:04:22 EST


Add a way to query the children of a particular mount. This is a more
flexible way to iterate the mount tree than having to parse the complete
/proc/self/mountinfo.

Look up the mount by the new 64-bit mount ID. If a mount needs to be queried
based on path, then statx(2) can be used to first query the mount ID
belonging to the path.

Return an array of new (64-bit) mount IDs. Without privileges, only mounts
that are reachable from the task's root are listed.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
---
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
fs/namespace.c | 69 ++++++++++++++++++++++++++
include/linux/syscalls.h | 3 ++
include/uapi/asm-generic/unistd.h | 5 +-
include/uapi/linux/mount.h | 3 ++
6 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 317b1320ad18..65e0185b47a9 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -458,3 +458,4 @@
451 i386 cachestat sys_cachestat
452 i386 fchmodat2 sys_fchmodat2
454 i386 statmount sys_statmount
+455 i386 listmount sys_listmount
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 7312c440978f..a1b3ce7d22cc 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -376,6 +376,7 @@
452 common fchmodat2 sys_fchmodat2
453 64 map_shadow_stack sys_map_shadow_stack
454 common statmount sys_statmount
+455 common listmount sys_listmount

#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/fs/namespace.c b/fs/namespace.c
index 3326ba2b2810..050e2d2af110 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4970,6 +4970,75 @@ SYSCALL_DEFINE4(statmount, const struct __mount_arg __user *, req,
return ret;
}

+/*
+ * do_listmount - store the unique IDs of the mounts on @mnt's child list
+ * @mnt:     mount whose children are listed
+ * @buf:     userspace array that receives the 64-bit unique mount IDs
+ * @bufsize: capacity of @buf, counted in u64 entries (not bytes)
+ * @root:    root path the children must be reachable from
+ * @flags:   LISTMOUNT_UNREACHABLE lists children regardless of reachability;
+ *           setting it requires CAP_SYS_ADMIN
+ *
+ * Returns the number of IDs stored in @buf, or a negative error:
+ *   - whatever security_sb_statfs() returns on failure,
+ *   - -EPERM if LISTMOUNT_UNREACHABLE is set without CAP_SYS_ADMIN, or if
+ *     @mnt itself is not reachable and the caller lacks CAP_SYS_ADMIN
+ *     (with CAP_SYS_ADMIN an unreachable @mnt yields 0, an empty list),
+ *   - -EOVERFLOW if there are more children to report than @bufsize,
+ *   - -EFAULT if @buf cannot be written,
+ *   - -ERANGE if the count would not fit in a long.
+ * IDs written before an error is detected are left in @buf.
+ *
+ * NOTE(review): the child list is walked without taking any lock here; the
+ * syscall entry point holds namespace_sem for reading around this call.
+ */
+static long do_listmount(struct vfsmount *mnt, u64 __user *buf, size_t bufsize,
+			 const struct path *root, unsigned int flags)
+{
+	struct mount *r, *m = real_mount(mnt);
+	/* Same mount as @root, but at that mount's own root dentry. */
+	struct path rootmnt = {
+		.mnt = root->mnt,
+		.dentry = root->mnt->mnt_root
+	};
+	long ctr = 0;
+	bool reachable_only = true;
+	int err;
+
+	err = security_sb_statfs(mnt->mnt_root);
+	if (err)
+		return err;
+
+	if (flags & LISTMOUNT_UNREACHABLE) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		reachable_only = false;
+	}
+
+	/*
+	 * The queried mount is tested against rootmnt, while each child
+	 * below is tested against @root itself.
+	 */
+	if (reachable_only && !is_path_reachable(m, mnt->mnt_root, &rootmnt))
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	list_for_each_entry(r, &m->mnt_mounts, mnt_child) {
+		if (reachable_only &&
+		    !is_path_reachable(r, r->mnt.mnt_root, root))
+			continue;
+
+		if (ctr >= bufsize)
+			return -EOVERFLOW;
+		if (put_user(r->mnt_id_unique, buf + ctr))
+			return -EFAULT;
+		/*
+		 * Test before incrementing: bumping the counter first and
+		 * then looking for a negative value only detects the
+		 * overflow after signed wraparound has already happened.
+		 */
+		if (ctr == LONG_MAX)
+			return -ERANGE;
+		ctr++;
+	}
+	return ctr;
+}
+
+/*
+ * listmount(2) entry point.
+ *
+ * Rejects any flag other than LISTMOUNT_UNREACHABLE with -EINVAL, copies
+ * the request structure from @req (-EFAULT on failure) and looks up
+ * req->mnt_id in the caller's mount namespace.  If no mount is found the
+ * result is -ENOENT; otherwise the result of do_listmount(), evaluated
+ * against the caller's current fs root, is returned.  The lookup and the
+ * listing both run with namespace_sem held for reading.
+ */
+SYSCALL_DEFINE4(listmount, const struct __mount_arg __user *, req,
+		u64 __user *, buf, size_t, bufsize, unsigned int, flags)
+{
+	struct __mount_arg arg;
+	struct vfsmount *target;
+	struct path fs_root;
+	long ret = -ENOENT;	/* result when the mount ID is not found */
+
+	if (flags & ~LISTMOUNT_UNREACHABLE)
+		return -EINVAL;
+
+	if (copy_from_user(&arg, req, sizeof(arg)))
+		return -EFAULT;
+
+	down_read(&namespace_sem);
+	target = lookup_mnt_in_ns(arg.mnt_id, current->nsproxy->mnt_ns);
+	if (target) {
+		/* Hold a reference on the task's root only while listing. */
+		get_fs_root(current->fs, &fs_root);
+		ret = do_listmount(target, buf, bufsize, &fs_root, flags);
+		path_put(&fs_root);
+	}
+	up_read(&namespace_sem);
+
+	return ret;
+}
+
+
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index ba371024d902..38f3da7e04d1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -413,6 +413,9 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
asmlinkage long sys_statmount(const struct __mount_arg __user *req,
struct statmnt __user *buf, size_t bufsize,
unsigned int flags);
+asmlinkage long sys_listmount(const struct __mount_arg __user *req,
+ u64 __user *buf, size_t bufsize,
+ unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
#if BITS_PER_LONG == 32
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8f034e934a2e..8df6a747e21a 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -826,8 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
#define __NR_statmount 454
__SYSCALL(__NR_statmount, sys_statmount)

+#define __NR_listmount 455
+__SYSCALL(__NR_listmount, sys_listmount)
+
#undef __NR_syscalls
-#define __NR_syscalls 455
+#define __NR_syscalls 456

/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index d2c988ab526b..7aa9916659d2 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -194,4 +194,7 @@ struct __mount_arg {
#define STMT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STMT_FS_TYPE 0x00000020U /* Want/got fs_type */

+/* listmount(2) flags */
+#define LISTMOUNT_UNREACHABLE 0x01 /* List unreachable mounts too */
+
#endif /* _UAPI_LINUX_MOUNT_H */
--
2.41.0