Re: [PATCH 00/13] VFS: Filesystem information [ver #19]

From: Miklos Szeredi
Date: Wed Apr 01 2020 - 08:36:11 EST


On Wed, Apr 1, 2020 at 10:37 AM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
>
> On Wed, Apr 1, 2020 at 10:27 AM David Howells <dhowells@xxxxxxxxxx> wrote:
> >
> > Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
> >
> > > According to dhowell's measurements processing 100k mounts would take
> > > about a few seconds of system time (that's the time spent by the
> > > kernel to retrieve the data,
> >
> > But the inefficiency of mountfs - at least as currently implemented - scales
> > up with the number of individual values you want to retrieve, both in terms of
> > memory usage and time taken.
>
> I've taken that into account when guesstimating a "few seconds per
> 100k entries". My guess is that there's probably an order of
> magnitude difference between the performance of a fs based interface
> and a binary syscall based interface. That could be reduced somewhat
> with a readfile(2) type API.

And to show that I'm not completely off base, attached a patch that
adds a limited readfile(2) syscall and uses it in the p2 method.

Results are promising:

./test-fsinfo-perf /tmp/a 30000
--- make mounts ---
--- test fsinfo by path ---
sum(mnt_id) = 930000
--- test fsinfo by mnt_id ---
sum(mnt_id) = 930000
--- test /proc/fdinfo ---
sum(mnt_id) = 930000
--- test mountfs ---
sum(mnt_id) = 930000
For 30000 mounts, f= 146400us f2= 136766us p= 1406569us p2=
221669us; p=9.6*f p=10.3*f2 p=6.3*p2
--- umount ---

This is about a 2 fold increase in speed compared to open + read + close.

Is someone still worried about performance, or can we move on to more
interesting parts of the design?

Thanks,
Miklos
Index: linux/fs/mountfs/super.c
===================================================================
--- linux.orig/fs/mountfs/super.c 2020-04-01 14:21:24.609955072 +0200
+++ linux/fs/mountfs/super.c 2020-04-01 14:21:42.426151545 +0200
@@ -51,10 +51,11 @@ static bool mountfs_entry_visible(struct

return visible;
}
+
static int mountfs_attr_show(struct seq_file *sf, void *v)
{
const char *name = sf->file->f_path.dentry->d_name.name;
- struct mountfs_entry *entry = sf->private;
+ struct mountfs_entry *entry = file_inode(sf->file)->i_private;
struct mount *mnt;
struct vfsmount *m;
struct super_block *sb;
@@ -140,12 +141,40 @@ static int mountfs_attr_show(struct seq_
return err;
}

+ssize_t mountfs_attr_readfile(struct file *file, char __user *buf, size_t size)
+{
+ struct seq_file m = { .size = PAGE_SIZE, .file = file };
+ ssize_t ret;
+
+retry:
+ m.buf = kvmalloc(m.size, GFP_KERNEL);
+ if (!m.buf)
+ return -ENOMEM;
+
+ ret = mountfs_attr_show(&m, NULL);
+ if (!ret) {
+ if (m.count == m.size) {
+ kvfree(m.buf);
+ m.size <<= 1;
+ m.count = 0;
+ goto retry;
+ }
+ ret = min(m.count, size);
+ if (copy_to_user(buf, m.buf, ret))
+ ret = -EFAULT;
+ }
+
+ kvfree(m.buf);
+ return ret;
+}
+
static int mountfs_attr_open(struct inode *inode, struct file *file)
{
- return single_open(file, mountfs_attr_show, inode->i_private);
+ return single_open(file, mountfs_attr_show, NULL);
}

static const struct file_operations mountfs_attr_fops = {
+ .readfile = mountfs_attr_readfile,
.open = mountfs_attr_open,
.read = seq_read,
.llseek = seq_lseek,
Index: linux/samples/vfs/test-fsinfo-perf.c
===================================================================
--- linux.orig/samples/vfs/test-fsinfo-perf.c 2020-04-01 14:21:24.609955072 +0200
+++ linux/samples/vfs/test-fsinfo-perf.c 2020-04-01 14:21:42.426151545 +0200
@@ -172,6 +172,12 @@ static void get_id_by_proc(int ix, const
//printf("[%u] %u\n", ix, x);
}

+static long readfile(int dfd, const char *name, char *buffer, size_t size,
+ int flags)
+{
+ return syscall(__NR_readfile, dfd, name, buffer, size, flags);
+}
+
static void get_id_by_fsinfo_2(void)
{
struct fsinfo_mount_topology t;
@@ -300,11 +306,8 @@ static void get_id_by_mountfs(void)
}

sprintf(procfile, "%u/parent", mnt_id);
- fd = openat(mntfd, procfile, O_RDONLY);
- ERR(fd, procfile);
- len = read(fd, buffer, sizeof(buffer) - 1);
- ERR(len, "read/parent");
- close(fd);
+ len = readfile(mntfd, procfile, buffer, sizeof(buffer), 0);
+ ERR(len, "readfile/parent");
if (len > 0 && buffer[len - 1] == '\n')
len--;
buffer[len] = 0;
@@ -319,11 +322,8 @@ static void get_id_by_mountfs(void)
sum_check += x;

sprintf(procfile, "%u/counter", mnt_id);
- fd = openat(mntfd, procfile, O_RDONLY);
- ERR(fd, procfile);
- len = read(fd, buffer, sizeof(buffer) - 1);
- ERR(len, "read/counter");
- close(fd);
+ len = readfile(mntfd, procfile, buffer, sizeof(buffer) - 1, 0);
+ ERR(len, "readfile/counter");
if (len > 0 && buffer[len - 1] == '\n')
len--;
buffer[len] = 0;
Index: linux/arch/x86/entry/syscalls/syscall_64.tbl
===================================================================
--- linux.orig/arch/x86/entry/syscalls/syscall_64.tbl 2020-04-01 14:21:37.284094840 +0200
+++ linux/arch/x86/entry/syscalls/syscall_64.tbl 2020-04-01 14:21:42.412151390 +0200
@@ -362,6 +362,7 @@
439 common watch_mount __x64_sys_watch_mount
440 common watch_sb __x64_sys_watch_sb
441 common fsinfo __x64_sys_fsinfo
+442 common readfile __x64_sys_readfile

#
# x32-specific system call numbers start at 512 to avoid cache impact
Index: linux/fs/open.c
===================================================================
--- linux.orig/fs/open.c 2020-04-01 14:21:37.284094840 +0200
+++ linux/fs/open.c 2020-04-01 14:21:42.424151523 +0200
@@ -1340,3 +1340,25 @@ int stream_open(struct inode *inode, str
}

EXPORT_SYMBOL(stream_open);
+
+SYSCALL_DEFINE5(readfile, int, dfd, const char __user *, filename,
+ char __user *, buffer, size_t, bufsize, int, flags)
+{
+ ssize_t ret;
+ struct file file = {};
+
+ if (flags)
+ return -EINVAL;
+
+ ret = user_path_at(dfd, filename, 0, &file.f_path);
+ if (!ret) {
+ file.f_inode = file.f_path.dentry->d_inode;
+ file.f_op = file.f_inode->i_fop;
+ ret = -EOPNOTSUPP;
+ if (file.f_op->readfile)
+ ret = file.f_op->readfile(&file, buffer, bufsize);
+ path_put(&file.f_path);
+ }
+
+ return ret;
+}
Index: linux/include/linux/syscalls.h
===================================================================
--- linux.orig/include/linux/syscalls.h 2020-04-01 14:21:37.284094840 +0200
+++ linux/include/linux/syscalls.h 2020-04-01 14:21:42.413151401 +0200
@@ -1011,6 +1011,8 @@ asmlinkage long sys_watch_sb(int dfd, co
asmlinkage long sys_fsinfo(int dfd, const char __user *pathname,
struct fsinfo_params __user *params, size_t params_size,
void __user *result_buffer, size_t result_buf_size);
+asmlinkage long sys_readfile(int dfd, const char __user *filename,
+ char __user *buffer, size_t bufsize, int flags);

/*
* Architecture-specific system calls
Index: linux/include/uapi/asm-generic/unistd.h
===================================================================
--- linux.orig/include/uapi/asm-generic/unistd.h 2020-04-01 14:21:37.284094840 +0200
+++ linux/include/uapi/asm-generic/unistd.h 2020-04-01 14:21:42.413151401 +0200
@@ -861,9 +861,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mo
__SYSCALL(__NR_watch_sb, sys_watch_sb)
#define __NR_fsinfo 441
__SYSCALL(__NR_fsinfo, sys_fsinfo)
+#define __NR_readfile 442
+__SYSCALL(__NR_readfile, sys_readfile)

#undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443

/*
* 32 bit systems traditionally used different
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h 2020-04-01 14:21:19.144894804 +0200
+++ linux/include/linux/fs.h 2020-04-01 14:21:42.425151534 +0200
@@ -1868,6 +1868,7 @@ struct file_operations {
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
+ ssize_t (*readfile)(struct file *, char __user *, size_t);
} __randomize_layout;

struct inode_operations {