Re: [PATCH 4/7] kernfs: Skip kernfs_drain_open_files() more aggressively

From: Chengming Zhou
Date: Thu Aug 25 2022 - 08:12:16 EST


On 2022/8/20 08:05, Tejun Heo wrote:
> Track the number of mmapped files and files that need to be released and
> skip kernfs_drain_open_file() if both are zero, which are the precise
> conditions which require draining open_files. The early exit test is
> factored into kernfs_should_drain_open_files() which is now tested by
> kernfs_drain_open_files()'s caller - kernfs_drain().
>
> This isn't a meaningful optimization on its own but will enable future
> stand-alone kernfs_deactivate() implementation.
>
> Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
> ---
> fs/kernfs/dir.c | 3 ++-
> fs/kernfs/file.c | 51 +++++++++++++++++++++++++------------
> fs/kernfs/kernfs-internal.h | 1 +
> 3 files changed, 38 insertions(+), 17 deletions(-)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 1cc88ba6de90..8ae44db920d4 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -489,7 +489,8 @@ static void kernfs_drain(struct kernfs_node *kn)
> rwsem_release(&kn->dep_map, _RET_IP_);
> }
>
> - kernfs_drain_open_files(kn);
> + if (kernfs_should_drain_open_files(kn))
> + kernfs_drain_open_files(kn);
>
> down_write(&root->kernfs_rwsem);
> }
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 7060a2a714b8..b510589af427 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -23,6 +23,8 @@ struct kernfs_open_node {
> atomic_t event;
> wait_queue_head_t poll;
> struct list_head files; /* goes through kernfs_open_file.list */
> + unsigned int nr_mmapped;
> + unsigned int nr_to_release;
> };
>
> /*
> @@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
>
> rc = 0;
> of->mmapped = true;
> + of_on(of)->nr_mmapped++;
> of->vm_ops = vma->vm_ops;
> vma->vm_ops = &kernfs_vm_ops;
> out_put:
> @@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
> }

The current code use kmalloc() to alloc kernfs_open_node, leave nr_to_release and
nr_mmapped uninitialized.

Found by below stress test:

```
cd /sys/fs/cgroup

while true; do echo 0 > cgroup.pressure; echo 1 > cgroup.pressure; done

while true; do cat *.pressure; done

```

Thanks.

>
> list_add_tail(&of->list, &on->files);
> + if (kn->flags & KERNFS_HAS_RELEASE)
> + on->nr_to_release++;
>
> mutex_unlock(mutex);
> return 0;
> @@ -606,8 +611,12 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
> return;
> }
>
> - if (of)
> + if (of) {
> + WARN_ON_ONCE((kn->flags & KERNFS_HAS_RELEASE) && !of->released);
> + if (of->mmapped)
> + on->nr_mmapped--;
> list_del(&of->list);
> + }
>
> if (list_empty(&on->files)) {
> rcu_assign_pointer(kn->attr.open, NULL);
> @@ -766,6 +775,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
> */
> kn->attr.ops->release(of);
> of->released = true;
> + of_on(of)->nr_to_release--;
> }
> }
>
> @@ -790,25 +800,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
> return 0;
> }
>
> -void kernfs_drain_open_files(struct kernfs_node *kn)
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn)
> {
> struct kernfs_open_node *on;
> - struct kernfs_open_file *of;
> - struct mutex *mutex;
> -
> - if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
> - return;
> + bool ret;
>
> /*
> - * lockless opportunistic check is safe below because no one is adding to
> - * ->attr.open at this point of time. This check allows early bail out
> - * if ->attr.open is already NULL. kernfs_unlink_open_file makes
> - * ->attr.open NULL only while holding kernfs_open_file_mutex so below
> - * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
> - * ->attr.open became NULL while waiting for the mutex.
> + * @kn being deactivated guarantees that @kn->attr.open can't change
> + * beneath us making the lockless test below safe.
> */
> - if (!rcu_access_pointer(kn->attr.open))
> - return;
> + WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
> +
> + rcu_read_lock();
> + on = rcu_dereference(kn->attr.open);
> + ret = on && (on->nr_mmapped || on->nr_to_release);
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
> +void kernfs_drain_open_files(struct kernfs_node *kn)
> +{
> + struct kernfs_open_node *on;
> + struct kernfs_open_file *of;
> + struct mutex *mutex;
>
> mutex = kernfs_open_file_mutex_lock(kn);
> on = kernfs_deref_open_node_locked(kn);
> @@ -820,13 +835,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
> list_for_each_entry(of, &on->files, list) {
> struct inode *inode = file_inode(of->file);
>
> - if (kn->flags & KERNFS_HAS_MMAP)
> + if (of->mmapped) {
> unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> + of->mmapped = false;
> + on->nr_mmapped--;
> + }
>
> if (kn->flags & KERNFS_HAS_RELEASE)
> kernfs_release_file(kn, of);
> }
>
> + WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
> mutex_unlock(mutex);
> }
>
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 3ae214d02d44..fc5821effd97 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
> */
> extern const struct file_operations kernfs_file_fops;
>
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn);
> void kernfs_drain_open_files(struct kernfs_node *kn);
>
> /*