Re: [RFC PATCH] kernfs: use kernfs_node specific mutex and spinlock.

From: Imran Khan
Date: Thu Dec 23 2021 - 17:53:51 EST


Hi everyone,

On 16/12/21 2:06 am, Imran Khan wrote:
> Right now a global mutex (kernfs_open_file_mutex) protects the list of
> kernfs_open_file instances corresponding to a sysfs attribute, so even
> if different tasks are opening or closing different sysfs files they
> can contend on the osq_lock of this mutex. The contention is more apparent
> in large-scale systems with a few hundred CPUs, where most of the CPUs have
> running tasks that are opening, accessing or closing sysfs files at any
> point in time. Since each list of kernfs_open_file belongs to a
> kernfs_open_node instance, which in turn corresponds to one kernfs_node,
> move the global kernfs_open_file_mutex into kernfs_node so that it does
> not block access to kernfs_open_file lists corresponding to other
> kernfs_nodes.
>
> Also, since kernfs_node->attr.open points to the kernfs_open_node instance
> corresponding to the kernfs_node, we can use a kernfs_node-specific
> spinlock in place of the current global spinlock, i.e. kernfs_open_node_lock.
> So make this spinlock local to the kernfs_node instance as well.
>
> Signed-off-by: Imran Khan <imran.f.khan@xxxxxxxxxx>
> ---
> I have kept this patch as an RFC, as I am not sure whether I have overlooked
> any scenario(s) where these global locks are needed.
>

Could someone please provide some feedback on this change? Also, if
there are any issues with this change, could I make these locks per-fs,
as has been done in [1]?

[1] https://lore.kernel.org/lkml/YZvV0ESA+zHHqHBU@xxxxxxxxxx/

Thanks,
Imran

> fs/kernfs/dir.c | 2 ++
> fs/kernfs/file.c | 48 +++++++++++++++---------------------------
> include/linux/kernfs.h | 2 ++
> 3 files changed, 21 insertions(+), 31 deletions(-)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index e6d9772ddb4c..cd68ac30f71b 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -603,6 +603,8 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
> atomic_set(&kn->count, 1);
> atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
> RB_CLEAR_NODE(&kn->rb);
> + spin_lock_init(&kn->kernfs_open_node_lock);
> + mutex_init(&kn->kernfs_open_file_mutex);
>
> kn->name = name;
> kn->mode = mode;
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 9414a7a60a9f..4114745d80d5 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -18,20 +18,6 @@
>
> #include "kernfs-internal.h"
>
> -/*
> - * There's one kernfs_open_file for each open file and one kernfs_open_node
> - * for each kernfs_node with one or more open files.
> - *
> - * kernfs_node->attr.open points to kernfs_open_node. attr.open is
> - * protected by kernfs_open_node_lock.
> - *
> - * filp->private_data points to seq_file whose ->private points to
> - * kernfs_open_file. kernfs_open_files are chained at
> - * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
> - */
> -static DEFINE_SPINLOCK(kernfs_open_node_lock);
> -static DEFINE_MUTEX(kernfs_open_file_mutex);
> -
> struct kernfs_open_node {
> atomic_t refcnt;
> atomic_t event;
> @@ -526,8 +512,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
> struct kernfs_open_node *on, *new_on = NULL;
>
> retry:
> - mutex_lock(&kernfs_open_file_mutex);
> - spin_lock_irq(&kernfs_open_node_lock);
> + mutex_lock(&kn->kernfs_open_file_mutex);
> + spin_lock_irq(&kn->kernfs_open_node_lock);
>
> if (!kn->attr.open && new_on) {
> kn->attr.open = new_on;
> @@ -540,8 +526,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
> list_add_tail(&of->list, &on->files);
> }
>
> - spin_unlock_irq(&kernfs_open_node_lock);
> - mutex_unlock(&kernfs_open_file_mutex);
> + spin_unlock_irq(&kn->kernfs_open_node_lock);
> + mutex_unlock(&kn->kernfs_open_file_mutex);
>
> if (on) {
> kfree(new_on);
> @@ -577,8 +563,8 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
> struct kernfs_open_node *on = kn->attr.open;
> unsigned long flags;
>
> - mutex_lock(&kernfs_open_file_mutex);
> - spin_lock_irqsave(&kernfs_open_node_lock, flags);
> + mutex_lock(&kn->kernfs_open_file_mutex);
> + spin_lock_irqsave(&kn->kernfs_open_node_lock, flags);
>
> if (of)
> list_del(&of->list);
> @@ -588,8 +574,8 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
> else
> on = NULL;
>
> - spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
> - mutex_unlock(&kernfs_open_file_mutex);
> + spin_unlock_irqrestore(&kn->kernfs_open_node_lock, flags);
> + mutex_unlock(&kn->kernfs_open_file_mutex);
>
> kfree(on);
> }
> @@ -733,7 +719,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
> * here because drain path may be called from places which can
> * cause circular dependency.
> */
> - lockdep_assert_held(&kernfs_open_file_mutex);
> + lockdep_assert_held(&kn->kernfs_open_file_mutex);
>
> if (!of->released) {
> /*
> @@ -752,9 +738,9 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
> struct kernfs_open_file *of = kernfs_of(filp);
>
> if (kn->flags & KERNFS_HAS_RELEASE) {
> - mutex_lock(&kernfs_open_file_mutex);
> + mutex_lock(&kn->kernfs_open_file_mutex);
> kernfs_release_file(kn, of);
> - mutex_unlock(&kernfs_open_file_mutex);
> + mutex_unlock(&kn->kernfs_open_file_mutex);
> }
>
> kernfs_put_open_node(kn, of);
> @@ -773,15 +759,15 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
> if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
> return;
>
> - spin_lock_irq(&kernfs_open_node_lock);
> + spin_lock_irq(&kn->kernfs_open_node_lock);
> on = kn->attr.open;
> if (on)
> atomic_inc(&on->refcnt);
> - spin_unlock_irq(&kernfs_open_node_lock);
> + spin_unlock_irq(&kn->kernfs_open_node_lock);
> if (!on)
> return;
>
> - mutex_lock(&kernfs_open_file_mutex);
> + mutex_lock(&kn->kernfs_open_file_mutex);
>
> list_for_each_entry(of, &on->files, list) {
> struct inode *inode = file_inode(of->file);
> @@ -793,7 +779,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
> kernfs_release_file(kn, of);
> }
>
> - mutex_unlock(&kernfs_open_file_mutex);
> + mutex_unlock(&kn->kernfs_open_file_mutex);
>
> kernfs_put_open_node(kn, NULL);
> }
> @@ -922,13 +908,13 @@ void kernfs_notify(struct kernfs_node *kn)
> return;
>
> /* kick poll immediately */
> - spin_lock_irqsave(&kernfs_open_node_lock, flags);
> + spin_lock_irqsave(&kn->kernfs_open_node_lock, flags);
> on = kn->attr.open;
> if (on) {
> atomic_inc(&on->event);
> wake_up_interruptible(&on->poll);
> }
> - spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
> + spin_unlock_irqrestore(&kn->kernfs_open_node_lock, flags);
>
> /* schedule work to kick fsnotify */
> spin_lock_irqsave(&kernfs_notify_lock, flags);
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index 9f650986a81b..22cd01477129 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -162,6 +162,8 @@ struct kernfs_node {
> unsigned short flags;
> umode_t mode;
> struct kernfs_iattrs *iattr;
> + spinlock_t kernfs_open_node_lock;
> + struct mutex kernfs_open_file_mutex;
> };
>
> /*
>
> base-commit: 0bafb8f3ebc84525d0ae0fcea22d12151b99312f