Re: [PATCH 2/5] vfs: Add tracepoints for inode_excl_inuse_trylock/unlock

From: David Howells
Date: Tue Feb 01 2022 - 05:44:28 EST


I forgot to add the tracepoint header to the commit.

David
---
commit c8cefa2ac359254ecebfb20dcd0676bf9a167277
Author: David Howells <dhowells@xxxxxxxxxx>
Date: Mon Jan 31 11:52:44 2022 +0000

vfs: Add tracepoints for inode_excl_inuse_trylock/unlock

Add tracepoints for inode_excl_inuse_trylock/unlock() to record successful
and lock, failed lock, successful unlock and unlock when it wasn't locked.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Amir Goldstein <amir73il@xxxxxxxxx>
cc: Miklos Szeredi <miklos@xxxxxxxxxx>
cc: linux-unionfs@xxxxxxxxxxxxxxx
cc: linux-cachefs@xxxxxxxxxx

diff --git a/fs/inode.c b/fs/inode.c
index 954719f66113..61b93a89853f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,8 @@
#include <linux/iversion.h>
#include <trace/events/writeback.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/vfs.h>

/*
* Inode locking rules:
@@ -2409,11 +2411,14 @@ EXPORT_SYMBOL(current_time);
/**
* inode_excl_inuse_trylock - Try to exclusively lock an inode for kernel access
* @dentry: Reference to the inode to be locked
+ * @o: Private reference for the kernel service
+ * @who: Which kernel service is trying to gain the lock
*
* Try to gain exclusive access to an inode for a kernel service, returning
* true if successful.
*/
-bool inode_excl_inuse_trylock(struct dentry *dentry)
+bool inode_excl_inuse_trylock(struct dentry *dentry, unsigned int o,
+ enum inode_excl_inuse_by who)
{
struct inode *inode = d_inode(dentry);
bool locked = false;
@@ -2421,7 +2426,10 @@ bool inode_excl_inuse_trylock(struct dentry *dentry)
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_EXCL_INUSE)) {
inode->i_state |= I_EXCL_INUSE;
+ trace_inode_excl_inuse_lock(inode, o, who);
locked = true;
+ } else {
+ trace_inode_excl_inuse_lock_failed(inode, o, who);
}
spin_unlock(&inode->i_lock);

@@ -2432,18 +2440,23 @@ EXPORT_SYMBOL(inode_excl_inuse_trylock);
/**
* inode_excl_inuse_unlock - Unlock exclusive kernel access to an inode
* @dentry: Reference to the inode to be unlocked
+ * @o: Private reference for the kernel service
*
* Drop exclusive access to an inode for a kernel service. A warning is given
* if the inode was not marked for exclusive access.
*/
-void inode_excl_inuse_unlock(struct dentry *dentry)
+void inode_excl_inuse_unlock(struct dentry *dentry, unsigned int o)
{
if (dentry) {
struct inode *inode = d_inode(dentry);

spin_lock(&inode->i_lock);
- WARN_ON(!(inode->i_state & I_EXCL_INUSE));
- inode->i_state &= ~I_EXCL_INUSE;
+ if (WARN_ON(!(inode->i_state & I_EXCL_INUSE))) {
+ trace_inode_excl_inuse_unlock_bad(inode, o);
+ } else {
+ inode->i_state &= ~I_EXCL_INUSE;
+ trace_inode_excl_inuse_unlock(inode, o);
+ }
spin_unlock(&inode->i_lock);
}
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5c3361a2dc7c..6434ae11496d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -224,10 +224,10 @@ static void ovl_free_fs(struct ovl_fs *ofs)
dput(ofs->indexdir);
dput(ofs->workdir);
if (ofs->workdir_locked)
- inode_excl_inuse_unlock(ofs->workbasedir);
+ inode_excl_inuse_unlock(ofs->workbasedir, 0);
dput(ofs->workbasedir);
if (ofs->upperdir_locked)
- inode_excl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+ inode_excl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root, 0);

/* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
mounts = (struct vfsmount **) ofs->layers;
@@ -1239,7 +1239,8 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
sb->s_flags |= SB_NOSEC;

- if (inode_excl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
+ if (inode_excl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root, 0,
+ inode_excl_inuse_by_overlayfs)) {
ofs->upperdir_locked = true;
} else {
err = ovl_report_in_use(ofs, "upperdir");
@@ -1499,7 +1500,8 @@ static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,

ofs->workbasedir = dget(workpath.dentry);

- if (inode_excl_inuse_trylock(ofs->workbasedir)) {
+ if (inode_excl_inuse_trylock(ofs->workbasedir, 0,
+ inode_excl_inuse_by_overlayfs)) {
ofs->workdir_locked = true;
} else {
err = ovl_report_in_use(ofs, "workdir");
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4c15e270f1ac..f461883d66a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2389,8 +2389,13 @@ static inline bool inode_is_dirtytime_only(struct inode *inode)
I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
}

-bool inode_excl_inuse_trylock(struct dentry *dentry);
-void inode_excl_inuse_unlock(struct dentry *dentry);
+enum inode_excl_inuse_by {
+ inode_excl_inuse_by_overlayfs,
+};
+
+bool inode_excl_inuse_trylock(struct dentry *dentry, unsigned int o,
+ enum inode_excl_inuse_by who);
+void inode_excl_inuse_unlock(struct dentry *dentry, unsigned int o);

static inline bool inode_is_excl_inuse(struct dentry *dentry)
{
diff --git a/include/trace/events/vfs.h b/include/trace/events/vfs.h
new file mode 100644
index 000000000000..f053752109dd
--- /dev/null
+++ b/include/trace/events/vfs.h
@@ -0,0 +1,134 @@
+/* VFS tracepoints
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vfs
+
+#if !defined(_TRACE_VFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VFS_H
+
+#include <linux/tracepoint.h>
+#include <linux/fs.h>
+
+/*
+ * Define enum -> string mappings for display.
+ */
+#define inode_excl_inuse_by_traces \
+ EM(inode_excl_inuse_by_cachefiles, "cachefiles") \
+ E_(inode_excl_inuse_by_overlayfs, "overlayfs")
+
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+inode_excl_inuse_by_traces;
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) { a, b },
+#define E_(a, b) { a, b }
+
+
+TRACE_EVENT(inode_excl_inuse_lock,
+ TP_PROTO(struct inode *inode, unsigned int o,
+ enum inode_excl_inuse_by who),
+
+ TP_ARGS(inode, o, who),
+
+ TP_STRUCT__entry(
+ __field(ino_t, inode )
+ __field(unsigned int, o )
+ __field(enum inode_excl_inuse_by, who )
+ ),
+
+ TP_fast_assign(
+ __entry->inode = inode->i_ino;
+ __entry->o = o;
+ __entry->who = who;
+ ),
+
+ TP_printk("B=%lx %s o=%08x",
+ __entry->inode,
+ __print_symbolic(__entry->who, inode_excl_inuse_by_traces),
+ __entry->o)
+ );
+
+TRACE_EVENT(inode_excl_inuse_lock_failed,
+ TP_PROTO(struct inode *inode, unsigned int o,
+ enum inode_excl_inuse_by who),
+
+ TP_ARGS(inode, o, who),
+
+ TP_STRUCT__entry(
+ __field(ino_t, inode )
+ __field(unsigned int, o )
+ __field(enum inode_excl_inuse_by, who )
+ ),
+
+ TP_fast_assign(
+ __entry->inode = inode->i_ino;
+ __entry->o = o;
+ __entry->who = who;
+ ),
+
+ TP_printk("B=%lx %s o=%08x",
+ __entry->inode,
+ __print_symbolic(__entry->who, inode_excl_inuse_by_traces),
+ __entry->o)
+ );
+
+TRACE_EVENT(inode_excl_inuse_unlock,
+ TP_PROTO(struct inode *inode, unsigned int o),
+
+ TP_ARGS(inode, o),
+
+ TP_STRUCT__entry(
+ __field(ino_t, inode )
+ __field(unsigned int, o )
+ ),
+
+ TP_fast_assign(
+ __entry->inode = inode->i_ino;
+ __entry->o = o;
+ ),
+
+ TP_printk("B=%lx o=%08x",
+ __entry->inode,
+ __entry->o)
+ );
+
+TRACE_EVENT(inode_excl_inuse_unlock_bad,
+ TP_PROTO(struct inode *inode, unsigned int o),
+
+ TP_ARGS(inode, o),
+
+ TP_STRUCT__entry(
+ __field(ino_t, inode )
+ __field(unsigned int, o )
+ ),
+
+ TP_fast_assign(
+ __entry->inode = inode->i_ino;
+ __entry->o = o;
+ ),
+
+ TP_printk("B=%lx o=%08x",
+ __entry->inode,
+ __entry->o)
+ );
+
+#endif /* _TRACE_VFS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>