Re: [linus:master] [eventfs] 852e46e239: BUG:unable_to_handle_page_fault_for_address

From: Linus Torvalds
Date: Tue Jan 30 2024 - 12:09:36 EST


On Tue, 30 Jan 2024 at 08:55, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> Let me just reboot into it to test that I got all the cases.
>
> It makes the code even more obvious, and avoids all the complexity.

Yup, this works at least for the case you pointed out, and it really
feels like the RightThing(tm) from a VFS standpoint.

NOTE! This patch is on top of my previous 5-patch series.

Linus
From 67b31bb53ef65132a65bfbe915628c481a39e73a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Date: Tue, 30 Jan 2024 09:00:18 -0800
Subject: [PATCH] eventfs: clean up dentry ops and add revalidate function

In order for the dentries to stay up-to-date with the eventfs changes,
just add a 'd_revalidate' function that checks the 'is_freed' bit.

Also, clean up the dentry release to actually use d_release() rather
than the slightly odd d_iput() function. We don't care about the inode,
all we want to do is to get rid of the refcount to the eventfs data
added by dentry->d_fsdata.

It would probably be cleaner to make eventfs its own filesystem, or at
least set its own dentry ops when looking up eventfs files. But as it
is, only eventfs dentries use d_fsdata, so we don't really need to split
these things up by use.

Another thing that might be worth doing is to make all eventfs lookups
mark their dentries as not worth caching. We could do that with
d_delete(), but the DCACHE_DONTCACHE flag would likely be even better.

As it is, the dentries are all freeable, but they only tend to get freed
at memory pressure rather than more proactively. But that's a separate
issue.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
fs/tracefs/event_inode.c | 21 +++++----------------
fs/tracefs/inode.c | 27 ++++++++++++++++++---------
fs/tracefs/internal.h | 3 ++-
3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index a37db0dac302..acdc797bd9c0 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -414,23 +414,14 @@ static inline struct eventfs_inode *alloc_ei(const char *name)


/**
- * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
- * @ti: the tracefs_inode of the dentry
+ * eventfs_d_release - dentry is going away
* @dentry: dentry which has the reference to remove.
*
* Remove the association between a dentry from an eventfs_inode.
*/
-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+void eventfs_d_release(struct dentry *dentry)
{
- struct eventfs_inode *ei;
-
- mutex_lock(&eventfs_mutex);
- ei = dentry->d_fsdata;
- if (ei) {
- dentry->d_fsdata = NULL;
- put_ei(ei);
- }
- mutex_unlock(&eventfs_mutex);
+ put_ei(dentry->d_fsdata);
}

/**
@@ -517,9 +508,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
}

enoent:
- /* Nothing found? */
- d_add(dentry, NULL);
- result = NULL;
+ /* Don't cache negative lookups, just return an error */
+ result = ERR_PTR(-ENOENT);

out:
mutex_unlock(&eventfs_mutex);
@@ -857,6 +847,5 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
* sticks around while the other ei->dentry are created
* and destroyed dynamically.
*/
- simple_recursive_removal(dentry, NULL);
dput(dentry);
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index e1b172c0e091..64122787e5d0 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -379,21 +379,30 @@ static const struct super_operations tracefs_super_operations = {
.show_options = tracefs_show_options,
};

-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
+/*
+ * It would be cleaner if eventfs had its own dentry ops.
+ *
+ * Note that d_revalidate is called potentially under RCU,
+ * so it can't take the eventfs mutex etc. It's fine - if
+ * we open a file just as it's marked dead, things will
+ * still work just fine, and just see the old stale case.
+ */
+static void tracefs_d_release(struct dentry *dentry)
{
- struct tracefs_inode *ti;
+ if (dentry->d_fsdata)
+ eventfs_d_release(dentry);
+}

- if (!dentry || !inode)
- return;
+static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct eventfs_inode *ei = dentry->d_fsdata;

- ti = get_tracefs(inode);
- if (ti && ti->flags & TRACEFS_EVENT_INODE)
- eventfs_set_ei_status_free(ti, dentry);
- iput(inode);
+ return !(ei && ei->is_freed);
}

static const struct dentry_operations tracefs_dentry_operations = {
- .d_iput = tracefs_dentry_iput,
+ .d_revalidate = tracefs_d_revalidate,
+ .d_release = tracefs_d_release,
};

static int trace_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 72db3bdc4dfb..d4194466b643 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -79,6 +79,7 @@ struct inode *tracefs_get_inode(struct super_block *sb);
struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
struct dentry *eventfs_failed_creating(struct dentry *dentry);
struct dentry *eventfs_end_creating(struct dentry *dentry);
-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+void eventfs_d_release(struct dentry *dentry);

#endif /* _TRACEFS_INTERNAL_H */
--
2.43.0.5.g38fb137bdb