[RFC PATCH 03/62] new inode method: ->free_inode()

From: Al Viro
Date: Tue Apr 16 2019 - 14:00:15 EST


From: Al Viro <viro@xxxxxxxxxxxxxxxxxx>

A lot of ->destroy_inode() instances end with call_rcu() of a callback
that does RCU-delayed part of freeing. Introduce a new method for
doing just that, with saner signature.

Rules:
->destroy_inode ->free_inode
f g immediate call of f(),
RCU-delayed call of g()
f NULL immediate call of f(),
no RCU-delayed calls
NULL g RCU-delayed call of g()
NULL NULL RCU-delayed default freeing

IOW, NULL ->free_inode gives the same behaviour as now.

Note that NULL, NULL is equivalent to NULL, free_inode_nonrcu; we could
mandate the latter form, but that would have very little benefit beyond
making rules a bit more symmetric. It would break backwards compatibility,
require extra boilerplate and expected semantics for (NULL, NULL) pair
would have no use whatsoever...

Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
Documentation/filesystems/Locking | 2 ++
Documentation/filesystems/porting | 17 ++++++++++++
fs/inode.c | 54 +++++++++++++++++++++++----------------
include/linux/fs.h | 1 +
4 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index efea228ccd8a..7b20c385cc02 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -118,6 +118,7 @@ set: exclusive
--------------------------- super_operations ---------------------------
prototypes:
struct inode *(*alloc_inode)(struct super_block *sb);
+ void (*free_inode)(struct inode *);
void (*destroy_inode)(struct inode *);
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
@@ -139,6 +140,7 @@ locking rules:
All may block [not true, see below]
s_umount
alloc_inode:
+free_inode: called from RCU callback
destroy_inode:
dirty_inode:
write_inode:
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index cf43bc4dbf31..9d80f9e0855e 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -638,3 +638,20 @@ in your dentry operations instead.
inode to d_splice_alias() will also do the right thing (equivalent of
d_add(dentry, NULL); return NULL;), so that kind of special cases
also doesn't need a separate treatment.
+--
+[strongly recommended]
+ take the RCU-delayed parts of ->destroy_inode() into a new method -
+ ->free_inode(). If ->destroy_inode() becomes empty - all the better,
+ just get rid of it. Synchronous work (e.g. the stuff that can't
+ be done from an RCU callback, or any WARN_ON() where we want the
+ stack trace) *might* be movable to ->evict_inode(); however,
+ that goes only for the things that are not needed to balance something
+ done by ->alloc_inode(). IOW, if it's cleaning up the stuff that
+ might have accumulated over the life of in-core inode, ->evict_inode()
+ might be a fit.
+
+ Rules for inode destruction:
+ * if ->destroy_inode() is non-NULL, it gets called
+ * if ->free_inode() is non-NULL, it gets scheduled by call_rcu()
+ * combination of NULL ->destroy_inode and NULL ->free_inode is
+ treated as NULL/free_inode_nonrcu, to preserve the compatibility.
diff --git a/fs/inode.c b/fs/inode.c
index e9d97add2b36..fb45590d284e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -202,12 +202,28 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
}
EXPORT_SYMBOL(inode_init_always);

+void free_inode_nonrcu(struct inode *inode)
+{
+ kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ if (inode->i_sb->s_op->free_inode)
+ inode->i_sb->s_op->free_inode(inode);
+ else
+ free_inode_nonrcu(inode);
+}
+
static struct inode *alloc_inode(struct super_block *sb)
{
+ const struct super_operations *ops = sb->s_op;
struct inode *inode;

- if (sb->s_op->alloc_inode)
- inode = sb->s_op->alloc_inode(sb);
+ if (ops->alloc_inode)
+ inode = ops->alloc_inode(sb);
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

@@ -215,22 +231,18 @@ static struct inode *alloc_inode(struct super_block *sb)
return NULL;

if (unlikely(inode_init_always(sb, inode))) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, inode);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return NULL;
+ }
+ i_callback(&inode->i_rcu);
return NULL;
}

return inode;
}

-void free_inode_nonrcu(struct inode *inode)
-{
- kmem_cache_free(inode_cachep, inode);
-}
-EXPORT_SYMBOL(free_inode_nonrcu);
-
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
@@ -253,20 +265,18 @@ void __destroy_inode(struct inode *inode)
}
EXPORT_SYMBOL(__destroy_inode);

-static void i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(inode_cachep, inode);
-}
-
static void destroy_inode(struct inode *inode)
{
+ const struct super_operations *ops = inode->i_sb->s_op;
+
BUG_ON(!list_empty(&inode->i_lru));
__destroy_inode(inode);
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- call_rcu(&inode->i_rcu, i_callback);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return;
+ }
+ call_rcu(&inode->i_rcu, i_callback);
}

/**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..2e9b9f87caca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1903,6 +1903,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
+ void (*free_inode)(struct inode *);

void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
--
2.11.0