[RFC PATCH v2 9/9] debugfs: free debugfs_fsdata instances

From: Nicolai Stange
Date: Wed May 03 2017 - 10:19:15 EST


Currently, a dentry's debugfs_fsdata instance is allocated from
debugfs_file_get() at first usage, i.e. at first file opening.

It won't ever get freed though.

Ideally, these instances would get freed after the last open file handle
gets closed again, that is from the fops' ->release().

Unfortunately, we can't hook easily into those ->release()ers of files
created through debugfs_create_file_unsafe(), i.e. of those proxied through
debugfs_open_proxy_file_operations rather than through the "full"
debugfs_full_proxy_file_operations proxy.

Hence, free unreferenced debugfs_fsdata instances from debugfs_file_put(),
with the drawback of a potential allocation + deallocation per
debugfs_file_get() + debugfs_file_put() pair, that is per fops invocation.

In addition to its former role of tracking pending fops, use the
->active_users for reference counting on the debugfs_fsdata instance
itself. In particular, don't keep a dummy reference to be dropped from
__debugfs_remove_file(): a d_delete()ed dentry and thus, request for
completion notification, is now signaled by the d_unlinked() dentry itself.

Once ->active_users drops to zero (and the dentry is still intact), free
the debugfs_fsdata instance from debugfs_file_put(). RCU protects any
concurrent debugfs_file_get() attempts to get a hold of the instance here.
Likewise for full_proxy_release() which lacks a call to debugfs_file_get().

Note that due to non-atomic updates to the d_unlinked() + ->d_fsdata pair,
care must be taken in order to avoid races between debugfs_file_put() and
debugfs_file_get() as well as __debugfs_remove_file(). Rather than
introducing a global lock, exploit the fact that there will ever be only a
single !d_unlinked() -> d_unlinked() transition and add memory barriers
where needed. Given the lack of proper benchmarking, that debugfs fops
aren't performance critical and that we've already got a potential
allocation/deallocation pair anyway, the added code complexity might be
highly questionable though.

Signed-off-by: Nicolai Stange <nicstange@xxxxxxxxx>
---
fs/debugfs/file.c | 100 ++++++++++++++++++++++++++++++++++++++++----------
fs/debugfs/inode.c | 8 +++-
fs/debugfs/internal.h | 1 +
3 files changed, 88 insertions(+), 21 deletions(-)

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 0536072b60d7..b5681111c671 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -22,7 +22,9 @@
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/device.h>
+#include <linux/rcupdate.h>
#include <asm/poll.h>
+#include <asm/processor.h>

#include "internal.h"

@@ -86,10 +88,37 @@ int debugfs_file_get(struct dentry *dentry)
struct debugfs_fsdata *fsd;
void *d_fsd;

- d_fsd = READ_ONCE(dentry->d_fsdata);
+retry:
+ rcu_read_lock();
+ d_fsd = rcu_dereference(dentry->d_fsdata);
if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
+ /*
+ * Paired with the control dependency in
+ * debugfs_file_put(): if we saw the debugfs_fsdata
+ * instance "restored" there but not the dead dentry,
+ * we'd erroneously instantiate a fresh debugfs_fsdata
+ * instance below.
+ */
+ smp_rmb();
+ if (d_unlinked(dentry)) {
+ rcu_read_unlock();
+ return -EIO;
+ }
+
fsd = d_fsd;
+ if (!refcount_inc_not_zero(&fsd->active_users)) {
+ /*
+ * A concurrent debugfs_file_put() dropped the
+ * count to zero and is about to free the
+ * debugfs_fsdata.
+ */
+ rcu_read_unlock();
+ cpu_relax();
+ goto retry;
+ }
+ rcu_read_unlock();
} else {
+ rcu_read_unlock();
fsd = kmalloc(sizeof(*fsd), GFP_KERNEL);
if (!fsd)
return -ENOMEM;
@@ -99,25 +128,27 @@ int debugfs_file_get(struct dentry *dentry)
refcount_set(&fsd->active_users, 1);
init_completion(&fsd->active_users_drained);
if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) {
+ /*
+ * Another debugfs_file_get() has installed a
+ * debugfs_fsdata instance concurrently.
+ * Free ours and retry to grab a reference on
+ * the installed one.
+ */
kfree(fsd);
- fsd = READ_ONCE(dentry->d_fsdata);
+ goto retry;
+ }
+ /*
+ * In case of a successful cmpxchg() above, this check is
+ * strictly necessary and must follow it, see the comment in
+ * __debugfs_remove_file().
+ */
+ if (d_unlinked(dentry)) {
+ if (refcount_dec_and_test(&fsd->active_users))
+ complete(&fsd->active_users_drained);
+ return -EIO;
}
}

- /*
- * In case of a successful cmpxchg() above, this check is
- * strictly necessary and must follow it, see the comment in
- * __debugfs_remove_file().
- * OTOH, if the cmpxchg() hasn't been executed or wasn't
- * successful, this serves the purpose of not starving
- * removers.
- */
- if (d_unlinked(dentry))
- return -EIO;
-
- if (!refcount_inc_not_zero(&fsd->active_users))
- return -EIO;
-
return 0;
}
EXPORT_SYMBOL_GPL(debugfs_file_get);
@@ -134,9 +165,29 @@ EXPORT_SYMBOL_GPL(debugfs_file_get);
void debugfs_file_put(struct dentry *dentry)
{
struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
+ void *d_fsd;

- if (refcount_dec_and_test(&fsd->active_users))
- complete(&fsd->active_users_drained);
+ if (refcount_dec_and_test(&fsd->active_users)) {
+ d_fsd = (void *)((unsigned long)fsd->real_fops |
+ DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
+ RCU_INIT_POINTER(dentry->d_fsdata, d_fsd);
+ /* Paired with smp_mb() in __debugfs_remove_file(). */
+ smp_mb();
+ if (d_unlinked(dentry)) {
+ /*
+ * We have a control dependency paired with the
+ * smp_rmb() in debugfs_file_get() here.
+ *
+ * Restore the debugfs_fsdata instance into
+ * ->d_fsdata s.t. ->d_release() can free
+ * it.
+ */
+ WRITE_ONCE(dentry->d_fsdata, fsd);
+ complete(&fsd->active_users_drained);
+ } else {
+ kfree_rcu(fsd, rcu_head);
+ }
+ }
}
EXPORT_SYMBOL_GPL(debugfs_file_put);

@@ -229,9 +280,20 @@ static unsigned int full_proxy_poll(struct file *filp,
static int full_proxy_release(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = debugfs_real_fops(filp);
const struct file_operations *proxy_fops = filp->f_op;
int r = 0;
+ void *d_fsd;
+ const struct file_operations *real_fops;
+
+ rcu_read_lock();
+ d_fsd = rcu_dereference(F_DENTRY(filp)->d_fsdata);
+ if ((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) {
+ real_fops = (void *)((unsigned long)d_fsd &
+ ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
+ } else {
+ real_fops = ((struct debugfs_fsdata *)d_fsd)->real_fops;
+ }
+ rcu_read_unlock();

/*
* We must not protect this against removal races here: the
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 65a40907569b..d08bca9f5085 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -27,6 +27,7 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>

#include "internal.h"

@@ -636,13 +637,16 @@ static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent)
* cmpxchg() in debugfs_file_get(): either
* debugfs_file_get() must see a dead dentry or we must see a
* debugfs_fsdata instance at ->d_fsdata here (or both).
+ *
+ * Also paired with the smp_mb() in debugfs_file_put(): if we
+ * see a debugfs_fsdata instance here, then debugfs_file_put()
+ * must see a dead dentry.
*/
smp_mb();
fsd = READ_ONCE(dentry->d_fsdata);
if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
return;
- if (!refcount_dec_and_test(&fsd->active_users))
- wait_for_completion(&fsd->active_users_drained);
+ wait_for_completion(&fsd->active_users_drained);
}

static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index cb1e8139c398..0445bd7d11f2 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -23,6 +23,7 @@ struct debugfs_fsdata {
const struct file_operations *real_fops;
refcount_t active_users;
struct completion active_users_drained;
+ struct rcu_head rcu_head;
};

/*
--
2.12.2