[PATCH] fs/pstore: Perform erase from a worker

From: Chris Wilson
Date: Fri Mar 17 2017 - 05:52:45 EST


In order to prevent a cyclic recursion between psi->read_mutex and the
inode_lock, we need to move the pse->erase to a worker.

[ 605.374955] ======================================================
[ 605.381281] [ INFO: possible circular locking dependency detected ]
[ 605.387679] 4.11.0-rc2-CI-CI_DRM_2352+ #1 Not tainted
[ 605.392826] -------------------------------------------------------
[ 605.399196] rm/7298 is trying to acquire lock:
[ 605.403720] (&psinfo->read_mutex){+.+.+.}, at: [<ffffffff813e183f>] pstore_unlink+0x3f/0xa0
[ 605.412300]
[ 605.412300] but task is already holding lock:
[ 605.418237] (&sb->s_type->i_mutex_key#14){++++++}, at: [<ffffffff812157ec>] vfs_unlink+0x4c/0x19
0
[ 605.427397]
[ 605.427397] which lock already depends on the new lock.
[ 605.427397]
[ 605.435770]
[ 605.435770] the existing dependency chain (in reverse order) is:
[ 605.443396]
[ 605.443396] -> #1 (&sb->s_type->i_mutex_key#14){++++++}:
[ 605.450347] lock_acquire+0xc9/0x220
[ 605.454551] down_write+0x3f/0x70
[ 605.458484] pstore_mkfile+0x1f4/0x460
[ 605.462835] pstore_get_records+0x17a/0x320
[ 605.467664] pstore_fill_super+0xa4/0xc0
[ 605.472205] mount_single+0x89/0xb0
[ 605.476314] pstore_mount+0x13/0x20
[ 605.480411] mount_fs+0xf/0x90
[ 605.484122] vfs_kern_mount+0x66/0x170
[ 605.488464] do_mount+0x190/0xd50
[ 605.492397] SyS_mount+0x90/0xd0
[ 605.496212] entry_SYSCALL_64_fastpath+0x1c/0xb1
[ 605.501496]
[ 605.501496] -> #0 (&psinfo->read_mutex){+.+.+.}:
[ 605.507747] __lock_acquire+0x1ac0/0x1bb0
[ 605.512401] lock_acquire+0xc9/0x220
[ 605.516594] __mutex_lock+0x6e/0x990
[ 605.520755] mutex_lock_nested+0x16/0x20
[ 605.525279] pstore_unlink+0x3f/0xa0
[ 605.529465] vfs_unlink+0xb5/0x190
[ 605.533477] do_unlinkat+0x24c/0x2a0
[ 605.537672] SyS_unlinkat+0x16/0x30
[ 605.541781] entry_SYSCALL_64_fastpath+0x1c/0xb1
[ 605.547067]
[ 605.547067] other info that might help us debug this:
[ 605.547067]
[ 605.555221] Possible unsafe locking scenario:
[ 605.555221]
[ 605.561280] CPU0 CPU1
[ 605.565883] ---- ----
[ 605.570502] lock(&sb->s_type->i_mutex_key#14);
[ 605.575217] lock(&psinfo->read_mutex);
[ 605.581803] lock(&sb->s_type->i_mutex_key#14);
[ 605.589159] lock(&psinfo->read_mutex);
[ 605.593156]
[ 605.593156] *** DEADLOCK ***
[ 605.593156]
[ 605.599214] 3 locks held by rm/7298:
[ 605.602896] #0: (sb_writers#11){.+.+..}, at: [<ffffffff8122e85f>] mnt_want_write+0x1f/0x50
[ 605.611490] #1: (&sb->s_type->i_mutex_key#14/1){+.+...}, at: [<ffffffff8121a7dc>] do_unlinkat+0
x11c/0x2a0
[ 605.621417] #2: (&sb->s_type->i_mutex_key#14){++++++}, at: [<ffffffff812157ec>] vfs_unlink+0x4c
/0x190
[ 605.630995]
[ 605.630995] stack backtrace:
[ 605.635450] CPU: 7 PID: 7298 Comm: rm Not tainted 4.11.0-rc2-CI-CI_DRM_2352+ #1
[ 605.642999] Hardware name: Gigabyte Technology Co., Ltd. Z170X-UD5/Z170X-UD5-CF, BIOS F21 01/06/2
017
[ 605.652305] Call Trace:
[ 605.654814] dump_stack+0x67/0x92
[ 605.658184] print_circular_bug+0x1e0/0x2e0
[ 605.662465] __lock_acquire+0x1ac0/0x1bb0
[ 605.666634] ? retint_kernel+0x2d/0x2d
[ 605.670456] lock_acquire+0xc9/0x220
[ 605.674112] ? pstore_unlink+0x3f/0xa0
[ 605.677970] ? pstore_unlink+0x3f/0xa0
[ 605.681818] __mutex_lock+0x6e/0x990
[ 605.685456] ? pstore_unlink+0x3f/0xa0
[ 605.689791] ? pstore_unlink+0x3f/0xa0
[ 605.694124] ? vfs_unlink+0x4c/0x190
[ 605.698310] mutex_lock_nested+0x16/0x20
[ 605.702859] pstore_unlink+0x3f/0xa0
[ 605.707021] vfs_unlink+0xb5/0x190
[ 605.711024] do_unlinkat+0x24c/0x2a0
[ 605.715194] SyS_unlinkat+0x16/0x30
[ 605.719275] entry_SYSCALL_64_fastpath+0x1c/0xb1
[ 605.724543] RIP: 0033:0x7f8b08073ed7
[ 605.728676] RSP: 002b:00007ffe70eff628 EFLAGS: 00000206 ORIG_RAX: 0000000000000107
[ 605.736929] RAX: ffffffffffffffda RBX: ffffffff8147ea93 RCX: 00007f8b08073ed7
[ 605.744711] RDX: 0000000000000000 RSI: 0000000001450000 RDI: 00000000ffffff9c
[ 605.752512] RBP: ffffc9000338ff88 R08: 0000000000000003 R09: 0000000000000000
[ 605.760276] R10: 000000000000015e R11: 0000000000000206 R12: 0000000000000000
[ 605.768040] R13: 00007ffe70eff750 R14: 000000000144ff70 R15: 0000000001451230
[ 605.775800] ? __this_cpu_preempt_check+0x13/0x20

Reported-by: Tomi Sarvela <tomi.p.sarvela@xxxxxxxxx>
Fixes: e9e360b08a44 ("pstore: Protect unlink with read_mutex")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100234
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tomi Sarvela <tomi.p.sarvela@xxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Anton Vorontsov <anton@xxxxxxxxxx>
Cc: Colin Cross <ccross@xxxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx> # v4.10+
---
fs/pstore/inode.c | 37 ++++++++++++++++++++++++++++++-------
fs/pstore/platform.c | 2 ++
2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 57c0646479f5..dee5d34d9a64 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -186,6 +186,26 @@ static const struct file_operations pstore_file_operations = {
.release = seq_release,
};

+struct pstore_unlink_work {
+ struct work_struct work;
+ struct dentry *dentry;
+};
+
+static void pstore_unlink_work(struct work_struct *work)
+{
+ struct pstore_unlink_work *arg = container_of(work, typeof(*arg), work);
+ struct dentry *dentry = arg->dentry;
+ struct pstore_private *p = d_inode(dentry)->i_private;
+
+ mutex_lock(&p->psi->read_mutex);
+ p->psi->erase(p->type, p->id, p->count,
+ d_inode(dentry)->i_ctime, p->psi);
+ mutex_unlock(&p->psi->read_mutex);
+
+ dput(dentry);
+ kfree(arg);
+}
+
/*
* When a file is unlinked from our file system we call the
* platform driver to erase the record from persistent store.
@@ -193,20 +213,23 @@ static const struct file_operations pstore_file_operations = {
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
+ struct pstore_unlink_work *work;
int err;

err = pstore_check_syslog_permissions(p);
if (err)
return err;

- if (p->psi->erase) {
- mutex_lock(&p->psi->read_mutex);
- p->psi->erase(p->type, p->id, p->count,
- d_inode(dentry)->i_ctime, p->psi);
- mutex_unlock(&p->psi->read_mutex);
- } else {
+ if (!p->psi->erase)
return -EPERM;
- }
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ work->dentry = dget(dentry);
+ INIT_WORK(&work->work, pstore_unlink_work);
+ schedule_work(&work->work);

return simple_unlink(dir, dentry);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index efab7b64925b..6ffca5ab4782 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -731,6 +731,8 @@ EXPORT_SYMBOL_GPL(pstore_register);

void pstore_unregister(struct pstore_info *psi)
{
+ flush_scheduled_work();
+
if (psi->flags & PSTORE_FLAGS_PMSG)
pstore_unregister_pmsg();
if (psi->flags & PSTORE_FLAGS_FTRACE)
--
2.11.0