[PATCH v2] dmaengine: fix async channel removal for dma_issue_pending_all

From: Dāvis Mosāns
Date: Mon Feb 21 2022 - 20:55:58 EST


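DMA channels can be removed asynchronously while we iterate over them.
Without protection against such removal, various crashes can happen in
dma_issue_pending_all().

In dma_issue_pending_all(), take dma_list_mutex instead of the RCU read
lock and walk both the device list and each device's channel list with
list_for_each_entry_safe(). Also use list_for_each_entry_safe() for the
channel walk in dma_channel_rebalance().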

One such example:

kernel: BUG: kernel NULL pointer dereference, address: 0000000000000018
kernel: #PF: supervisor read access in kernel mode
kernel: #PF: error_code(0x0000) - not-present page
kernel: PGD 0 P4D 0
kernel: Oops: 0000 [#1] PREEMPT SMP NOPTI
kernel: RIP: 0010:dma_issue_pending_all (drivers/dma/dmaengine.c:562)
All code
========
0: 48 8b 45 20 mov 0x20(%rbp),%rax
4: 48 8d 68 e0 lea -0x20(%rax),%rbp
8: 48 3d 60 36 35 bc cmp $0xffffffffbc353660,%rax
e: 74 4d je 0x5d
10: 48 8b 45 48 mov 0x48(%rbp),%rax
14: f6 c4 02 test $0x2,%ah
17: 75 e7 jne 0x0
19: 48 8b 45 10 mov 0x10(%rbp),%rax
1d: 4c 8d 65 10 lea 0x10(%rbp),%r12
21: 48 8d 58 c8 lea -0x38(%rax),%rbx
25: 49 39 c4 cmp %rax,%r12
28: 74 d6 je 0x0
2a:* 8b 43 50 mov 0x50(%rbx),%eax <-- trapping instruction
2d: 85 c0 test %eax,%eax
2f: 74 0f je 0x40
31: 48 8b 85 88 01 00 00 mov 0x188(%rbp),%rax
38: 48 89 df mov %rbx,%rdi
3b: 0f ae e8 lfence
3e: ff d0 call *%rax

kernel: RSP: 0018:ffffbc5004897de8 EFLAGS: 00010282
kernel: RAX: 0000000000000000 RBX: ffffffffffffffc8 RCX: 0000000000000000
kernel: RDX: ffff9c2300e36a28 RSI: 0000000000000202 RDI: ffff9c2300e36a10
kernel: RBP: ffff9c24473a3970 R08: 0000000000000001 R09: 0000000000000000
kernel: R10: ffff9c23ab165ea9 R11: 0000000000000000 R12: ffff9c24473a3980
kernel: R13: ffff9c2367bca000 R14: ffff9c23112ae000 R15: 0000000000000000
kernel: FS: 0000000000000000(0000) GS:ffff9c2a5d500000(0000) knlGS:0000000000000000
kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 0000000000000018 CR3: 0000000700d0a000 CR4: 00000000003506e0
kernel: Call Trace:
kernel: <TASK>
kernel: raid5d (drivers/md/raid5.c:6563) raid456
kernel: ? schedule (arch/x86/include/asm/preempt.h:85 (discriminator 1) kernel/sched/core.c:6370 (discriminator 1))
kernel: md_thread (drivers/md/md.c:7923) md_mod
kernel: ? do_wait_intr (kernel/sched/wait.c:415)
kernel: ? md_submit_bio (drivers/md/md.c:7887) md_mod
kernel: kthread (kernel/kthread.c:377)
kernel: ? kthread_complete_and_exit (kernel/kthread.c:332)
kernel: ret_from_fork (arch/x86/entry/entry_64.S:301)
kernel: </TASK>

Signed-off-by: Dāvis Mosāns <davispuh@xxxxxxxxx>
---
drivers/dma/dmaengine.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 2cfa8458b51be..fe53a507a8f95 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -360,7 +360,7 @@ static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
*/
static void dma_channel_rebalance(void)
{
- struct dma_chan *chan;
+ struct dma_chan *chan, *n;
struct dma_device *device;
int cpu;
int cap;
@@ -373,7 +373,7 @@ static void dma_channel_rebalance(void)
list_for_each_entry(device, &dma_device_list, global_node) {
if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
continue;
- list_for_each_entry(chan, &device->channels, device_node)
+ list_for_each_entry_safe(chan, n, &device->channels, device_node)
chan->table_count = 0;
}

@@ -552,18 +552,18 @@ EXPORT_SYMBOL(dma_find_channel);
*/
void dma_issue_pending_all(void)
{
- struct dma_device *device;
- struct dma_chan *chan;
+ struct dma_device *device, *_d;
+ struct dma_chan *chan, *n;

- rcu_read_lock();
- list_for_each_entry_rcu(device, &dma_device_list, global_node) {
+ mutex_lock(&dma_list_mutex);
+ list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
continue;
- list_for_each_entry(chan, &device->channels, device_node)
+ list_for_each_entry_safe(chan, n, &device->channels, device_node)
if (chan->client_count)
device->device_issue_pending(chan);
}
- rcu_read_unlock();
+ mutex_unlock(&dma_list_mutex);
}
EXPORT_SYMBOL(dma_issue_pending_all);

--
2.35.1