[PATCH 5.18 010/158] drm/scheduler: Dont kill jobs in interrupt context

From: Greg Kroah-Hartman
Date: Wed Jul 27 2022 - 13:26:44 EST


From: Dmitry Osipenko <dmitry.osipenko@xxxxxxxxxxxxx>

commit 9b04369b060fd4885f728b7a4ab4851ffb1abb64 upstream.

Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
mutex when job is released, and thus, that code can sleep. This results
into "BUG: scheduling while atomic" if locks are contented while job is
freed. There is no good reason for releasing scheduler's jobs in IRQ
context, hence use normal context to fix the trouble.

Cc: stable@xxxxxxxxxxxxxxx
Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@xxxxxxxxxxxxx>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
Link: https://patchwork.freedesktop.org/patch/msgid/20220411221536.283312-1-dmitry.osipenko@xxxxxxxxxxxxx
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/scheduler/sched_entity.c | 6 +++---
include/drm/gpu_scheduler.h | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)

--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_s
}
EXPORT_SYMBOL(drm_sched_entity_flush);

-static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
+static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);

@@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_c
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);

- init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
- irq_work_queue(&job->work);
+ INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
+ schedule_work(&job->work);
}

static struct dma_fence *
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -28,7 +28,7 @@
#include <linux/dma-fence.h>
#include <linux/completion.h>
#include <linux/xarray.h>
-#include <linux/irq_work.h>
+#include <linux/workqueue.h>

#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)

@@ -294,7 +294,7 @@ struct drm_sched_job {
*/
union {
struct dma_fence_cb finish_cb;
- struct irq_work work;
+ struct work_struct work;
};

uint64_t id;