[PATCH] Revert "drm/msm/gpu: Push gpu lock down past runpm"

From: Rob Clark
Date: Tue Jan 09 2024 - 13:22:54 EST


From: Rob Clark <robdclark@xxxxxxxxxxxx>

This reverts commit abe2023b4cea192ab266b351fd38dc9dbd846df0.

Changing the locking order means that scheduler/msm_job_run() can race
with the recovery kthread worker, with the result that the GPU gets an
extra runpm get when we are trying to power it off. Leaving the GPU in
an unrecovered state.

I'll need to come up with a different scheme for appeasing lockdep.

Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
---
drivers/gpu/drm/msm/msm_gpu.c | 11 +++++------
drivers/gpu/drm/msm/msm_ringbuffer.c | 7 +++++--
2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 095390774f22..655002b21b0d 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -751,12 +751,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned long flags;

- pm_runtime_get_sync(&gpu->pdev->dev);
+ WARN_ON(!mutex_is_locked(&gpu->lock));

- mutex_lock(&gpu->lock);
+ pm_runtime_get_sync(&gpu->pdev->dev);

msm_gpu_hw_init(gpu);

+ submit->seqno = submit->hw_fence->seqno;
+
update_sw_cntrs(gpu);

/*
@@ -781,11 +783,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
gpu->funcs->submit(gpu, submit);
gpu->cur_ctx_seqno = submit->queue->ctx->seqno;

- hangcheck_timer_reset(gpu);
-
- mutex_unlock(&gpu->lock);
-
pm_runtime_put(&gpu->pdev->dev);
+ hangcheck_timer_reset(gpu);
}

/*
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index e0ed27739449..548f5266a7d3 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)

msm_fence_init(submit->hw_fence, fctx);

- submit->seqno = submit->hw_fence->seqno;
-
mutex_lock(&priv->lru.lock);

for (i = 0; i < submit->nr_bos; i++) {
@@ -35,8 +33,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)

mutex_unlock(&priv->lru.lock);

+ /* TODO move submit path over to using a per-ring lock.. */
+ mutex_lock(&gpu->lock);
+
msm_gpu_submit(gpu, submit);

+ mutex_unlock(&gpu->lock);
+
return dma_fence_get(submit->hw_fence);
}

--
2.43.0