Re: [PATCH drm-misc-next v9 06/11] drm/nouveau: fence: separate fence alloc and emit

From: Christian König
Date: Tue Aug 08 2023 - 12:20:52 EST




On 07.08.23 20:54, Danilo Krummrich wrote:
Hi Christian,

On 8/7/23 20:07, Christian König wrote:
On 03.08.23 18:52, Danilo Krummrich wrote:
The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
in order to emit fences within DMA fence signalling critical sections
(e.g. as typically done in the DRM GPU scheduler's run_job() callback),
we need to separate fence allocation and fence emitting.

At least from the description, that sounds like it might be illegal. Daniel, can you take a look as well?

What exactly are you doing here?

I'm basically doing exactly the same thing as amdgpu_fence_emit() does in amdgpu_ib_schedule(), which is called by amdgpu_job_run().

The difference - and this is what this patch is for - is that I separate the fence allocation from emitting the fence, such that the fence structure is allocated before the job is submitted to the GPU scheduler. amdgpu instead solves this by allocating the fence structure with GFP_ATOMIC within amdgpu_fence_emit().
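
In other words, the intended pattern looks roughly like the sketch below (example_job and the two functions are made up purely for illustration; only nouveau_fence_new()/nouveau_fence_emit() are from this series):

struct example_job {
    struct nouveau_channel *chan;
    struct nouveau_fence *fence;
};

/* Process context, before the job is pushed to the DRM GPU scheduler:
 * allocating the fence structure may sleep and may fail, so it happens
 * here rather than in the signalling critical section.
 */
static int example_job_init(struct example_job *job,
                struct nouveau_channel *chan)
{
    int ret;

    ret = nouveau_fence_new(&job->fence);
    if (ret)
        return ret;

    job->chan = chan;
    return 0;
}

/* DMA fence signalling critical section, e.g. the scheduler's run_job()
 * callback: only emit the pre-allocated fence, no fallible allocation.
 */
static struct dma_fence *example_job_run(struct example_job *job)
{
    int ret;

    ret = nouveau_fence_emit(job->fence, job->chan);
    if (ret) {
        nouveau_fence_unref(&job->fence);
        return ERR_PTR(ret);
    }

    return dma_fence_get(&job->fence->base);
}

Since the channel isn't known at allocation time anymore, the chan->fence check also moves from nouveau_fence_new() to nouveau_fence_emit() (see the nouveau_fence.c hunk below).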

Yeah, that use case is perfectly valid. Maybe update the commit message a bit to better describe that.

Something like "Separate fence allocation and emitting to avoid allocation within DMA fence signalling critical sections inside the DRM scheduler. This helps implementing the new UAPI....".

Regards,
Christian.


- Danilo


Regards,
Christian.


Signed-off-by: Danilo Krummrich <dakr@xxxxxxxxxx>
---
  drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
  drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 +++++++++++++++----------
  drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
  drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
  drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
  drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
  drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
  7 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a6f2e681bde9..a34924523133 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
      PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
      PUSH_KICK(push);
-    ret = nouveau_fence_new(chan, false, pfence);
+    ret = nouveau_fence_new(pfence);
      if (ret)
          goto fail;
+    ret = nouveau_fence_emit(*pfence, chan);
+    if (ret)
+        goto fail_fence_unref;
+
      return 0;
+
+fail_fence_unref:
+    nouveau_fence_unref(pfence);
  fail:
      spin_lock_irqsave(&dev->event_lock, flags);
      list_del(&s->head);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 057bc995f19b..e9cbbf594e6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
          mutex_lock(&cli->mutex);
      else
          mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
+
      ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible);
-    if (ret == 0) {
-        ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
-        if (ret == 0) {
-            ret = nouveau_fence_new(chan, false, &fence);
-            if (ret == 0) {
-                /* TODO: figure out a better solution here
-                 *
-                 * wait on the fence here explicitly as going through
-                 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
-                 *
-                 * Without this the operation can timeout and we'll fallback to a
-                 * software copy, which might take several minutes to finish.
-                 */
-                nouveau_fence_wait(fence, false, false);
-                ret = ttm_bo_move_accel_cleanup(bo,
-                                &fence->base,
-                                evict, false,
-                                new_reg);
-                nouveau_fence_unref(&fence);
-            }
-        }
+    if (ret)
+        goto out_unlock;
+
+    ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
+    if (ret)
+        goto out_unlock;
+
+    ret = nouveau_fence_new(&fence);
+    if (ret)
+        goto out_unlock;
+
+    ret = nouveau_fence_emit(fence, chan);
+    if (ret) {
+        nouveau_fence_unref(&fence);
+        goto out_unlock;
      }
+
+    /* TODO: figure out a better solution here
+     *
+     * wait on the fence here explicitly as going through
+     * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+     *
+     * Without this the operation can timeout and we'll fallback to a
+     * software copy, which might take several minutes to finish.
+     */
+    nouveau_fence_wait(fence, false, false);
+    ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
+                    new_reg);
+    nouveau_fence_unref(&fence);
+
+out_unlock:
      mutex_unlock(&cli->mutex);
      return ret;
  }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 6d639314250a..f69be4c8f9f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
          struct nouveau_fence *fence = NULL;
          int ret;
-        ret = nouveau_fence_new(chan, false, &fence);
+        ret = nouveau_fence_new(&fence);
          if (!ret) {
-            ret = nouveau_fence_wait(fence, false, false);
+            ret = nouveau_fence_emit(fence, chan);
+            if (!ret)
+                ret = nouveau_fence_wait(fence, false, false);
              nouveau_fence_unref(&fence);
          }
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 789857faa048..4ad40e42cae1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -209,7 +209,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
          goto done;
      }
-    nouveau_fence_new(dmem->migrate.chan, false, &fence);
+    if (!nouveau_fence_new(&fence))
+        nouveau_fence_emit(fence, dmem->migrate.chan);
      migrate_vma_pages(&args);
      nouveau_dmem_fence_done(&fence);
      dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
          }
      }
-    nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
+    if (!nouveau_fence_new(&fence))
+        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
      migrate_device_pages(src_pfns, dst_pfns, npages);
      nouveau_dmem_fence_done(&fence);
      migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
          addr += PAGE_SIZE;
      }
-    nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
+    if (!nouveau_fence_new(&fence))
+        nouveau_fence_emit(fence, drm->dmem->migrate.chan);
      migrate_vma_pages(args);
      nouveau_dmem_fence_done(&fence);
      nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ee5e9d40c166..e946408f945b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
      struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
      int ret;
+    if (unlikely(!chan->fence))
+        return -ENODEV;
+
      fence->channel  = chan;
      fence->timeout  = jiffies + (15 * HZ);
@@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
  }
  int
-nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
-          struct nouveau_fence **pfence)
+nouveau_fence_new(struct nouveau_fence **pfence)
  {
      struct nouveau_fence *fence;
-    int ret = 0;
-
-    if (unlikely(!chan->fence))
-        return -ENODEV;
      fence = kzalloc(sizeof(*fence), GFP_KERNEL);
      if (!fence)
          return -ENOMEM;
-    ret = nouveau_fence_emit(fence, chan);
-    if (ret)
-        nouveau_fence_unref(&fence);
-
      *pfence = fence;
-    return ret;
+    return 0;
  }
  static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 0ca2bc85adf6..7c73c7c9834a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -17,8 +17,7 @@ struct nouveau_fence {
      unsigned long timeout;
  };
-int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
-               struct nouveau_fence **);
+int  nouveau_fence_new(struct nouveau_fence **);
  void nouveau_fence_unref(struct nouveau_fence **);
  int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a48f42aaeab9..9c8d1b911a01 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
          }
      }
-    ret = nouveau_fence_new(chan, false, &fence);
+    ret = nouveau_fence_new(&fence);
+    if (!ret)
+        ret = nouveau_fence_emit(fence, chan);
      if (ret) {
+        nouveau_fence_unref(&fence);
          NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
          WIND_RING(chan);
          goto out;