Re: [PATCH v4 6/6] drm/shmem-helper: Switch to reservation lock

From: Boris Brezillon
Date: Mon Jun 26 2023 - 05:40:28 EST


Hi Dmitry,

On Tue, 30 May 2023 01:39:35 +0300
Dmitry Osipenko <dmitry.osipenko@xxxxxxxxxxxxx> wrote:

> Replace all drm-shmem locks with a GEM reservation lock. This makes locks
> consistent with dma-buf locking convention where importers are responsible
> for holding reservation lock for all operations performed over dma-bufs,
> preventing deadlock between dma-buf importers and exporters.

I've rebased some of my work on drm-misc-next this morning and noticed
that the drm_gem_shmem_get_pages() I was using to pin pages no longer
exists, so I ended up looking at this patch to check what I should use
instead, and I have a few questions/comments.

>
> Suggested-by: Daniel Vetter <daniel@xxxxxxxx>
> Acked-by: Thomas Zimmermann <tzimmermann@xxxxxxx>
> Reviewed-by: Emil Velikov <emil.l.velikov@xxxxxxxxx>
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@xxxxxxxxxxxxx>
> ---
> drivers/gpu/drm/drm_gem_shmem_helper.c | 210 ++++++++----------
> drivers/gpu/drm/lima/lima_gem.c | 8 +-
> drivers/gpu/drm/panfrost/panfrost_drv.c | 7 +-
> .../gpu/drm/panfrost/panfrost_gem_shrinker.c | 6 +-
> drivers/gpu/drm/panfrost/panfrost_mmu.c | 19 +-
> include/drm/drm_gem_shmem_helper.h | 14 +-
> 6 files changed, 116 insertions(+), 148 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index 4ea6507a77e5..a783d2245599 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -88,8 +88,6 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private)
> if (ret)
> goto err_release;
>
> - mutex_init(&shmem->pages_lock);
> - mutex_init(&shmem->vmap_lock);
> INIT_LIST_HEAD(&shmem->madv_list);
>
> if (!private) {
> @@ -141,11 +139,13 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
> {
> struct drm_gem_object *obj = &shmem->base;
>
> - drm_WARN_ON(obj->dev, shmem->vmap_use_count);
> -
> if (obj->import_attach) {
> drm_prime_gem_destroy(obj, shmem->sgt);
> } else {
> + dma_resv_lock(shmem->base.resv, NULL);
> +
> + drm_WARN_ON(obj->dev, shmem->vmap_use_count);
> +
> if (shmem->sgt) {
> dma_unmap_sgtable(obj->dev->dev, shmem->sgt,
> DMA_BIDIRECTIONAL, 0);
> @@ -154,22 +154,24 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
> }
> if (shmem->pages)
> drm_gem_shmem_put_pages(shmem);
> - }
>
> - drm_WARN_ON(obj->dev, shmem->pages_use_count);
> + drm_WARN_ON(obj->dev, shmem->pages_use_count);
> +
> + dma_resv_unlock(shmem->base.resv);
> + }
>
> drm_gem_object_release(obj);
> - mutex_destroy(&shmem->pages_lock);
> - mutex_destroy(&shmem->vmap_lock);
> kfree(shmem);
> }
> EXPORT_SYMBOL_GPL(drm_gem_shmem_free);
>
> -static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
> +static int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem)

I find this name change confusing, because the function requires the
GEM resv lock to be held, and the _locked suffix was making it pretty
clear.

> {
> struct drm_gem_object *obj = &shmem->base;
> struct page **pages;
>
> + dma_resv_assert_held(shmem->base.resv);
> +
> if (shmem->pages_use_count++ > 0)
> return 0;
>
> @@ -197,35 +199,16 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
> }
>
> /*
> - * drm_gem_shmem_get_pages - Allocate backing pages for a shmem GEM object
> + * drm_gem_shmem_put_pages - Decrease use count on the backing pages for a shmem GEM object
> * @shmem: shmem GEM object
> *
> - * This function makes sure that backing pages exists for the shmem GEM object
> - * and increases the use count.
> - *
> - * Returns:
> - * 0 on success or a negative error code on failure.
> + * This function decreases the use count and puts the backing pages when use drops to zero.
> */
> -int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem)
> +void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem)

Same comment about the name change. That's even more confusing since
this function was previously taking care of the locking. Also not sure
why you'd want to expose this _put() helper when the _get() helper is
private.

> {
> struct drm_gem_object *obj = &shmem->base;
> - int ret;
>
> - drm_WARN_ON(obj->dev, obj->import_attach);
> -
> - ret = mutex_lock_interruptible(&shmem->pages_lock);
> - if (ret)
> - return ret;
> - ret = drm_gem_shmem_get_pages_locked(shmem);
> - mutex_unlock(&shmem->pages_lock);
> -
> - return ret;
> -}
> -EXPORT_SYMBOL(drm_gem_shmem_get_pages);
> -
> -static void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
> -{
> - struct drm_gem_object *obj = &shmem->base;
> + dma_resv_assert_held(shmem->base.resv);
>
> if (drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count))
> return;
> @@ -243,20 +226,25 @@ static void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
> shmem->pages_mark_accessed_on_put);
> shmem->pages = NULL;
> }
> +EXPORT_SYMBOL(drm_gem_shmem_put_pages);
>
> -/*
> - * drm_gem_shmem_put_pages - Decrease use count on the backing pages for a shmem GEM object
> - * @shmem: shmem GEM object
> - *
> - * This function decreases the use count and puts the backing pages when use drops to zero.
> - */
> -void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem)
> +static int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem)
> {
> - mutex_lock(&shmem->pages_lock);
> - drm_gem_shmem_put_pages_locked(shmem);
> - mutex_unlock(&shmem->pages_lock);
> + int ret;
> +
> + dma_resv_assert_held(shmem->base.resv);
> +
> + ret = drm_gem_shmem_get_pages(shmem);
> +
> + return ret;
> +}
> +
> +static void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem)
> +{
> + dma_resv_assert_held(shmem->base.resv);
> +
> + drm_gem_shmem_put_pages(shmem);
> }
> -EXPORT_SYMBOL(drm_gem_shmem_put_pages);
>
> /**
> * drm_gem_shmem_pin - Pin backing pages for a shmem GEM object
> @@ -271,10 +259,17 @@ EXPORT_SYMBOL(drm_gem_shmem_put_pages);
> int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem)
> {
> struct drm_gem_object *obj = &shmem->base;
> + int ret;
>
> drm_WARN_ON(obj->dev, obj->import_attach);
>
> - return drm_gem_shmem_get_pages(shmem);
> + ret = dma_resv_lock_interruptible(shmem->base.resv, NULL);
> + if (ret)
> + return ret;

I think here is the major problem I have with this patch: you've made
drm_gem_shmem_{get_pages,pin}() private, which forces me to call
drm_gem_shmem_pin() in a path where I already acquired the resv lock
(using the drm_exec infra proposed by Christian). That would
probably work if you were letting ret == -EALREADY go through, but I'm
wondering if it wouldn't be preferable to expose
drm_gem_shmem_pin_locked().

> + ret = drm_gem_shmem_pin_locked(shmem);
> + dma_resv_unlock(shmem->base.resv);
> +
> + return ret;
> }
> EXPORT_SYMBOL(drm_gem_shmem_pin);
>
> @@ -291,12 +286,29 @@ void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem)
>
> drm_WARN_ON(obj->dev, obj->import_attach);
>
> - drm_gem_shmem_put_pages(shmem);
> + dma_resv_lock(shmem->base.resv, NULL);
> + drm_gem_shmem_unpin_locked(shmem);
> + dma_resv_unlock(shmem->base.resv);
> }
> EXPORT_SYMBOL(drm_gem_shmem_unpin);

If we want to be consistent, let's just expose drm_gem_shmem_unpin()
and drm_gem_shmem_pin() and keep drm_gem_shmem_{get,put}_pages()
private, or even better, rename them drm_gem_shmem_{pin,unpin}_locked()
instead of having drm_gem_shmem_{pin,unpin}_locked() wrappers that just
forward the call to drm_gem_shmem_{get,put}_pages().

>
> -static int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
> - struct iosys_map *map)
> +/*
> + * drm_gem_shmem_vmap - Create a virtual mapping for a shmem GEM object
> + * @shmem: shmem GEM object
> + * @map: Returns the kernel virtual address of the SHMEM GEM object's backing
> + * store.
> + *
> + * This function makes sure that a contiguous kernel virtual address mapping
> + * exists for the buffer backing the shmem GEM object. It hides the differences
> + * between dma-buf imported and natively allocated objects.
> + *
> + * Acquired mappings should be cleaned up by calling drm_gem_shmem_vunmap().
> + *
> + * Returns:
> + * 0 on success or a negative error code on failure.
> + */
> +int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem,
> + struct iosys_map *map)

Same problem with this renaming: it's confusing because this function
was previously taking care of the locking, and it's no longer the case.
That's actually true for other public functions you're patching, but I
won't go over all of them.

I know this patch has been under discussion for quite some time, and has
been validated by other devs/maintainers, but I'd like to understand the
reasoning behind these decisions. Not the decision to replace all locks
by dma_resv, which I kinda understand, but the decision to change the
behavior of functions without making the name reflect the new behavior
(_locked suffix), or the fact we now prevent some functions from
succeeding when the dma_resv lock is taken by the driver beforehand (which,
unless I'm mistaken, will happen in the VM_BIND logic, and can happen
in the SUBMIT ioctl too depending on the driver).

Regards,

Boris