Re: [PATCH 5/9] drm/ttm: TTM fault handler helpers

From: Thomas Hellstrom
Date: Wed Apr 17 2019 - 06:53:56 EST


On Mon, 2019-04-15 at 08:34 +0200, Christian KÃnig wrote:
> Am 12.04.19 um 18:04 schrieb Thomas Hellstrom:
> > With the vmwgfx dirty tracking, the default TTM fault handler is
> > not
> > completely sufficient (vmwgfx need to modify the vma->vm_flags
> > member,
> > and also needs to restrict the number of prefaults).
> >
> > We also want to replicate the new ttm_bo_vm_reserve() functionality
> >
> > So start turning the TTM vm code into helpers:
> > ttm_bo_vm_fault_reserved()
> > and ttm_bo_vm_reserve(), and provide a default TTM fault handler
> > for other
> > drivers to use.
> >
> > Cc: "Christian KÃnig" <christian.koenig@xxxxxxx>
> > Signed-off-by: Thomas Hellstrom <thellstrom@xxxxxxxxxx>
>
> Two nit picks below, apart from that looks good to me as well.

Thanks Christian,

I'll incoporate those.

/Thomas


>
> > ---
> > drivers/gpu/drm/ttm/ttm_bo_vm.c | 170 ++++++++++++++++++++-------
> > -----
> > include/drm/ttm/ttm_bo_api.h | 10 ++
> > 2 files changed, 116 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index bfb25b81fed7..3bd28fb97124 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -42,8 +42,6 @@
> > #include <linux/uaccess.h>
> > #include <linux/mem_encrypt.h>
> >
> > -#define TTM_BO_VM_NUM_PREFAULT 16
> > -
> > static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object
> > *bo,
> > struct vm_fault *vmf)
> > {
> > @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct
> > ttm_buffer_object *bo,
> > + page_offset;
> > }
> >
> > -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> > +/**
> > + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm
> > callback
> > + * @bo: The buffer object
> > + * @vmf: The fault structure handed to the callback
> > + *
> > + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem
> > to be dropped
> > + * during long waits, and after the wait the callback will be
> > restarted. This
> > + * is to allow other threads using the same virtual memory space
> > concurrent
> > + * access to map(), unmap() completely unrelated buffer objects.
> > TTM buffer
> > + * object reservations sometimes wait for GPU and should therefore
> > be
> > + * considered long waits. This function reserves the buffer object
> > interruptibly
> > + * taking this into account. Starvation is avoided by the vm
> > system not
> > + * allowing too many repeated restarts.
> > + * This function is intended to be used in customized fault() and
> > _mkwrite()
> > + * handlers.
> > + *
> > + * Return:
> > + * 0 on success and the bo was reserved.
> > + * VM_FAULT_RETRY if blocking wait.
> > + * VM_FAULT_NOPAGE if blocking wait and retrying was not
> > allowed.
> > + */
> > +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
> > + struct vm_fault *vmf)
> > {
> > - struct vm_area_struct *vma = vmf->vma;
> > - struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
> > - vma->vm_private_data;
> > - struct ttm_bo_device *bdev = bo->bdev;
> > - unsigned long page_offset;
> > - unsigned long page_last;
> > - unsigned long pfn;
> > - struct ttm_tt *ttm = NULL;
> > - struct page *page;
> > - int err;
> > - int i;
> > - vm_fault_t ret = VM_FAULT_NOPAGE;
> > - unsigned long address = vmf->address;
> > - struct ttm_mem_type_manager *man =
> > - &bdev->man[bo->mem.mem_type];
> > - struct vm_area_struct cvma;
> > -
> > - /*
> > - * Work around locking order reversal in fault / nopfn
> > - * between mmap_sem and bo_reserve: Perform a trylock operation
> > - * for reserve, and if it fails, retry the fault after waiting
> > - * for the buffer to become unreserved.
> > - */
> > if (unlikely(!reservation_object_trylock(bo->resv))) {
> > if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> > if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> > @@ -151,14 +148,56 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > return VM_FAULT_NOPAGE;
> > }
> >
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(ttm_bo_vm_reserve);
> > +
> > +/**
> > + * ttm_bo_vm_fault_reserved - TTM fault helper
> > + * @vmf: The struct vm_fault given as argument to the fault
> > callback
> > + * @cvma: The struct vmw_area_struct affected. Note that this may
> > be a
> > + * copy of the real vma object if the caller needs, for example,
> > VM
> > + * flags to be temporarily altered while determining the page
> > protection.
> > + * @num_prefault: Maximum number of prefault pages. The caller may
> > want to
> > + * specify this based on madvice settings and the size of the GPU
> > object
> > + * backed by the memory.
> > + *
> > + * This function inserts one or more page table entries pointing
> > to the
> > + * memory backing the buffer object, and then returns a return
> > code
> > + * instructing the caller to retry the page access.
> > + *
> > + * Return:
> > + * VM_FAULT_NOPAGE on success or pending signal
> > + * VM_FAULT_SIGBUS on unspecified error
> > + * VM_FAULT_OOM on out-of-memory
> > + * VM_FAULT_RETRY if retryable wait
> > + */
> > +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
> > + struct vm_area_struct *cvma,
> > + pgoff_t num_prefault)
> > +{
> > + struct vm_area_struct *vma = vmf->vma;
> > + struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
> > + vma->vm_private_data;
> > + struct ttm_bo_device *bdev = bo->bdev;
> > + unsigned long page_offset;
> > + unsigned long page_last;
> > + unsigned long pfn;
> > + struct ttm_tt *ttm = NULL;
> > + struct page *page;
> > + int err;
> > + pgoff_t i;
> > + vm_fault_t ret = VM_FAULT_NOPAGE;
> > + unsigned long address = vmf->address;
> > + struct ttm_mem_type_manager *man =
> > + &bdev->man[bo->mem.mem_type];
> > +
> > /*
> > * Refuse to fault imported pages. This should be handled
> > * (if at all) by redirecting mmap to the exporter.
> > */
> > - if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > - ret = VM_FAULT_SIGBUS;
> > - goto out_unlock;
> > - }
> > + if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
> > + return VM_FAULT_SIGBUS;
> >
> > if (bdev->driver->fault_reserve_notify) {
> > struct dma_fence *moving = dma_fence_get(bo->moving);
> > @@ -169,11 +208,9 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > break;
> > case -EBUSY:
> > case -ERESTARTSYS:
> > - ret = VM_FAULT_NOPAGE;
> > - goto out_unlock;
> > + return VM_FAULT_NOPAGE;
> > default:
> > - ret = VM_FAULT_SIGBUS;
> > - goto out_unlock;
> > + return VM_FAULT_SIGBUS;
> > }
> >
> > if (bo->moving != moving) {
> > @@ -189,24 +226,15 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > * move.
> > */
> > ret = ttm_bo_vm_fault_idle(bo, vmf);
> > - if (unlikely(ret != 0)) {
> > - if (ret == VM_FAULT_RETRY &&
> > - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> > - /* The BO has already been unreserved. */
> > - return ret;
> > - }
> > -
> > - goto out_unlock;
> > - }
> > + if (unlikely(ret != 0))
> > + return ret;
> >
> > err = ttm_mem_io_lock(man, true);
> > - if (unlikely(err != 0)) {
> > - ret = VM_FAULT_NOPAGE;
> > - goto out_unlock;
> > - }
> > + if (unlikely(err != 0))
> > + return VM_FAULT_NOPAGE;
> > err = ttm_mem_io_reserve_vm(bo);
> > if (unlikely(err != 0)) {
> > - ret = VM_FAULT_SIGBUS;
> > + return VM_FAULT_SIGBUS;
> > goto out_io_unlock;
>
> This goto is now superfluous.
>
> > }
> >
> > @@ -220,17 +248,11 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > goto out_io_unlock;
> > }
> >
> > - /*
> > - * Make a local vma copy to modify the page_prot member
> > - * and vm_flags if necessary. The vma parameter is protected
> > - * by mmap_sem in write mode.
> > - */
> > - cvma = *vma;
> > - cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
> > + cvma->vm_page_prot = vm_get_page_prot(cvma->vm_flags);
> >
> > if (bo->mem.bus.is_iomem) {
> > - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
> > - cvma.vm_page_prot);
> > + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement,
> > + cvma->vm_page_prot);
> > } else {
> > struct ttm_operation_ctx ctx = {
> > .interruptible = false,
> > @@ -240,8 +262,8 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > };
> >
> > ttm = bo->ttm;
> > - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
> > - cvma.vm_page_prot);
> > + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement,
> > + cvma->vm_page_prot);
> >
> > /* Allocate all page at once, most common usage */
> > if (ttm_tt_populate(ttm, &ctx)) {
> > @@ -254,10 +276,11 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > * Speculatively prefault a number of pages. Only error on
> > * first page.
> > */
> > - for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
> > + for (i = 0; i < num_prefault; ++i) {
> > if (bo->mem.bus.is_iomem) {
> > /* Iomem should not be marked encrypted */
> > - cvma.vm_page_prot =
> > pgprot_decrypted(cvma.vm_page_prot);
> > + cvma->vm_page_prot =
> > + pgprot_decrypted(cvma->vm_page_prot);
> > pfn = ttm_bo_io_mem_pfn(bo, page_offset);
> > } else {
> > page = ttm->pages[page_offset];
> > @@ -273,10 +296,10 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > }
> >
> > if (vma->vm_flags & VM_MIXEDMAP)
> > - ret = vmf_insert_mixed(&cvma, address,
> > + ret = vmf_insert_mixed(cvma, address,
> > __pfn_to_pfn_t(pfn, PFN_DEV));
> > else
> > - ret = vmf_insert_pfn(&cvma, address, pfn);
> > + ret = vmf_insert_pfn(cvma, address, pfn);
> >
> > /*
> > * Somebody beat us to this PTE or prefaulting to
> > @@ -295,7 +318,26 @@ static vm_fault_t ttm_bo_vm_fault(struct
> > vm_fault *vmf)
> > ret = VM_FAULT_NOPAGE;
> > out_io_unlock:
> > ttm_mem_io_unlock(man);
> > -out_unlock:
> > + return ret;
> > +}
> > +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
> > +
> > +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> > +{
> > + struct vm_area_struct *vma = vmf->vma;
> > + struct vm_area_struct cvma = *vma;
> > + struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
> > + vma->vm_private_data;
>
> That extra cast can be dropped, the vm_private_data member is a
> void*
> anyway.
>
> Regards,
> Christian.
>
> > + vm_fault_t ret;
> > +
> > + ret = ttm_bo_vm_reserve(bo, vmf);
> > + if (ret)
> > + return ret;
> > +
> > + ret = ttm_bo_vm_fault_reserved(vmf, &cvma,
> > TTM_BO_VM_NUM_PREFAULT);
> > + if (ret == VM_FAULT_RETRY && !(vmf->flags &
> > FAULT_FLAG_RETRY_NOWAIT))
> > + return ret;
> > +
> > reservation_object_unlock(bo->resv);
> > return ret;
> > }
> > diff --git a/include/drm/ttm/ttm_bo_api.h
> > b/include/drm/ttm/ttm_bo_api.h
> > index 49d9cdfc58f2..bebfa16426ca 100644
> > --- a/include/drm/ttm/ttm_bo_api.h
> > +++ b/include/drm/ttm/ttm_bo_api.h
> > @@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob,
> > struct ttm_operation_ctx *ctx);
> > void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> > int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> > +
> > +/* Default number of pre-faulted pages in the TTM fault handler */
> > +#define TTM_BO_VM_NUM_PREFAULT 16
> > +
> > +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
> > + struct vm_fault *vmf);
> > +
> > +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
> > + struct vm_area_struct *cvma,
> > + pgoff_t num_prefault);
> > #endif