[PATCH 54/83] hsa/radeon: Switch to new queue scheduler

From: Oded Gabbay
Date: Thu Jul 10 2014 - 18:04:25 EST


From: Ben Goz <ben.goz@xxxxxxx>

This patch switches from the old KFD queue scheduler to the new KFD
queue scheduler. The new scheduler supports H/W CP scheduling, over-subscription
of queues and pre-emption of queues.

Signed-off-by: Ben Goz <ben.goz@xxxxxxx>
Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxx>
---
drivers/gpu/hsa/radeon/kfd_aperture.c | 1 -
drivers/gpu/hsa/radeon/kfd_chardev.c | 107 +++++++++++++++------------------
drivers/gpu/hsa/radeon/kfd_device.c | 31 ++++++----
drivers/gpu/hsa/radeon/kfd_interrupt.c | 4 +-
drivers/gpu/hsa/radeon/kfd_priv.h | 2 +
drivers/gpu/hsa/radeon/kfd_process.c | 56 ++++-------------
include/uapi/linux/kfd_ioctl.h | 4 +-
7 files changed, 88 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
index 9e2d6da..2c72b21 100644
--- a/drivers/gpu/hsa/radeon/kfd_aperture.c
+++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
@@ -32,7 +32,6 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#include <linux/mm.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 07cac88..bb2ef02 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -31,10 +31,11 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#include <linux/mm.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
+#include "kfd_hw_pointer_store.h"
+#include "kfd_device_queue_manager.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -128,24 +129,36 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
struct kfd_dev *dev;
int err = 0;
unsigned int queue_id;
- struct kfd_queue *queue;
struct kfd_process_device *pdd;
+ struct queue_properties q_properties;
+
+ memset(&q_properties, 0, sizeof(struct queue_properties));

if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;

- dev = radeon_kfd_device_by_id(args.gpu_id);
- if (dev == NULL)
- return -EINVAL;
+ /* need to validate parameters */
+
+ q_properties.is_interop = false;
+ q_properties.queue_percent = args.queue_percentage;
+ q_properties.priority = args.queue_priority;
+ q_properties.queue_address = args.ring_base_address;
+ q_properties.queue_size = args.ring_size;

- queue = kzalloc(
- offsetof(struct kfd_queue, scheduler_queue) + dev->device_info->scheduler_class->queue_size,
- GFP_KERNEL);

- if (!queue)
- return -ENOMEM;
+ pr_debug("%s Arguments: Queue Percentage (%d, %d)\n"
+ "Queue Priority (%d, %d)\n"
+ "Queue Address (0x%llX, 0x%llX)\n"
+ "Queue Size (%u64, %ll)\n",
+ __func__,
+ q_properties.queue_percent, args.queue_percentage,
+ q_properties.priority, args.queue_priority,
+ q_properties.queue_address, args.ring_base_address,
+ q_properties.queue_size, args.ring_size);

- queue->dev = dev;
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;

mutex_lock(&p->mutex);

@@ -159,23 +172,14 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
p->pasid,
dev->id);

- if (!radeon_kfd_allocate_queue_id(p, &queue_id))
- goto err_allocate_queue_id;
-
- err = dev->device_info->scheduler_class->create_queue(dev->scheduler, pdd->scheduler_process,
- &queue->scheduler_queue,
- (void __user *)args.ring_base_address,
- args.ring_size,
- (void __user *)args.read_pointer_address,
- (void __user *)args.write_pointer_address,
- radeon_kfd_queue_id_to_doorbell(dev, p, queue_id));
- if (err)
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0, KFD_QUEUE_TYPE_COMPUTE, &queue_id);
+ if (err != 0)
goto err_create_queue;

- radeon_kfd_install_queue(p, queue_id, queue);
-
args.queue_id = queue_id;
- args.doorbell_address = (uint64_t)(uintptr_t)radeon_kfd_get_doorbell(filep, p, dev, queue_id);
+ args.read_pointer_address = (uint64_t)q_properties.read_ptr;
+ args.write_pointer_address = (uint64_t)q_properties.write_ptr;
+ args.doorbell_address = (uint64_t)q_properties.doorbell_ptr;

if (copy_to_user(arg, &args, sizeof(args))) {
err = -EFAULT;
@@ -198,12 +202,9 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
return 0;

err_copy_args_out:
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
+ pqm_destroy_queue(&p->pqm, queue_id);
err_create_queue:
- radeon_kfd_remove_queue(p, queue_id);
-err_allocate_queue_id:
err_bind_process:
- kfree(queue);
mutex_unlock(&p->mutex);
return err;
}
@@ -211,36 +212,25 @@ err_bind_process:
static int
kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, void __user *arg)
{
+ int retval;
struct kfd_ioctl_destroy_queue_args args;
- struct kfd_queue *queue;
- struct kfd_dev *dev;

if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;

- mutex_lock(&p->mutex);
-
- queue = radeon_kfd_get_queue(p, args.queue_id);
- if (!queue) {
- mutex_unlock(&p->mutex);
- return -EINVAL;
- }
-
- dev = queue->dev;
-
pr_debug("kfd: destroying queue id %d for PASID %d\n",
- args.queue_id,
- p->pasid);
+ args.queue_id,
+ p->pasid);

- radeon_kfd_remove_queue(p, args.queue_id);
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
+ mutex_lock(&p->mutex);

- kfree(queue);
+ retval = pqm_destroy_queue(&p->pqm, args.queue_id);

mutex_unlock(&p->mutex);
- return 0;
+ return retval;
}

+
static long
kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __user *arg)
{
@@ -281,12 +271,12 @@ kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __us
alternate_policy = (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;

- if (!dev->device_info->scheduler_class->set_cache_policy(dev->scheduler,
- pdd->scheduler_process,
- default_policy,
- alternate_policy,
- (void __user *)args.alternate_aperture_base,
- args.alternate_aperture_size))
+ if (!dev->dqm->set_cache_memory_policy(dev->dqm,
+ &pdd->qpd,
+ default_policy,
+ alternate_policy,
+ (void __user *)args.alternate_aperture_base,
+ args.alternate_aperture_size))
err = -EINVAL;

out:
@@ -432,11 +422,14 @@ kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (IS_ERR(process))
return PTR_ERR(process);

- if (pgoff < KFD_MMAP_DOORBELL_START)
- return -EINVAL;
-
- if (pgoff < KFD_MMAP_DOORBELL_END)
+ if (pgoff >= KFD_MMAP_DOORBELL_START && pgoff < KFD_MMAP_DOORBELL_END)
return radeon_kfd_doorbell_mmap(process, vma);

+ if (pgoff >= KFD_MMAP_RPTR_START && pgoff < KFD_MMAP_RPTR_END)
+ return radeon_kfd_hw_pointer_store_mmap(&process->read_ptr, vma);
+
+ if (pgoff >= KFD_MMAP_WPTR_START && pgoff < KFD_MMAP_WPTR_END)
+ return radeon_kfd_hw_pointer_store_mmap(&process->write_ptr, vma);
+
return -EINVAL;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index 82febf4..c602e16 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -25,10 +25,9 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
+#include "kfd_device_queue_manager.h"

static const struct kfd_device_info kaveri_device_info = {
- .scheduler_class = &radeon_kfd_cik_static_scheduler_class,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t)
};
@@ -121,7 +120,11 @@ device_iommu_pasid_init(struct kfd_dev *kfd)
}

pasid_limit = min_t(pasid_t, (pasid_t)1 << kfd->device_info->max_pasid_bits, iommu_info.max_pasids);
- pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit);
+ /*
+ * The last PASID is used for kernel queue doorbells;
+ * in the future it might be used for a kernel thread.
+ */
+ pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit - 1);

err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err < 0) {
@@ -168,17 +171,26 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);

- if (kfd->device_info->scheduler_class->create(kfd, &kfd->scheduler)) {
+ kfd->dqm = device_queue_manager_init(kfd);
+ if (!kfd->dqm) {
+ kfd_topology_remove_device(kfd);
amd_iommu_free_device(kfd->pdev);
return false;
}

- kfd->device_info->scheduler_class->start(kfd->scheduler);
+ if (kfd->dqm->start(kfd->dqm) != 0) {
+ device_queue_manager_uninit(kfd->dqm);
+ kfd_topology_remove_device(kfd);
+ amd_iommu_free_device(kfd->pdev);
+ return false;
+ }

kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);

+ pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", sched_policy);
+
return true;
}

@@ -188,13 +200,10 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)

BUG_ON(err != 0);

- if (kfd->init_complete)
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
-
radeon_kfd_interrupt_exit(kfd);

if (kfd->init_complete) {
- kfd->device_info->scheduler_class->destroy(kfd->scheduler);
+ device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
}

@@ -206,7 +215,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
BUG_ON(kfd == NULL);

if (kfd->init_complete) {
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
+ kfd->dqm->stop(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
}
}
@@ -225,7 +234,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
if (err < 0)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);
- kfd->device_info->scheduler_class->start(kfd->scheduler);
+ kfd->dqm->start(kfd->dqm);
}

return 0;
diff --git a/drivers/gpu/hsa/radeon/kfd_interrupt.c b/drivers/gpu/hsa/radeon/kfd_interrupt.c
index 2179780..1c9ad46 100644
--- a/drivers/gpu/hsa/radeon/kfd_interrupt.c
+++ b/drivers/gpu/hsa/radeon/kfd_interrupt.c
@@ -43,7 +43,6 @@
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"

#define KFD_INTERRUPT_RING_SIZE 256

@@ -162,7 +161,7 @@ static void interrupt_wq(struct work_struct *work)
uint32_t ih_ring_entry[DIV_ROUND_UP(dev->device_info->ih_ring_entry_size, sizeof(uint32_t))];

while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->scheduler_class->interrupt_wq(dev->scheduler, ih_ring_entry);
+ ;
}

/* This is called directly from KGD at ISR. */
@@ -171,7 +170,6 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_lock(&kfd->interrupt_lock);

if (kfd->interrupts_active
- && kfd->device_info->scheduler_class->interrupt_isr(kfd->scheduler, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work);

diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 0af4c71..049671b 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -441,6 +441,8 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);

+int get_vmid_from_pasid(struct kfd_dev *dev, pasid_t pasid , unsigned int *vmid);
+
/* Process Queue Manager */
struct process_queue_node {
struct queue *q;
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index 80136e6..f967c15 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -29,7 +29,6 @@
struct mm_struct;

#include "kfd_priv.h"
-#include "kfd_scheduler.h"

/* Initial size for the array of queues.
* The allocated size is doubled each time it is exceeded up to MAX_PROCESS_QUEUES. */
@@ -91,52 +90,15 @@ radeon_kfd_get_process(const struct task_struct *thread)
return process;
}

-/* Assumes that the kfd_process mutex is held.
- * (Or that it doesn't need to be held because the process is exiting.)
- *
- * dev_filter can be set to only destroy queues for one device.
- * Otherwise all queues for the process are destroyed.
- */
-static void
-destroy_queues(struct kfd_process *p, struct kfd_dev *dev_filter)
-{
- unsigned long queue_id;
-
- for_each_set_bit(queue_id, p->allocated_queue_bitmap, MAX_PROCESS_QUEUES) {
-
- struct kfd_queue *queue = radeon_kfd_get_queue(p, queue_id);
- struct kfd_dev *dev;
-
- BUG_ON(queue == NULL);
-
- dev = queue->dev;
-
- if (!dev_filter || dev == dev_filter) {
- struct kfd_process_device *pdd = radeon_kfd_get_process_device_data(dev, p);
-
- BUG_ON(pdd == NULL); /* A queue exists so pdd must. */
-
- radeon_kfd_remove_queue(p, queue_id);
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
-
- kfree(queue);
- }
- }
-}
-
static void free_process(struct kfd_process *p)
{
struct kfd_process_device *pdd, *temp;

BUG_ON(p == NULL);

- destroy_queues(p, NULL);
-
/* doorbell mappings: automatic */

list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) {
- pdd->dev->device_info->scheduler_class->deregister_process(pdd->dev->scheduler, pdd->scheduler_process);
- pdd->scheduler_process = NULL;
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
kfree(pdd);
@@ -202,8 +164,17 @@ static struct kfd_process *create_process(const struct task_struct *thread)

INIT_LIST_HEAD(&process->per_device_data);

+ process->read_ptr.page_mapping = process->write_ptr.page_mapping = NULL;
+ err = pqm_init(&process->pqm, process);
+ if (err != 0)
+ goto err_process_pqm_init;
+
return process;

+err_process_pqm_init:
+ radeon_kfd_pasid_free(process->pasid);
+ list_del(&process->processes_list);
+ thread->mm->kfd_process = NULL;
err_alloc:
kfree(process->queues);
kfree(process);
@@ -222,6 +193,9 @@ radeon_kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p)
pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
if (pdd != NULL) {
pdd->dev = dev;
+ INIT_LIST_HEAD(&pdd->qpd.queues_list);
+ INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+ pdd->qpd.dqm = dev->dqm;
list_add(&pdd->per_device_list, &p->per_device_data);
}

@@ -248,7 +222,6 @@ struct kfd_process_device *radeon_kfd_bind_process_to_device(struct kfd_dev *dev
if (err < 0)
return ERR_PTR(err);

- err = dev->device_info->scheduler_class->register_process(dev->scheduler, p, &pdd->scheduler_process);
if (err < 0) {
amd_iommu_unbind_pasid(dev->pdev, p->pasid);
return ERR_PTR(err);
@@ -282,10 +255,7 @@ void radeon_kfd_unbind_process_from_device(struct kfd_dev *dev, pasid_t pasid)

mutex_lock(&p->mutex);

- destroy_queues(p, dev);
-
- dev->device_info->scheduler_class->deregister_process(dev->scheduler, pdd->scheduler_process);
- pdd->scheduler_process = NULL;
+ pqm_uninit(&p->pqm);

/*
* Just mark pdd as unbound, because we still need it to call
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e5fcb8b..5134880 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -47,9 +47,9 @@ struct kfd_ioctl_create_queue_args {
uint32_t queue_type; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
- uint64_t write_pointer_address; /* to KFD */
- uint64_t read_pointer_address; /* to KFD */

+ uint64_t write_pointer_address; /* from KFD */
+ uint64_t read_pointer_address; /* from KFD */
uint64_t doorbell_address; /* from KFD */
uint32_t queue_id; /* from KFD */
};
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/