[RFC PATCH v1 1/1] drm/virtio: Support fence-passing feature

From: Dmitry Osipenko
Date: Sat Oct 07 2023 - 16:00:05 EST


Support an extended version of the VIRTIO_GPU_CMD_SUBMIT_3D command that
allows passing in-fence IDs to the host for waiting, removing the need for
expensive host-guest roundtrips when waiting for fences on the guest side.

Guest userspace must enable the new VIRTGPU_CONTEXT_PARAM_FENCE_PASSING flag
and the host must support the new VIRTIO_GPU_F_FENCE_PASSING feature in order
to activate fence passing for a given virtio-gpu context. An array of
in-fence IDs is then prepended to the VIRTIO_GPU_CMD_SUBMIT_3D data, and
the previously unused padding field of the command is reused for the
number of in-fences.

A new VIRTGPU_EXECBUF_SHARED_FENCE flag is added to the job submission
UAPI and must be set by userspace if it wants to make the fence shareable
with/on the host. Certain jobs won't want to share their fence; Venus in
particular will benefit from this flag.

Link: https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1138
Link: https://gitlab.freedesktop.org/digetx/qemu/-/commits/native-context-iris
Link: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4679609
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@xxxxxxxxxxxxx>
---
drivers/gpu/drm/virtio/virtgpu_drv.c | 1 +
drivers/gpu/drm/virtio/virtgpu_drv.h | 11 ++-
drivers/gpu/drm/virtio/virtgpu_fence.c | 15 +++-
drivers/gpu/drm/virtio/virtgpu_ioctl.c | 11 ++-
drivers/gpu/drm/virtio/virtgpu_kms.c | 8 +-
drivers/gpu/drm/virtio/virtgpu_submit.c | 99 ++++++++++++++++++++++++-
drivers/gpu/drm/virtio/virtgpu_vq.c | 7 +-
include/uapi/drm/virtgpu_drm.h | 3 +
include/uapi/linux/virtio_gpu.h | 11 ++-
9 files changed, 152 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 644b8ee51009..544918bd38e9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -148,6 +148,7 @@ static unsigned int features[] = {
VIRTIO_GPU_F_RESOURCE_UUID,
VIRTIO_GPU_F_RESOURCE_BLOB,
VIRTIO_GPU_F_CONTEXT_INIT,
+ VIRTIO_GPU_F_FENCE_PASSING,
};
static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 8513b671f871..1dc503cb53de 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -149,6 +149,7 @@ struct virtio_gpu_fence {
struct virtio_gpu_fence_event *e;
struct virtio_gpu_fence_driver *drv;
struct list_head node;
+ bool host_shareable;
};

struct virtio_gpu_vbuffer {
@@ -246,6 +247,7 @@ struct virtio_gpu_device {
bool has_resource_blob;
bool has_host_visible;
bool has_context_init;
+ bool has_fence_passing;
struct virtio_shm_region host_visible_region;
struct drm_mm host_visible_mm;

@@ -273,6 +275,7 @@ struct virtio_gpu_fpriv {
uint32_t num_rings;
uint64_t base_fence_ctx;
uint64_t ring_idx_mask;
+ bool fence_passing_enabled;
struct mutex context_lock;
};

@@ -367,7 +370,9 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
void *data, uint32_t data_size,
uint32_t ctx_id,
struct virtio_gpu_object_array *objs,
- struct virtio_gpu_fence *fence);
+ struct virtio_gpu_fence *fence,
+ uint32_t cmd_size,
+ unsigned int num_in_fences);
void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
uint32_t ctx_id,
uint64_t offset, uint32_t level,
@@ -420,6 +425,9 @@ virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_device *vgdev,
uint32_t width, uint32_t height,
uint32_t x, uint32_t y);

+void virtio_gpu_cmd_in_fence(struct virtio_gpu_device *vgdev,
+ uint32_t ctx_id, uint64_t fence_id);
+
/* virtgpu_display.c */
int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
@@ -439,6 +447,7 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
struct virtio_gpu_fence *fence);
void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
u64 fence_id);
+struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence);

/* virtgpu_object.c */
void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo);
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index f28357dbde35..1fd3cfeca2f5 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -27,9 +27,6 @@

#include "virtgpu_drv.h"

-#define to_virtio_gpu_fence(x) \
- container_of(x, struct virtio_gpu_fence, f)
-
static const char *virtio_gpu_get_driver_name(struct dma_fence *f)
{
return "virtio_gpu";
@@ -71,6 +68,14 @@ static const struct dma_fence_ops virtio_gpu_fence_ops = {
.timeline_value_str = virtio_gpu_timeline_value_str,
};

+struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence)
+{
+ if (dma_fence->ops != &virtio_gpu_fence_ops)
+ return NULL;
+
+ return container_of(dma_fence, struct virtio_gpu_fence, f);
+}
+
struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev,
uint64_t base_fence_ctx,
uint32_t ring_idx)
@@ -122,6 +127,10 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
cpu_to_le32(VIRTIO_GPU_FLAG_INFO_RING_IDX);
cmd_hdr->ring_idx = (u8)fence->ring_idx;
}
+
+ if (fence->host_shareable)
+ cmd_hdr->flags |=
+ cpu_to_le32(VIRTIO_GPU_FLAG_FENCE_SHAREABLE);
}

void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index b24b11f25197..3028786c59cd 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -514,7 +514,8 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev,
return PTR_ERR(buf);

virtio_gpu_cmd_submit(vgdev, buf, rc_blob->cmd_size,
- vfpriv->ctx_id, NULL, NULL);
+ vfpriv->ctx_id, NULL, NULL,
+ rc_blob->cmd_size, 0);
}

if (guest_blob)
@@ -642,6 +643,14 @@ static int virtio_gpu_context_init_ioctl(struct drm_device *dev,

vfpriv->ring_idx_mask = value;
break;
+ case VIRTGPU_CONTEXT_PARAM_FENCE_PASSING:
+ if (!vgdev->has_fence_passing && value) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ vfpriv->fence_passing_enabled = !!value;
+ break;
default:
ret = -EINVAL;
goto out_unlock;
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 5a3b5aaed1f3..9f4617a75edd 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -197,12 +197,16 @@ int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev)
if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_CONTEXT_INIT)) {
vgdev->has_context_init = true;
}
+ if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_FENCE_PASSING)) {
+ vgdev->has_fence_passing = true;
+ }

- DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible",
+ DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible %cfence_passing",
vgdev->has_virgl_3d ? '+' : '-',
vgdev->has_edid ? '+' : '-',
vgdev->has_resource_blob ? '+' : '-',
- vgdev->has_host_visible ? '+' : '-');
+ vgdev->has_host_visible ? '+' : '-',
+ vgdev->has_fence_passing ? '+' : '-');

DRM_INFO("features: %ccontext_init\n",
vgdev->has_context_init ? '+' : '-');
diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c
index 3c00135ead45..129d063029a6 100644
--- a/drivers/gpu/drm/virtio/virtgpu_submit.c
+++ b/drivers/gpu/drm/virtio/virtgpu_submit.c
@@ -25,6 +25,11 @@ struct virtio_gpu_submit_post_dep {
u64 point;
};

+struct virtio_gpu_in_fence {
+ u64 id; /* fence ID handed to host with the submit command */
+ u64 context; /* dma-fence context, u64 like dma_fence.context */
+};
+
struct virtio_gpu_submit {
struct virtio_gpu_submit_post_dep *post_deps;
unsigned int num_out_syncobjs;
@@ -32,6 +37,9 @@ struct virtio_gpu_submit {
struct drm_syncobj **in_syncobjs;
unsigned int num_in_syncobjs;

+ struct virtio_gpu_in_fence *in_fences;
+ unsigned int num_in_fences;
+
struct virtio_gpu_object_array *buflist;
struct drm_virtgpu_execbuffer *exbuf;
struct virtio_gpu_fence *out_fence;
@@ -41,6 +49,8 @@ struct virtio_gpu_submit {
struct drm_file *file;
int out_fence_fd;
u64 fence_ctx;
+ u32 data_size;
+ u32 cmd_size;
u32 ring_idx;
void *buf;
};
@@ -48,11 +58,44 @@ struct virtio_gpu_submit {
static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit,
struct dma_fence *in_fence)
{
+ struct virtio_gpu_fence *fence = to_virtio_gpu_fence(in_fence);
u32 context = submit->fence_ctx + submit->ring_idx;
+ struct virtio_gpu_in_fence *vfence, *in_fences;
+ u32 i;

if (dma_fence_match_context(in_fence, context))
return 0;

+ if (fence && fence->host_shareable &&
+ submit->vfpriv->fence_passing_enabled) {
+ /*
+ * Pass at most one in-fence per fence context to host: if
+ * in_fence's context is already recorded keep the latest ID,
+ * else record it keyed by in_fence's own (not submit's) context.
+ */
+ for (i = 0; i < submit->num_in_fences; i++) {
+ vfence = &submit->in_fences[i];
+
+ if (dma_fence_match_context(in_fence, vfence->context)) {
+ vfence->id = max(vfence->id, fence->fence_id);
+ return 0;
+ }
+ }
+
+ in_fences = krealloc_array(submit->in_fences,
+ submit->num_in_fences + 1,
+ sizeof(*in_fences), GFP_KERNEL);
+ if (!in_fences)
+ return -ENOMEM;
+
+ in_fences[submit->num_in_fences].id = fence->fence_id;
+ in_fences[submit->num_in_fences].context = in_fence->context;
+ submit->in_fences = in_fences;
+ submit->num_in_fences++;
+
+ return 0;
+ }
+
return dma_fence_wait(in_fence, true);
}

@@ -331,6 +374,7 @@ static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit)
virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs);
+ kfree(submit->in_fences);

if (!IS_ERR(submit->buf))
kvfree(submit->buf);
@@ -348,12 +392,51 @@ static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit)
fput(submit->sync_file->file);
}

-static void virtio_gpu_submit(struct virtio_gpu_submit *submit)
+static int virtio_gpu_attach_in_fences(struct virtio_gpu_submit *submit)
{
- virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->exbuf->size,
+ size_t in_fences_size = sizeof(u64) * submit->num_in_fences;
+ size_t new_data_size = submit->data_size + in_fences_size;
+ void *buf = submit->buf;
+ u64 *in_fences;
+ unsigned int i;
+
+ if (new_data_size < submit->data_size)
+ return -EINVAL;
+
+ buf = kvrealloc(buf, submit->data_size, new_data_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ memmove(buf + in_fences_size, buf, submit->data_size);
+ in_fences = buf;
+
+ for (i = 0; i < submit->num_in_fences; i++)
+ in_fences[i] = cpu_to_le64(submit->in_fences[i].id);
+
+ submit->data_size = new_data_size;
+ submit->buf = buf;
+
+ return 0;
+}
+
+static int virtio_gpu_submit(struct virtio_gpu_submit *submit)
+{
+ int err;
+
+ if (submit->num_in_fences) {
+ err = virtio_gpu_attach_in_fences(submit);
+ if (err)
+ return err;
+ }
+
+ virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->data_size,
submit->vfpriv->ctx_id, submit->buflist,
- submit->out_fence);
+ submit->out_fence, submit->cmd_size,
+ submit->num_in_fences);
+
virtio_gpu_notify(submit->vgdev);
+
+ return 0;
}

static void virtio_gpu_complete_submit(struct virtio_gpu_submit *submit)
@@ -401,6 +484,12 @@ static int virtio_gpu_init_submit(struct virtio_gpu_submit *submit,
}
}

+ if ((exbuf->flags & VIRTGPU_EXECBUF_SHARED_FENCE) &&
+ vfpriv->fence_passing_enabled && out_fence)
+ out_fence->host_shareable = true;
+
+ submit->data_size = exbuf->size;
+ submit->cmd_size = exbuf->size;
submit->out_fence = out_fence;
submit->fence_ctx = fence_ctx;
submit->ring_idx = ring_idx;
@@ -527,7 +616,9 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
if (ret)
goto cleanup;

- virtio_gpu_submit(&submit);
+ ret = virtio_gpu_submit(&submit);
+ if (ret)
+ goto cleanup;

/*
* Set up usr-out data after submitting the job to optimize
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index b1a00c0c25a7..29d462b69bad 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -1079,7 +1079,9 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
void *data, uint32_t data_size,
uint32_t ctx_id,
struct virtio_gpu_object_array *objs,
- struct virtio_gpu_fence *fence)
+ struct virtio_gpu_fence *fence,
+ uint32_t cmd_size,
+ unsigned int num_in_fences)
{
struct virtio_gpu_cmd_submit *cmd_p;
struct virtio_gpu_vbuffer *vbuf;
@@ -1093,7 +1095,8 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,

cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SUBMIT_3D);
cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id);
- cmd_p->size = cpu_to_le32(data_size);
+ cmd_p->size = cpu_to_le32(cmd_size);
+ cmd_p->num_in_fences = cpu_to_le32(num_in_fences);

virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
}
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index b1d0e56565bc..fd486fdf0441 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -52,10 +52,12 @@ extern "C" {
#define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01
#define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02
#define VIRTGPU_EXECBUF_RING_IDX 0x04
+#define VIRTGPU_EXECBUF_SHARED_FENCE 0x08
#define VIRTGPU_EXECBUF_FLAGS (\
VIRTGPU_EXECBUF_FENCE_FD_IN |\
VIRTGPU_EXECBUF_FENCE_FD_OUT |\
VIRTGPU_EXECBUF_RING_IDX |\
+ VIRTGPU_EXECBUF_SHARED_FENCE |\
0)

struct drm_virtgpu_map {
@@ -198,6 +200,7 @@ struct drm_virtgpu_resource_create_blob {
#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001
#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002
#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003
+#define VIRTGPU_CONTEXT_PARAM_FENCE_PASSING 0x0004
struct drm_virtgpu_context_set_param {
__u64 param;
__u64 value;
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index f556fde07b76..c3182c8255cf 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -65,6 +65,11 @@
*/
#define VIRTIO_GPU_F_CONTEXT_INIT 4

+/*
+ * VIRTIO_GPU_CMD_SUBMIT_3D
+ */
+#define VIRTIO_GPU_F_FENCE_PASSING 5
+
enum virtio_gpu_ctrl_type {
VIRTIO_GPU_UNDEFINED = 0,

@@ -133,6 +138,10 @@ enum virtio_gpu_shm_id {
* of the command ring that needs to used when creating the fence
*/
#define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1)
+/*
+ * The fence is shareable between host contexts if flag is set.
+ */
+#define VIRTIO_GPU_FLAG_FENCE_SHAREABLE (1 << 2)

struct virtio_gpu_ctrl_hdr {
__le32 type;
@@ -304,7 +313,7 @@ struct virtio_gpu_ctx_resource {
struct virtio_gpu_cmd_submit {
struct virtio_gpu_ctrl_hdr hdr;
__le32 size;
- __le32 padding;
+ __le32 num_in_fences;
};

#define VIRTIO_GPU_CAPSET_VIRGL 1
--
2.41.0