Re: [RFC 1/8] dma-buf/fence: add fence_collection fences

From: Christian KÃnig
Date: Fri Apr 15 2016 - 05:03:57 EST


Am 15.04.2016 um 10:02 schrieb Daniel Vetter:
On Thu, Apr 14, 2016 at 06:29:34PM -0700, Gustavo Padovan wrote:
From: Gustavo Padovan <gustavo.padovan@xxxxxxxxxxxxxxx>

struct fence_collection inherits from struct fence and carries a
collection of fences that needs to be waited together.

It is useful to translate a sync_file to a fence to remove the complexity
of dealing with sync_files from DRM drivers. So even if there are many
fences in the sync_file that needs to waited for a commit to happen
drivers would only worry about a standard struct fence.That means that no
changes needed to any driver besides supporting fences.

fence_collection's fence doesn't belong to any timeline context.

Signed-off-by: Gustavo Padovan <gustavo.padovan@xxxxxxxxxxxxxxx>
---
drivers/dma-buf/Makefile | 2 +-
drivers/dma-buf/fence-collection.c | 138 +++++++++++++++++++++++++++++++++++++
drivers/dma-buf/fence.c | 2 +-
include/linux/fence-collection.h | 56 +++++++++++++++
include/linux/fence.h | 2 +
5 files changed, 198 insertions(+), 2 deletions(-)
create mode 100644 drivers/dma-buf/fence-collection.c
create mode 100644 include/linux/fence-collection.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 43325a1..30b8464 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,3 +1,3 @@
-obj-y := dma-buf.o fence.o reservation.o seqno-fence.o
+obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-collection.o
obj-$(CONFIG_SYNC_FILE) += sync_file.o sync_timeline.o sync_debug.o
obj-$(CONFIG_SW_SYNC) += sw_sync.o
diff --git a/drivers/dma-buf/fence-collection.c b/drivers/dma-buf/fence-collection.c
new file mode 100644
index 0000000..8a4ecb0
--- /dev/null
+++ b/drivers/dma-buf/fence-collection.c
@@ -0,0 +1,138 @@
+/*
+ * fence-collection: aggregate fences to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Authors:
+ * Gustavo Padovan <gustavo@xxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/fence-collection.h>
+
+static const char *fence_collection_get_driver_name(struct fence *fence)
+{
+ struct fence_collection *collection = to_fence_collection(fence);
+ struct fence *f = collection->fences[0].fence;
+
+ return f->ops->get_driver_name(fence);
+}

I would rather return some constant name here instead of relying that the collection already has a fence added and that all fences are from the same driver.

+
+static const char *fence_collection_get_timeline_name(struct fence *fence)
+{
+ return "no context";
+}
+
+static bool fence_collection_enable_signaling(struct fence *fence)
+{
+ struct fence_collection *collection = to_fence_collection(fence);
+
+ return atomic_read(&collection->num_pending_fences);
+}
+
+static bool fence_collection_signaled(struct fence *fence)
+{
+ struct fence_collection *collection = to_fence_collection(fence);
+
+ return (atomic_read(&collection->num_pending_fences) == 0);
+}
+
+static void fence_collection_release(struct fence *fence)
+{
+ struct fence_collection *collection = to_fence_collection(fence);
+ int i;
+
+ for (i = 0 ; i < collection->num_fences ; i++) {
+ fence_remove_callback(collection->fences[i].fence,
+ &collection->fences[i].cb);
+ fence_put(collection->fences[i].fence);
+ }
+
+ fence_free(fence);
+}
+
+static signed long fence_collection_wait(struct fence *fence, bool intr,
+ signed long timeout)
+{
+ struct fence_collection *collection = to_fence_collection(fence);
+ int i;
+
+ for (i = 0 ; i < collection->num_fences ; i++) {
+ timeout = fence_wait(collection->fences[i].fence, intr);
+ if (timeout < 0)
+ return timeout;
+ }
+
+ return timeout;
+}
+
+static const struct fence_ops fence_collection_ops = {
+ .get_driver_name = fence_collection_get_driver_name,
+ .get_timeline_name = fence_collection_get_timeline_name,
+ .enable_signaling = fence_collection_enable_signaling,
+ .signaled = fence_collection_signaled,
+ .wait = fence_collection_wait,
+ .release = fence_collection_release,
+};
+
+static void collection_check_cb_func(struct fence *fence, struct fence_cb *cb)
+{
+ struct fence_collection_cb *f_cb;
+ struct fence_collection *collection;
+
+ f_cb = container_of(cb, struct fence_collection_cb, cb);
+ collection = f_cb->collection;
+
+ if (atomic_dec_and_test(&collection->num_pending_fences))
+ fence_signal(&collection->base);
+}
+
+void fence_collection_add(struct fence_collection *collection,
+ struct fence *fence)
+{
+ int n = collection->num_fences;
+
+ collection->fences[n].collection = collection;
+ collection->fences[n].fence = fence;
+
+ if (fence_add_callback(fence, &collection->fences[n].cb,
+ collection_check_cb_func))
+ return;
+
+ fence_get(fence);
+
+ collection->num_fences++;
+ atomic_inc(&collection->num_pending_fences);
+}
For the interface I think we should not split it into _init and _add - it
shouldn't be allowed to change a collection once it's created. So probably
cleaner if we add an array of fence pointers to _init.

Amdgpu also has an implementation for a fence collection which uses a a hashtable to keep the fences grouped by context (e.g. only the latest fence is keept for each context). See amdgpu_sync.c for reference.

We should either make the collection similar in a way that you can add as many fences as you want (like the amdgpu implementation) or make it static and only add a fixed number of fences right from the beginning.

I can certainly see use cases for both, but if you want to stick with a static approach you should probably call the new object fence_array instead of fence_collection and do as Daniel suggested.

Other nitpick: Adding the callback should (I think) only be done in
->enable_signalling.

Yeah, I was about to complain on that as well.

Enabling signaling can have a huge overhead for some fence implementations. So it should only be used when needed


Finally: Have you looked into stitching together a few unit tests for
fence_collection?

Fence collections also break the assumption that every fence is on a
timeline. fence_later and fence_is_later need to be adjusted. We also need
a special collection context to filter these out. This means
fence_collection isn't perfectly opaque abstraction.
+
+struct fence_collection *fence_collection_init(int num_fences)
+{
+ struct fence_collection *collection;
+
+ collection = kzalloc(offsetof(struct fence_collection,
+ fences[num_fences]), GFP_KERNEL);
+ if (!collection)
+ return NULL;
+
+ spin_lock_init(&collection->lock);
+ fence_init(&collection->base, &fence_collection_ops, &collection->lock,
+ FENCE_NO_CONTEXT, 0);
+
+ return collection;
+}
+EXPORT_SYMBOL(fence_collection_init);
+
+void fence_collection_put(struct fence_collection *collection)
+{
+ fence_put(&collection->base);
Not sure a specialized _put function is useful, I'd leave it out.

+}
+EXPORT_SYMBOL(fence_collection_put);
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 7b05dbe..486e95c 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit);
* context or not. One device can have multiple separate contexts,
* and they're used if some engine can run independently of another.
*/
-static atomic_t fence_context_counter = ATOMIC_INIT(0);
+static atomic_t fence_context_counter = ATOMIC_INIT(1);
/**
* fence_context_alloc - allocate an array of fence contexts
diff --git a/include/linux/fence-collection.h b/include/linux/fence-collection.h
new file mode 100644
index 0000000..a798925
--- /dev/null
+++ b/include/linux/fence-collection.h
@@ -0,0 +1,56 @@
+/*
+ * fence-collection: aggregates fence to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Authors:
+ * Gustavo Padovan <gustavo@xxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_FENCE_COLLECTION_H
+#define __LINUX_FENCE_COLLECTION_H
+
+#include <linux/fence.h>
+
+struct fence_collection_cb {
+ struct fence_cb cb;
+ struct fence *fence;
+ struct fence_collection *collection;
+};
+
+struct fence_collection {
+ struct fence base;
+
+ spinlock_t lock;
+ struct fence_cb fence_cb;
+ atomic_t num_pending_fences;
+ int num_fences;
+ struct fence_collection_cb fences[];
+};
+
+/**
+ * to_fence_collection - cast a fence to a fence_collection
+ * @fence: fence to cast to a fence_collection
+ *
+ * Returns NULL if the fence is not a fence_collection,
+ * or the fence_collection otherwise.
+ */
+static inline struct fence_collection * to_fence_collection(struct fence *fence)
+{
Kerneldoc claims it, but you don't check that the fence is indeed a
fence_collection. That's usually done by comparing the ops pointer.

+ return container_of(fence, struct fence_collection, base);
+}
+
+struct fence_collection *fence_collection_init(int num_fences);
+void fence_collection_add(struct fence_collection *collection,
+ struct fence *fence);
+void fence_collection_put(struct fence_collection *collection);
+
+#endif /* __LINUX_FENCE_COLLECTION_H */
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 2b17698..02170dd 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -30,6 +30,8 @@
#include <linux/printk.h>
#include <linux/rcupdate.h>
+#define FENCE_NO_CONTEXT 0

Might be that how amdgpu uses the fence context and sequence number is a bit questionable, but this will completely break it.

So we need a fix for amdgpu as well before this can go upstream.

Regards,
Christian.

+
struct fence;
struct fence_ops;
struct fence_cb;
--
2.5.5