Re: [PATCH V3 XRT Alveo 05/18] fpga: xrt: group platform driver

From: Max Zhen
Date: Fri Feb 26 2021 - 16:58:23 EST


Hi Tom,


On 2/22/21 10:50 AM, Tom Rix wrote:
CAUTION: This message has originated from an External Source. Please use proper judgment and caution when opening attachments, clicking links, or responding to this email.


On 2/17/21 10:40 PM, Lizhi Hou wrote:
group driver that manages life cycle of a bunch of leaf driver instances
and bridges them with root.

Signed-off-by: Sonal Santan <sonal.santan@xxxxxxxxxx>
Signed-off-by: Max Zhen <max.zhen@xxxxxxxxxx>
Signed-off-by: Lizhi Hou <lizhih@xxxxxxxxxx>
---
drivers/fpga/xrt/include/group.h | 27 ++++
drivers/fpga/xrt/lib/group.c | 265 +++++++++++++++++++++++++++++++
2 files changed, 292 insertions(+)
create mode 100644 drivers/fpga/xrt/include/group.h
create mode 100644 drivers/fpga/xrt/lib/group.c

diff --git a/drivers/fpga/xrt/include/group.h b/drivers/fpga/xrt/include/group.h
new file mode 100644
index 000000000000..1874cdd5120d
--- /dev/null
+++ b/drivers/fpga/xrt/include/group.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for Xilinx Runtime (XRT) driver
A bit too generic, please add a description or remove.


Will remove.


+ *
+ * Copyright (C) 2020-2021 Xilinx, Inc.
+ *
+ * Authors:
+ * Cheng Zhen <maxz@xxxxxxxxxx>
+ */
+
+#ifndef _XRT_GROUP_H_
+#define _XRT_GROUP_H_
+
+#include "xleaf.h"
This is patch 6, consider comments on patch 4.


Will move this header to other patch.


+
+/*
+ * Group driver IOCTL calls.
Are these really ioctl calls?

Seems more like messages between nodes in a tree.

Consider changing to better jagon, maybe ioctl -> msg


You're right. They are not really ioctl calls. They are just calls b/w leaf nodes. I changed to leaf calls/commands.



+ */
+enum xrt_group_ioctl_cmd {
+ XRT_GROUP_GET_LEAF = XRT_XLEAF_CUSTOM_BASE, /* See comments in xleaf.h */
XRT_LEAF_CUSTOM_BASE is a #define, while these are enums. To be consistent, the XRT_LEAF_CUSTOM_BASE should be an enum in xleaf, you can initialize it to 64 there.


Will fix.


+ XRT_GROUP_PUT_LEAF,
+ XRT_GROUP_INIT_CHILDREN,
+ XRT_GROUP_FINI_CHILDREN,
+ XRT_GROUP_TRIGGER_EVENT,
+};
+
+#endif /* _XRT_GROUP_H_ */
diff --git a/drivers/fpga/xrt/lib/group.c b/drivers/fpga/xrt/lib/group.c
new file mode 100644
index 000000000000..6ba56eea479b
--- /dev/null
+++ b/drivers/fpga/xrt/lib/group.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Alveo FPGA Group Driver
+ *
+ * Copyright (C) 2020-2021 Xilinx, Inc.
+ *
+ * Authors:
+ * Cheng Zhen <maxz@xxxxxxxxxx>
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include "xleaf.h"
+#include "subdev_pool.h"
+#include "group.h"
+#include "metadata.h"
+#include "main.h"
+
+#define XRT_GRP "xrt_group"
+
+struct xrt_group {
+ struct platform_device *pdev;
+ struct xrt_subdev_pool leaves;
+ bool leaves_created;
+ struct mutex lock; /* lock for group */
+};
+
+static int xrt_grp_root_cb(struct device *dev, void *parg,
+ u32 cmd, void *arg)
could 'cmd' be some enum type ?


Will fix.


+{
+ int rc;
+ struct platform_device *pdev =
+ container_of(dev, struct platform_device, dev);
+ struct xrt_group *xg = (struct xrt_group *)parg;
+
+ switch (cmd) {
+ case XRT_ROOT_GET_LEAF_HOLDERS: {
+ struct xrt_root_ioctl_get_holders *holders =
+ (struct xrt_root_ioctl_get_holders *)arg;
+ rc = xrt_subdev_pool_get_holders(&xg->leaves,
+ holders->xpigh_pdev,
+ holders->xpigh_holder_buf,
+ holders->xpigh_holder_buf_len);
+ break;
+ }
+ default:
+ /* Forward parent call to root. */
+ rc = xrt_subdev_root_request(pdev, cmd, arg);
+ break;
+ }
+
+ return rc;
+}
+
+static int xrt_grp_create_leaves(struct xrt_group *xg)
+{
+ struct xrt_subdev_platdata *pdata = DEV_PDATA(xg->pdev);
+ enum xrt_subdev_id did;
+ struct xrt_subdev_endpoints *eps = NULL;
+ int ep_count = 0, i, ret = 0, failed = 0;
+ unsigned long mlen;
+ char *dtb, *grp_dtb = NULL;
+ const char *ep_name;
+
+ mutex_lock(&xg->lock);
+
+ if (xg->leaves_created) {
+ mutex_unlock(&xg->lock);
This happens should be programming error, so print out some error message


The root driver does not remember which group has created leaves or not. It'll just blindly make the call. The group driver itself should remember and tell the root that it's done already or not. So, this is not considered as an error.


+ return -EEXIST;
+ }
+
+ xrt_info(xg->pdev, "bringing up leaves...");
+
+ /* Create all leaves based on dtb. */
+ if (!pdata)
+ goto bail;
move to above the lock and fail with something like -EINVAL


Will fix.


+
+ mlen = xrt_md_size(DEV(xg->pdev), pdata->xsp_dtb);
+ if (mlen == XRT_MD_INVALID_LENGTH) {
+ xrt_err(xg->pdev, "invalid dtb, len %ld", mlen);
+ goto bail;
+ }
+
+ grp_dtb = vmalloc(mlen);
+ if (!grp_dtb)
+ goto bail;
failed is only set in the loop. This is an unreported -ENOMEM


Will fix.


+
+ memcpy(grp_dtb, pdata->xsp_dtb, mlen);
+ for (did = 0; did < XRT_SUBDEV_NUM;) {
why isn't the did incremented ?


This is not a bug, but I will fix it by rewriting this function so that it'll be easier to follow (hopefully :-)).


+ eps = eps ? eps + 1 : xrt_drv_get_endpoints(did);
this assumes the enpoints are in an array and accessed serially.

this is fragile.

convert to using just the xrt_drv_get_endpoints() call


It does not seem to be fragile? The endpoint data structure is private data structure so it has to be an array as defined. It's usage is just like an ID table as we have in other PCIE drivers.



+ if (!eps || !eps->xse_names) {
+ did++;
+ eps = NULL;
+ continue;
+ }
+ ret = xrt_md_create(DEV(xg->pdev), &dtb);
+ if (ret) {
+ xrt_err(xg->pdev, "create md failed, drv %s",
+ xrt_drv_name(did));
+ failed++;
failed but no cleanup of earier successes


The leaves already created will still be there until the group is destroyed. This is not considered as fatal error.


+ continue;
+ }
+ for (i = 0; eps->xse_names[i].ep_name ||
this assumes that xse_names[] always has a guard.

why not use xse_min_ep ?


xse_min_ep is to tell caller the minimum number of end points needs to be collected before instantiate the leaf driver. it does not indicate exactly how many end points this leaf can handle (it might be able to handle more). So, the guard is necessary to tell caller total number of end points it can handle.



+ eps->xse_names[i].regmap_name; i++) {
+ ep_name = (char *)eps->xse_names[i].ep_name;
+ if (!ep_name) {
+ xrt_md_get_compatible_endpoint(DEV(xg->pdev),
+ grp_dtb,
+ eps->xse_names[i].regmap_name,
+ &ep_name);
+ }
+ if (!ep_name)
+ continue;
+
+ ret = xrt_md_copy_endpoint(DEV(xg->pdev),
+ dtb, grp_dtb, ep_name,
+ (char *)eps->xse_names[i].regmap_name,
+ NULL);
+ if (ret)
+ continue;
+ xrt_md_del_endpoint(DEV(xg->pdev), grp_dtb, ep_name,
+ (char *)eps->xse_names[i].regmap_name);
+ ep_count++;
+ }
+ if (ep_count >= eps->xse_min_ep) {
This only happens if all additions are successful.


This happens if the minimum number of end points has been collected. Anyway, the function is going to be rewritten.


+ ret = xrt_subdev_pool_add(&xg->leaves, did,
+ xrt_grp_root_cb, xg, dtb);
+ eps = NULL;
+ if (ret < 0) {
+ failed++;
+ xrt_err(xg->pdev, "failed to create %s: %d",
+ xrt_drv_name(did), ret);
+ }
+ } else if (ep_count > 0) {
+ xrt_md_copy_all_endpoints(DEV(xg->pdev), grp_dtb, dtb);
+ }
+ vfree(dtb);
+ ep_count = 0;
+ }
+
+ xg->leaves_created = true;
This is true even if some failed ?


Yes, this is OK. Some functionality may be missing if some leaves can be instantiated, but the group will be in leaves created state now.


+
+bail:
+ vfree(grp_dtb);
+ mutex_unlock(&xg->lock);
+
+ return failed == 0 ? 0 : -ECHILD;
+}
+
+static void xrt_grp_remove_leaves(struct xrt_group *xg)
+{
+ mutex_lock(&xg->lock);
+
+ if (!xg->leaves_created) {
+ mutex_unlock(&xg->lock);
+ return;
+ }
+
+ xrt_info(xg->pdev, "tearing down leaves...");
+ xrt_subdev_pool_fini(&xg->leaves);
partial failure above and the subdev_pool is not created ?


It is still created, just not fully populated.


+ xg->leaves_created = false;
+
+ mutex_unlock(&xg->lock);
+}
+
+static int xrt_grp_probe(struct platform_device *pdev)
+{
+ struct xrt_group *xg;
+
+ xrt_info(pdev, "probing...");
+
+ xg = devm_kzalloc(&pdev->dev, sizeof(*xg), GFP_KERNEL);
+ if (!xg)
+ return -ENOMEM;
+
+ xg->pdev = pdev;
+ mutex_init(&xg->lock);
+ xrt_subdev_pool_init(DEV(pdev), &xg->leaves);
+ platform_set_drvdata(pdev, xg);
+
+ return 0;
+}
+
+static int xrt_grp_remove(struct platform_device *pdev)
+{
+ struct xrt_group *xg = platform_get_drvdata(pdev);
+
+ xrt_info(pdev, "leaving...");
+ xrt_grp_remove_leaves(xg);
lock ?


xrt_grp_remove_leaves() takes lock internally.


Thanks,

Max


Tom

+ return 0;
+}
+
+static int xrt_grp_ioctl(struct platform_device *pdev, u32 cmd, void *arg)
+{
+ int rc = 0;
+ struct xrt_group *xg = platform_get_drvdata(pdev);
+
+ switch (cmd) {
+ case XRT_XLEAF_EVENT:
+ /* Simply forward to every child. */
+ xrt_subdev_pool_handle_event(&xg->leaves,
+ (struct xrt_event *)arg);
+ break;
+ case XRT_GROUP_GET_LEAF: {
+ struct xrt_root_ioctl_get_leaf *get_leaf =
+ (struct xrt_root_ioctl_get_leaf *)arg;
+
+ rc = xrt_subdev_pool_get(&xg->leaves, get_leaf->xpigl_match_cb,
+ get_leaf->xpigl_match_arg,
+ DEV(get_leaf->xpigl_pdev),
+ &get_leaf->xpigl_leaf);
+ break;
+ }
+ case XRT_GROUP_PUT_LEAF: {
+ struct xrt_root_ioctl_put_leaf *put_leaf =
+ (struct xrt_root_ioctl_put_leaf *)arg;
+
+ rc = xrt_subdev_pool_put(&xg->leaves, put_leaf->xpipl_leaf,
+ DEV(put_leaf->xpipl_pdev));
+ break;
+ }
+ case XRT_GROUP_INIT_CHILDREN:
+ rc = xrt_grp_create_leaves(xg);
+ break;
+ case XRT_GROUP_FINI_CHILDREN:
+ xrt_grp_remove_leaves(xg);
+ break;
+ case XRT_GROUP_TRIGGER_EVENT:
+ xrt_subdev_pool_trigger_event(&xg->leaves, (enum xrt_events)(uintptr_t)arg);
+ break;
+ default:
+ xrt_err(pdev, "unknown IOCTL cmd %d", cmd);
+ rc = -EINVAL;
+ break;
+ }
+ return rc;
+}
+
+static struct xrt_subdev_drvdata xrt_grp_data = {
+ .xsd_dev_ops = {
+ .xsd_ioctl = xrt_grp_ioctl,
+ },
+};
+
+static const struct platform_device_id xrt_grp_id_table[] = {
+ { XRT_GRP, (kernel_ulong_t)&xrt_grp_data },
+ { },
+};
+
+static struct platform_driver xrt_group_driver = {
+ .driver = {
+ .name = XRT_GRP,
+ },
+ .probe = xrt_grp_probe,
+ .remove = xrt_grp_remove,
+ .id_table = xrt_grp_id_table,
+};
+
+void group_leaf_init_fini(bool init)
+{
+ if (init)
+ xleaf_register_driver(XRT_SUBDEV_GRP, &xrt_group_driver, NULL);
+ else
+ xleaf_unregister_driver(XRT_SUBDEV_GRP);
+}