Re: [PATCH V3 XRT Alveo 06/18] fpga: xrt: platform driver infrastructure

From: Tom Rix
Date: Thu Feb 25 2021 - 17:01:45 EST



On 2/17/21 10:40 PM, Lizhi Hou wrote:
> infrastructure code providing APIs for managing leaf driver instance
> groups, facilitating inter-leaf driver calls and root calls, managing leaf
> driver device nodes.
>
> Signed-off-by: Sonal Santan <sonal.santan@xxxxxxxxxx>
> Signed-off-by: Max Zhen <max.zhen@xxxxxxxxxx>
> Signed-off-by: Lizhi Hou <lizhih@xxxxxxxxxx>
> ---
> drivers/fpga/xrt/include/events.h | 48 ++
> drivers/fpga/xrt/include/subdev_id.h | 43 ++
> drivers/fpga/xrt/include/xleaf.h | 276 +++++++++
> drivers/fpga/xrt/lib/cdev.c | 231 +++++++
> drivers/fpga/xrt/lib/subdev.c | 871 +++++++++++++++++++++++++++
> drivers/fpga/xrt/lib/subdev_pool.h | 53 ++
> drivers/fpga/xrt/lib/xroot.c | 598 ++++++++++++++++++
> 7 files changed, 2120 insertions(+)
> create mode 100644 drivers/fpga/xrt/include/events.h
> create mode 100644 drivers/fpga/xrt/include/subdev_id.h
> create mode 100644 drivers/fpga/xrt/include/xleaf.h
> create mode 100644 drivers/fpga/xrt/lib/cdev.c
> create mode 100644 drivers/fpga/xrt/lib/subdev.c
> create mode 100644 drivers/fpga/xrt/lib/subdev_pool.h
> create mode 100644 drivers/fpga/xrt/lib/xroot.c
>
> diff --git a/drivers/fpga/xrt/include/events.h b/drivers/fpga/xrt/include/events.h
> new file mode 100644
> index 000000000000..2a9aae8bceb4
> --- /dev/null
> +++ b/drivers/fpga/xrt/include/events.h
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Header file for Xilinx Runtime (XRT) driver
general problem with generic, low information comments
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + */
> +
> +#ifndef _XRT_EVENTS_H_
> +#define _XRT_EVENTS_H_
> +
> +#include <linux/platform_device.h>
why is platform_device.h needed ?
> +#include "subdev_id.h"
> +
> +/*
> + * Event notification.
> + */
> +enum xrt_events {
> + XRT_EVENT_TEST = 0, /* for testing */
> + /*
> + * Events related to specific subdev
> + * Callback arg: struct xrt_event_arg_subdev
> + */
> + XRT_EVENT_POST_CREATION,
> + XRT_EVENT_PRE_REMOVAL,
> + /*
> + * Events related to change of the whole board
> + * Callback arg: <none>
> + */
> + XRT_EVENT_PRE_HOT_RESET,
> + XRT_EVENT_POST_HOT_RESET,
> + XRT_EVENT_PRE_GATE_CLOSE,
> + XRT_EVENT_POST_GATE_OPEN,
> +};
> +
> +struct xrt_event_arg_subdev {
> + enum xrt_subdev_id xevt_subdev_id;
> + int xevt_subdev_instance;
> +};
> +
> +struct xrt_event {
> + enum xrt_events xe_evt;
> + struct xrt_event_arg_subdev xe_subdev;
> +};
> +
> +#endif /* _XRT_EVENTS_H_ */
> diff --git a/drivers/fpga/xrt/include/subdev_id.h b/drivers/fpga/xrt/include/subdev_id.h
> new file mode 100644
> index 000000000000..6205a9f26196
> --- /dev/null
> +++ b/drivers/fpga/xrt/include/subdev_id.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Header file for Xilinx Runtime (XRT) driver
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + */
> +
> +#ifndef _XRT_SUBDEV_ID_H_
> +#define _XRT_SUBDEV_ID_H_
> +
> +/*
> + * Every subdev driver should have an ID for others to refer to it.
driver has an ID
> + * There can be unlimited number of instances of a subdev driver. A
unlimited? change to 'multiple'
> + * <subdev_id, subdev_instance> tuple should be a unique identification of
tuple is a unique
> + * a specific instance of a subdev driver.
> + * NOTE: PLEASE do not change the order of IDs. Sub devices in the same
> + * group are initialized by this order.
why does the order matter? the enums are all initialized
> + */
> +enum xrt_subdev_id {
> + XRT_SUBDEV_GRP = 0,
> + XRT_SUBDEV_VSEC = 1,
> + XRT_SUBDEV_VSEC_GOLDEN = 2,
> + XRT_SUBDEV_DEVCTL = 3,
> + XRT_SUBDEV_AXIGATE = 4,
> + XRT_SUBDEV_ICAP = 5,
> + XRT_SUBDEV_TEST = 6,
> + XRT_SUBDEV_MGMT_MAIN = 7,
> + XRT_SUBDEV_QSPI = 8,
> + XRT_SUBDEV_MAILBOX = 9,
> + XRT_SUBDEV_CMC = 10,
> + XRT_SUBDEV_CALIB = 11,
> + XRT_SUBDEV_CLKFREQ = 12,
> + XRT_SUBDEV_CLOCK = 13,
> + XRT_SUBDEV_SRSR = 14,
> + XRT_SUBDEV_UCS = 15,
> + XRT_SUBDEV_NUM = 16, /* Total number of subdevs. */
> + XRT_ROOT = -1, /* Special ID for root driver. */
> +};
> +
> +#endif /* _XRT_SUBDEV_ID_H_ */
> diff --git a/drivers/fpga/xrt/include/xleaf.h b/drivers/fpga/xrt/include/xleaf.h
> new file mode 100644
> index 000000000000..10215a75d474
> --- /dev/null
> +++ b/drivers/fpga/xrt/include/xleaf.h
> @@ -0,0 +1,276 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Header file for Xilinx Runtime (XRT) driver
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + * Sonal Santan <sonal.santan@xxxxxxxxxx>
> + */
> +
> +#ifndef _XRT_XLEAF_H_
> +#define _XRT_XLEAF_H_
> +
> +#include <linux/mod_devicetable.h>
not needed
> +#include <linux/platform_device.h>
> +#include <linux/fs.h>
> +#include <linux/cdev.h>
> +#include <linux/pci.h>

not needed

check if includes are actually needed.

> +#include <linux/libfdt_env.h>
> +#include "libfdt.h"
> +#include "subdev_id.h"
> +#include "xroot.h"
> +#include "events.h"
> +
> +/* All subdev drivers should use below common routines to print out msg. */
> +#define DEV(pdev) (&(pdev)->dev)
> +#define DEV_PDATA(pdev) \
> + ((struct xrt_subdev_platdata *)dev_get_platdata(DEV(pdev)))
> +#define DEV_DRVDATA(pdev) \
> + ((struct xrt_subdev_drvdata *) \
> + platform_get_device_id(pdev)->driver_data)
> +#define FMT_PRT(prt_fn, pdev, fmt, args...) \
> + ({typeof(pdev) (_pdev) = (pdev); \
> + prt_fn(DEV(_pdev), "%s %s: " fmt, \
> + DEV_PDATA(_pdev)->xsp_root_name, __func__, ##args); })
> +#define xrt_err(pdev, fmt, args...) FMT_PRT(dev_err, pdev, fmt, ##args)
> +#define xrt_warn(pdev, fmt, args...) FMT_PRT(dev_warn, pdev, fmt, ##args)
> +#define xrt_info(pdev, fmt, args...) FMT_PRT(dev_info, pdev, fmt, ##args)
> +#define xrt_dbg(pdev, fmt, args...) FMT_PRT(dev_dbg, pdev, fmt, ##args)
> +
> +/* Starting IOCTL for common IOCTLs implemented by all leaves. */
> +#define XRT_XLEAF_COMMON_BASE 0
> +/* Starting IOCTL for leaves' specific IOCTLs. */
> +#define XRT_XLEAF_CUSTOM_BASE 64
> +enum xrt_xleaf_common_ioctl_cmd {
> + XRT_XLEAF_EVENT = XRT_XLEAF_COMMON_BASE,
> +};
> +
> +/*
> + * If populated by subdev driver, infra will handle the mechanics of
> + * char device (un)registration.
> + */
> +enum xrt_subdev_file_mode {
> + /* Infra create cdev, default file name */
> + XRT_SUBDEV_FILE_DEFAULT = 0,
> + /* Infra create cdev, need to encode inst num in file name */
> + XRT_SUBDEV_FILE_MULTI_INST,
> + /* No auto creation of cdev by infra, leaf handles it by itself */
> + XRT_SUBDEV_FILE_NO_AUTO,
> +};
> +
> +struct xrt_subdev_file_ops {
> + const struct file_operations xsf_ops;
> + dev_t xsf_dev_t;
> + const char *xsf_dev_name;
> + enum xrt_subdev_file_mode xsf_mode;
> +};
> +
> +/*
> + * Subdev driver callbacks populated by subdev driver.
> + */
> +struct xrt_subdev_drv_ops {
> + /*
> + * Per driver instance callback. The pdev points to the instance.
> + * If defined these are called by other leaf drivers.
If defined,
> + * Note that root driver may call into xsd_ioctl of a group driver.
> + */
> + int (*xsd_ioctl)(struct platform_device *pdev, u32 cmd, void *arg);
> +};
> +
> +/*
> + * Defined and populated by subdev driver, exported as driver_data in
> + * struct platform_device_id.
> + */
> +struct xrt_subdev_drvdata {
> + struct xrt_subdev_file_ops xsd_file_ops;
> + struct xrt_subdev_drv_ops xsd_dev_ops;
> +};
> +
> +/*
> + * Partially initialized by the parent driver, then, passed in as subdev driver's
> + * platform data when creating subdev driver instance by calling platform
> + * device register API (platform_device_register_data() or the likes).
> + *
> + * Once device register API returns, platform driver framework makes a copy of
> + * this buffer and maintains its life cycle. The content of the buffer is
> + * completely owned by subdev driver.
> + *
> + * Thus, parent driver should be very careful when it touches this buffer
> + * again once it's handed over to subdev driver. And the data structure
> + * should not contain pointers pointing to buffers that is managed by
> + * other or parent drivers since it could have been freed before platform
> + * data buffer is freed by platform driver framework.
This sounds complicated and risky, why have two copies ?
> + */
> +struct xrt_subdev_platdata {
> + /*
> + * Per driver instance callback. The pdev points to the instance.
> + * Should always be defined for subdev driver to get service from root.
> + */
> + xrt_subdev_root_cb_t xsp_root_cb;
> + void *xsp_root_cb_arg;
> +
> + /* Something to associate w/ root for msg printing. */
> + const char *xsp_root_name;
> +
> + /*
> + * Char dev support for this subdev instance.
> + * Initialized by subdev driver.
> + */
> + struct cdev xsp_cdev;
> + struct device *xsp_sysdev;
> + struct mutex xsp_devnode_lock; /* devnode lock */
> + struct completion xsp_devnode_comp;
> + int xsp_devnode_ref;
> + bool xsp_devnode_online;
> + bool xsp_devnode_excl;
> +
> + /*
> + * Subdev driver specific init data. The buffer should be embedded
> + * in this data structure buffer after dtb, so that it can be freed
> + * together with platform data.
> + */
> + loff_t xsp_priv_off; /* Offset into this platform data buffer. */
> + size_t xsp_priv_len;
> +
> + /*
> + * Populated by parent driver to describe the device tree for
> + * the subdev driver to handle. Should always be last one since it's
> + * of variable length.
> + */
> + char xsp_dtb[sizeof(struct fdt_header)];
could be xsp_dtb[1] and save including the fdt headers just to get a size that doesn't matter.
> +};
> +
> +/*
> + * this struct define the endpoints belong to the same subdevice
> + */
> +struct xrt_subdev_ep_names {
> + const char *ep_name;
> + const char *regmap_name;
> +};
> +
> +struct xrt_subdev_endpoints {
> + struct xrt_subdev_ep_names *xse_names;
> + /* minimum number of endpoints to support the subdevice */
> + u32 xse_min_ep;

see earlier comment about needed a null entry and checking for it.

a 'size' element would be better here.

> +};
> +
> +struct subdev_match_arg {
> + enum xrt_subdev_id id;
> + int instance;
> +};
> +
> +bool xleaf_has_endpoint(struct platform_device *pdev, const char *endpoint_name);
> +struct platform_device *xleaf_get_leaf(struct platform_device *pdev,
> + xrt_subdev_match_t cb, void *arg);
> +
> +static inline bool subdev_match(enum xrt_subdev_id id, struct platform_device *pdev, void *arg)
> +{
> + const struct subdev_match_arg *a = (struct subdev_match_arg *)arg;
> + bool ret = (id == a->id && (pdev->id == a->instance || PLATFORM_DEVID_NONE == a->instance));
This statement is too complicated, turn this into an if-else
> +
> + return ret;
> +}
> +
> +static inline bool xrt_subdev_match_epname(enum xrt_subdev_id id,
> + struct platform_device *pdev, void *arg)
> +{
> + return xleaf_has_endpoint(pdev, arg);

This function is used only once.

Just inline the function to the caller and remove this function.

> +}
> +
> +static inline struct platform_device *
> +xleaf_get_leaf_by_id(struct platform_device *pdev,
> + enum xrt_subdev_id id, int instance)
> +{
> + struct subdev_match_arg arg = { id, instance };
> +
> + return xleaf_get_leaf(pdev, subdev_match, &arg);
> +}
> +
> +static inline struct platform_device *
> +xleaf_get_leaf_by_epname(struct platform_device *pdev, const char *name)
> +{
> + return xleaf_get_leaf(pdev, xrt_subdev_match_epname, (void *)name);
> +}
> +
> +static inline int xleaf_ioctl(struct platform_device *tgt, u32 cmd, void *arg)
> +{
> + struct xrt_subdev_drvdata *drvdata = DEV_DRVDATA(tgt);
> +
> + return (*drvdata->xsd_dev_ops.xsd_ioctl)(tgt, cmd, arg);
> +}
> +
> +int xleaf_put_leaf(struct platform_device *pdev,
> + struct platform_device *leaf);
> +int xleaf_create_group(struct platform_device *pdev, char *dtb);
> +int xleaf_destroy_group(struct platform_device *pdev, int instance);
> +int xleaf_wait_for_group_bringup(struct platform_device *pdev);
> +void xleaf_hot_reset(struct platform_device *pdev);
> +int xleaf_broadcast_event(struct platform_device *pdev,
> + enum xrt_events evt, bool async);
> +void xleaf_get_barres(struct platform_device *pdev,
> + struct resource **res, uint bar_idx);
> +void xleaf_get_root_id(struct platform_device *pdev,
> + unsigned short *vendor, unsigned short *device,
> + unsigned short *subvendor, unsigned short *subdevice);
> +struct device *xleaf_register_hwmon(struct platform_device *pdev,
> + const char *name, void *drvdata,
> + const struct attribute_group **grps);
> +void xleaf_unregister_hwmon(struct platform_device *pdev, struct device *hwmon);

could better organize these decl's alphabetically.

Also not intermix inlines and decls.

> +
> +/*
> + * Character device helper APIs for use by leaf drivers
> + */
> +static inline bool xleaf_devnode_enabled(struct xrt_subdev_drvdata *drvdata)
> +{
> + return drvdata && drvdata->xsd_file_ops.xsf_ops.open;
> +}
> +
> +int xleaf_devnode_create(struct platform_device *pdev,
> + const char *file_name, const char *inst_name);
> +int xleaf_devnode_destroy(struct platform_device *pdev);
> +
> +struct platform_device *xleaf_devnode_open_excl(struct inode *inode);
> +struct platform_device *xleaf_devnode_open(struct inode *inode);
> +void xleaf_devnode_close(struct inode *inode);
> +
> +/* Helpers. */
> +static inline void xrt_memcpy_fromio(void *buf, void __iomem *iomem, u32 size)
> +{
Replace with mmio_insl/outsl
> + int i;
> +
> + WARN_ON(size & 0x3);
> + for (i = 0; i < size / 4; i++)
> + ((u32 *)buf)[i] = ioread32((char *)(iomem) + sizeof(u32) * i);
> +}
> +
> +static inline void xrt_memcpy_toio(void __iomem *iomem, void *buf, u32 size)
> +{
> + int i;
> +
> + WARN_ON(size & 0x3);
> + for (i = 0; i < size / 4; i++)
> + iowrite32(((u32 *)buf)[i], ((char *)(iomem) + sizeof(u32) * i));
> +}
> +
> +int xleaf_register_driver(enum xrt_subdev_id id, struct platform_driver *drv,
> + struct xrt_subdev_endpoints *eps);
> +void xleaf_unregister_driver(enum xrt_subdev_id id);
> +
> +/* Module's init/fini routines for leaf driver in xrt-lib module */
> +void group_leaf_init_fini(bool init);
> +void vsec_leaf_init_fini(bool init);
> +void vsec_golden_leaf_init_fini(bool init);
> +void devctl_leaf_init_fini(bool init);
> +void axigate_leaf_init_fini(bool init);
> +void icap_leaf_init_fini(bool init);
> +void calib_leaf_init_fini(bool init);
> +void qspi_leaf_init_fini(bool init);
> +void mailbox_leaf_init_fini(bool init);
> +void cmc_leaf_init_fini(bool init);
> +void clkfreq_leaf_init_fini(bool init);
> +void clock_leaf_init_fini(bool init);
> +void ucs_leaf_init_fini(bool init);
Shouldn't these be in the specific leaf drv ?
> +
> +#endif /* _XRT_LEAF_H_ */
> diff --git a/drivers/fpga/xrt/lib/cdev.c b/drivers/fpga/xrt/lib/cdev.c
> new file mode 100644
> index 000000000000..7f104ab3d527
> --- /dev/null
> +++ b/drivers/fpga/xrt/lib/cdev.c
> @@ -0,0 +1,231 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Xilinx Alveo FPGA device node helper functions.
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + */
> +
> +#include "xleaf.h"
> +
> +extern struct class *xrt_class;
> +
> +#define XRT_CDEV_DIR "xfpga"
'xfpga' is not very unique, maybe 'xrt' ?
> +#define INODE2PDATA(inode) \
> + container_of((inode)->i_cdev, struct xrt_subdev_platdata, xsp_cdev)
> +#define INODE2PDEV(inode) \
> + to_platform_device(kobj_to_dev((inode)->i_cdev->kobj.parent))
> +#define CDEV_NAME(sysdev) (strchr((sysdev)->kobj.name, '!') + 1)
> +
> +/* Allow it to be accessed from cdev. */
> +static void xleaf_devnode_allowed(struct platform_device *pdev)
> +{
> + struct xrt_subdev_platdata *pdata = DEV_PDATA(pdev);
> +
> + /* Allow new opens. */
> + mutex_lock(&pdata->xsp_devnode_lock);
> + pdata->xsp_devnode_online = true;
> + mutex_unlock(&pdata->xsp_devnode_lock);
> +}
> +
> +/* Turn off access from cdev and wait for all existing user to go away. */
> +static int xleaf_devnode_disallowed(struct platform_device *pdev)
> +{
> + int ret = 0;
> + struct xrt_subdev_platdata *pdata = DEV_PDATA(pdev);
> +
> + mutex_lock(&pdata->xsp_devnode_lock);
> +
> + /* Prevent new opens. */
> + pdata->xsp_devnode_online = false;
> + /* Wait for existing user to close. */
> + while (!ret && pdata->xsp_devnode_ref) {
> + int rc;
> +
> + mutex_unlock(&pdata->xsp_devnode_lock);
> + rc = wait_for_completion_killable(&pdata->xsp_devnode_comp);
> + mutex_lock(&pdata->xsp_devnode_lock);
> +
> + if (rc == -ERESTARTSYS) {
> + /* Restore online state. */
> + pdata->xsp_devnode_online = true;
> + xrt_err(pdev, "%s is in use, ref=%d",
> + CDEV_NAME(pdata->xsp_sysdev),
> + pdata->xsp_devnode_ref);
> + ret = -EBUSY;
> + }
> + }
> +
> + mutex_unlock(&pdata->xsp_devnode_lock);
> +
> + return ret;
> +}
> +
> +static struct platform_device *
> +__xleaf_devnode_open(struct inode *inode, bool excl)
> +{
> + struct xrt_subdev_platdata *pdata = INODE2PDATA(inode);
> + struct platform_device *pdev = INODE2PDEV(inode);
> + bool opened = false;
> +
> + mutex_lock(&pdata->xsp_devnode_lock);
> +
> + if (pdata->xsp_devnode_online) {
> + if (excl && pdata->xsp_devnode_ref) {
> + xrt_err(pdev, "%s has already been opened exclusively",
> + CDEV_NAME(pdata->xsp_sysdev));
> + } else if (!excl && pdata->xsp_devnode_excl) {
> + xrt_err(pdev, "%s has been opened exclusively",
> + CDEV_NAME(pdata->xsp_sysdev));
> + } else {
> + pdata->xsp_devnode_ref++;
> + pdata->xsp_devnode_excl = excl;
> + opened = true;
> + xrt_info(pdev, "opened %s, ref=%d",
> + CDEV_NAME(pdata->xsp_sysdev),
> + pdata->xsp_devnode_ref);
> + }
> + } else {
> + xrt_err(pdev, "%s is offline", CDEV_NAME(pdata->xsp_sysdev));
> + }
> +
> + mutex_unlock(&pdata->xsp_devnode_lock);
> +
> + pdev = opened ? pdev : NULL;
> + return pdev;
> +}
> +
> +struct platform_device *
> +xleaf_devnode_open_excl(struct inode *inode)
> +{
> + return __xleaf_devnode_open(inode, true);
> +}
> +
> +struct platform_device *
> +xleaf_devnode_open(struct inode *inode)
> +{
> + return __xleaf_devnode_open(inode, false);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_devnode_open);

generally

exported systems should have their decl's in include/linux/fpga/

These are in drivers/fpga/xrt/include/xleaf.h

as exported, they should have a better than average prefix.

maybe 'xrt_fpga_'

> +
> +void xleaf_devnode_close(struct inode *inode)
> +{
> + struct xrt_subdev_platdata *pdata = INODE2PDATA(inode);
> + struct platform_device *pdev = INODE2PDEV(inode);
> + bool notify = false;
> +
> + mutex_lock(&pdata->xsp_devnode_lock);
> +
> + pdata->xsp_devnode_ref--;
check before dec ? or at least warn if ref is already 0
> + if (pdata->xsp_devnode_ref == 0) {
> + pdata->xsp_devnode_excl = false;
> + notify = true;
> + }
> + if (notify) {
> + xrt_info(pdev, "closed %s, ref=%d",
> + CDEV_NAME(pdata->xsp_sysdev), pdata->xsp_devnode_ref);
> + } else {
> + xrt_info(pdev, "closed %s, notifying waiter",
> + CDEV_NAME(pdata->xsp_sysdev));
> + }
> +
> + mutex_unlock(&pdata->xsp_devnode_lock);
> +
> + if (notify)
> + complete(&pdata->xsp_devnode_comp);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_devnode_close);
> +
> +static inline enum xrt_subdev_file_mode
> +devnode_mode(struct xrt_subdev_drvdata *drvdata)
> +{
> + return drvdata->xsd_file_ops.xsf_mode;
> +}
> +
> +int xleaf_devnode_create(struct platform_device *pdev, const char *file_name,
> + const char *inst_name)
> +{
> + struct xrt_subdev_drvdata *drvdata = DEV_DRVDATA(pdev);
> + struct xrt_subdev_file_ops *fops = &drvdata->xsd_file_ops;
> + struct xrt_subdev_platdata *pdata = DEV_PDATA(pdev);
> + struct cdev *cdevp;
> + struct device *sysdev;
> + int ret = 0;
> + char fname[256];

will a /dev/xfpga* created for ever leaf device ?

do they all really need /dev/ support ?

> +
> + mutex_init(&pdata->xsp_devnode_lock);
> + init_completion(&pdata->xsp_devnode_comp);
> +
> + cdevp = &DEV_PDATA(pdev)->xsp_cdev;
no cdev_alloc ?
> + cdev_init(cdevp, &fops->xsf_ops);
> + cdevp->owner = fops->xsf_ops.owner;
> + cdevp->dev = MKDEV(MAJOR(fops->xsf_dev_t), pdev->id);
> +
> + /*
> + * Set pdev as parent of cdev so that when pdev (and its platform
> + * data) will not be freed when cdev is not freed.
> + */
> + cdev_set_parent(cdevp, &DEV(pdev)->kobj);
> +
> + ret = cdev_add(cdevp, cdevp->dev, 1);
> + if (ret) {
> + xrt_err(pdev, "failed to add cdev: %d", ret);
> + goto failed;
> + }
> + if (!file_name)
> + file_name = pdev->name;
> + if (!inst_name) {
> + if (devnode_mode(drvdata) == XRT_SUBDEV_FILE_MULTI_INST) {
> + snprintf(fname, sizeof(fname), "%s/%s/%s.%u",
> + XRT_CDEV_DIR, DEV_PDATA(pdev)->xsp_root_name,
> + file_name, pdev->id);
> + } else {
> + snprintf(fname, sizeof(fname), "%s/%s/%s",
> + XRT_CDEV_DIR, DEV_PDATA(pdev)->xsp_root_name,
> + file_name);
> + }
> + } else {
> + snprintf(fname, sizeof(fname), "%s/%s/%s.%s", XRT_CDEV_DIR,
> + DEV_PDATA(pdev)->xsp_root_name, file_name, inst_name);
> + }
> + sysdev = device_create(xrt_class, NULL, cdevp->dev, NULL, "%s", fname);
> + if (IS_ERR(sysdev)) {
> + ret = PTR_ERR(sysdev);
> + xrt_err(pdev, "failed to create device node: %d", ret);
> + goto failed;
this calls device_destroy, but the create call failed, so is this needed ?
> + }
> + pdata->xsp_sysdev = sysdev;
> +
> + xleaf_devnode_allowed(pdev);
> +
> + xrt_info(pdev, "created (%d, %d): /dev/%s",
> + MAJOR(cdevp->dev), pdev->id, fname);
> + return 0;
> +
> +failed:
> + device_destroy(xrt_class, cdevp->dev);
> + cdev_del(cdevp);
> + cdevp->owner = NULL;
> + return ret;
> +}
> +
> +int xleaf_devnode_destroy(struct platform_device *pdev)
> +{
> + struct xrt_subdev_platdata *pdata = DEV_PDATA(pdev);
> + struct cdev *cdevp = &pdata->xsp_cdev;
> + dev_t dev = cdevp->dev;
> + int rc;
> +
> + rc = xleaf_devnode_disallowed(pdev);
> + if (rc)
> + return rc;
This return is not checked by xrt_subdev_destroy
> +
> + xrt_info(pdev, "removed (%d, %d): /dev/%s/%s", MAJOR(dev), MINOR(dev),
> + XRT_CDEV_DIR, CDEV_NAME(pdata->xsp_sysdev));
> + device_destroy(xrt_class, cdevp->dev);
> + pdata->xsp_sysdev = NULL;
> + cdev_del(cdevp);
> + return 0;
> +}
> diff --git a/drivers/fpga/xrt/lib/subdev.c b/drivers/fpga/xrt/lib/subdev.c
> new file mode 100644
> index 000000000000..73552c549bdb
> --- /dev/null
> +++ b/drivers/fpga/xrt/lib/subdev.c
> @@ -0,0 +1,871 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Xilinx Alveo FPGA device helper functions
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + */
> +
> +#include <linux/platform_device.h>
> +#include <linux/pci.h>
> +#include <linux/vmalloc.h>
> +#include "xleaf.h"
> +#include "subdev_pool.h"
> +#include "main.h"
> +#include "metadata.h"
> +
> +#define DEV_IS_PCI(dev) ((dev)->bus == &pci_bus_type)
> +static inline struct device *find_root(struct platform_device *pdev)
> +{
> + struct device *d = DEV(pdev);
> +
> + while (!DEV_IS_PCI(d))
> + d = d->parent;

Shouldn't the root have no parent ?

Could then check if d->parent == NULL instead of bus type

> + return d;
> +}
> +
> +/*
> + * It represents a holder of a subdev. One holder can repeatedly hold a subdev
> + * as long as there is a unhold corresponding to a hold.
> + */
> +struct xrt_subdev_holder {
> + struct list_head xsh_holder_list;
> + struct device *xsh_holder;
> + int xsh_count;
> + struct kref xsh_kref;

general, i see this in struct xrt_subdev

guessing 'xsh' is xrt subdev holder.

why is this prefix needed for the elements ? consider removing it.

> +};
> +
> +/*
> + * It represents a specific instance of platform driver for a subdev, which
> + * provides services to its clients (another subdev driver or root driver).
> + */
> +struct xrt_subdev {
> + struct list_head xs_dev_list;
> + struct list_head xs_holder_list;
> + enum xrt_subdev_id xs_id; /* type of subdev */
> + struct platform_device *xs_pdev; /* a particular subdev inst */
> + struct completion xs_holder_comp;
> +};
> +
> +static struct xrt_subdev *xrt_subdev_alloc(void)
> +{
> + struct xrt_subdev *sdev = vzalloc(sizeof(*sdev));
similar kzalloc as another patch.
> +
> + if (!sdev)
> + return NULL;
> +
> + INIT_LIST_HEAD(&sdev->xs_dev_list);
> + INIT_LIST_HEAD(&sdev->xs_holder_list);
> + init_completion(&sdev->xs_holder_comp);
> + return sdev;
> +}
> +
> +static void xrt_subdev_free(struct xrt_subdev *sdev)
> +{
> + vfree(sdev);
> +}
> +
> +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void *arg)
> +{
> + struct device *dev = DEV(self);
> + struct xrt_subdev_platdata *pdata = DEV_PDATA(self);
> +
> + return (*pdata->xsp_root_cb)(dev->parent, pdata->xsp_root_cb_arg, cmd, arg);

xrt_subdev_create does not check if pcb is valid. is a null is passed in, it will crash.

there should at least be a warn or -INVALID returned there

> +}
> +
> +/*
> + * Subdev common sysfs nodes.
> + */
> +static ssize_t holders_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> + ssize_t len;
> + struct platform_device *pdev = to_platform_device(dev);
> + struct xrt_root_ioctl_get_holders holders = { pdev, buf, 1024 };
is 1024 a guess ?
> +
> + len = xrt_subdev_root_request(pdev, XRT_ROOT_GET_LEAF_HOLDERS, &holders);
take a closer look at xrt_subdev_get_holders() it stops after it goes past len.
> + if (len >= holders.xpigh_holder_buf_len)
> + return len;
> + buf[len] = '\n';
> + return len + 1;
> +}
> +static DEVICE_ATTR_RO(holders);
> +
> +static struct attribute *xrt_subdev_attrs[] = {
> + &dev_attr_holders.attr,
> + NULL,
> +};
> +
> +static ssize_t metadata_output(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *attr, char *buf, loff_t off, size_t count)
> +{
> + struct device *dev = kobj_to_dev(kobj);
> + struct platform_device *pdev = to_platform_device(dev);
> + struct xrt_subdev_platdata *pdata = DEV_PDATA(pdev);
> + unsigned char *blob;
> + unsigned long size;
> + ssize_t ret = 0;
> +
> + blob = pdata->xsp_dtb;
> + size = xrt_md_size(dev, blob);
> + if (size == XRT_MD_INVALID_LENGTH) {
> + ret = -EINVAL;
> + goto failed;
> + }
> +
> + if (off >= size)
> + goto failed;
silently failed because ret = 0 ?
> +
> + if (off + count > size)
> + count = size - off;
truncating is ok ?
> + memcpy(buf, blob + off, count);
> +
> + ret = count;
> +failed:
> + return ret;
> +}
> +
> +static struct bin_attribute meta_data_attr = {
> + .attr = {
> + .name = "metadata",
> + .mode = 0400
> + },
> + .read = metadata_output,
> + .size = 0
> +};
> +
> +static struct bin_attribute *xrt_subdev_bin_attrs[] = {
> + &meta_data_attr,

is giving the average user access to the meta data a good idea ?

this seems like a developer only need.

> + NULL,
> +};
> +
> +static const struct attribute_group xrt_subdev_attrgroup = {
> + .attrs = xrt_subdev_attrs,
> + .bin_attrs = xrt_subdev_bin_attrs,
> +};
> +
> +/*
> + * Given the device metadata, parse it to get IO ranges and construct
> + * resource array.
> + */
> +static int
> +xrt_subdev_getres(struct device *parent, enum xrt_subdev_id id,
> + char *dtb, struct resource **res, int *res_num)
> +{
> + struct xrt_subdev_platdata *pdata;
> + struct resource *pci_res = NULL;
> + const u64 *bar_range;
> + const u32 *bar_idx;
> + char *ep_name = NULL, *regmap = NULL;
> + uint bar;
> + int count1 = 0, count2 = 0, ret;
> +
> + if (!dtb)
> + return -EINVAL;
> +
> + pdata = DEV_PDATA(to_platform_device(parent));
> +
> + /* go through metadata and count endpoints in it */
> + for (xrt_md_get_next_endpoint(parent, dtb, NULL, NULL, &ep_name, &regmap); ep_name;

Ugly.

Can you preprocess the dtb into a list of end points ?

> + xrt_md_get_next_endpoint(parent, dtb, ep_name, regmap, &ep_name, &regmap)) {
> + ret = xrt_md_get_prop(parent, dtb, ep_name, regmap,
> + XRT_MD_PROP_IO_OFFSET, (const void **)&bar_range, NULL);
> + if (!ret)
> + count1++;
> + }
> + if (!count1)
> + return 0;
> +
> + /* allocate resource array for all endpoints been found in metadata */
> + *res = vzalloc(sizeof(**res) * count1);
> +
> + /* go through all endpoints again and get IO range for each endpoint */
> + for (xrt_md_get_next_endpoint(parent, dtb, NULL, NULL, &ep_name, &regmap); ep_name;
> + xrt_md_get_next_endpoint(parent, dtb, ep_name, regmap, &ep_name, &regmap)) {
> + ret = xrt_md_get_prop(parent, dtb, ep_name, regmap,
> + XRT_MD_PROP_IO_OFFSET, (const void **)&bar_range, NULL);
> + if (ret)
> + continue;
> + xrt_md_get_prop(parent, dtb, ep_name, regmap,
> + XRT_MD_PROP_BAR_IDX, (const void **)&bar_idx, NULL);

bar can fail, but bar idx can not.

should add an assert here

> + bar = bar_idx ? be32_to_cpu(*bar_idx) : 0;
> + xleaf_get_barres(to_platform_device(parent), &pci_res, bar);
> + (*res)[count2].start = pci_res->start +
> + be64_to_cpu(bar_range[0]);
> + (*res)[count2].end = pci_res->start +
> + be64_to_cpu(bar_range[0]) +
> + be64_to_cpu(bar_range[1]) - 1;
> + (*res)[count2].flags = IORESOURCE_MEM;
any irqs need handling?
> + /* check if there is conflicted resource */
> + ret = request_resource(pci_res, *res + count2);
> + if (ret) {
> + dev_err(parent, "Conflict resource %pR\n", *res + count2);
> + vfree(*res);
> + *res_num = 0;
> + *res = NULL;
> + return ret;
> + }
> + release_resource(*res + count2);
> +
> + (*res)[count2].parent = pci_res;
> +
> + xrt_md_find_endpoint(parent, pdata->xsp_dtb, ep_name,
> + regmap, &(*res)[count2].name);
> +
> + count2++;
> + }
> +
> + WARN_ON(count1 != count2);
> + *res_num = count2;
> +
> + return 0;
> +}
> +
> +static inline enum xrt_subdev_file_mode
> +xleaf_devnode_mode(struct xrt_subdev_drvdata *drvdata)
> +{
> + return drvdata->xsd_file_ops.xsf_mode;
> +}
> +
> +static bool xrt_subdev_cdev_auto_creation(struct platform_device *pdev)
> +{
> + struct xrt_subdev_drvdata *drvdata = DEV_DRVDATA(pdev);
> +
> + if (!drvdata)
> + return false;
> +
> + return xleaf_devnode_enabled(drvdata) &&
> + (xleaf_devnode_mode(drvdata) == XRT_SUBDEV_FILE_DEFAULT ||
> + (xleaf_devnode_mode(drvdata) == XRT_SUBDEV_FILE_MULTI_INST));
This is complicated to check, split into checking the call and then checking its side effects.
> +}
> +
> +static struct xrt_subdev *
> +xrt_subdev_create(struct device *parent, enum xrt_subdev_id id,
> + xrt_subdev_root_cb_t pcb, void *pcb_arg, char *dtb)
> +{
> + struct xrt_subdev *sdev = NULL;
> + struct platform_device *pdev = NULL;
> + struct xrt_subdev_platdata *pdata = NULL;
> + unsigned long dtb_len = 0;
> + size_t pdata_sz;
> + int inst = PLATFORM_DEVID_NONE;
> + struct resource *res = NULL;
> + int res_num = 0;
> +
> + sdev = xrt_subdev_alloc();
> + if (!sdev) {
> + dev_err(parent, "failed to alloc subdev for ID %d", id);
> + goto fail;
> + }
> + sdev->xs_id = id;
> +
> + if (dtb) {
> + xrt_md_pack(parent, dtb);
> + dtb_len = xrt_md_size(parent, dtb);
> + if (dtb_len == XRT_MD_INVALID_LENGTH) {
> + dev_err(parent, "invalid metadata len %ld", dtb_len);
> + goto fail;
> + }
> + }
> + pdata_sz = sizeof(struct xrt_subdev_platdata) + dtb_len - 1;

-1 ?

if dtb_len == 0, pdata_sz be too small.

> +
> + /* Prepare platform data passed to subdev. */
> + pdata = vzalloc(pdata_sz);
> + if (!pdata)
> + goto fail;
> +
> + pdata->xsp_root_cb = pcb;
> + pdata->xsp_root_cb_arg = pcb_arg;
> + memcpy(pdata->xsp_dtb, dtb, dtb_len);
> + if (id == XRT_SUBDEV_GRP) {
> + /* Group can only be created by root driver. */
> + pdata->xsp_root_name = dev_name(parent);
> + } else {
> + struct platform_device *grp = to_platform_device(parent);
> + /* Leaf can only be created by group driver. */
> + WARN_ON(strcmp(xrt_drv_name(XRT_SUBDEV_GRP), platform_get_device_id(grp)->name));
> + pdata->xsp_root_name = DEV_PDATA(grp)->xsp_root_name;
> + }
> +
> + /* Obtain dev instance number. */
> + inst = xrt_drv_get_instance(id);
> + if (inst < 0) {
> + dev_err(parent, "failed to obtain instance: %d", inst);
> + goto fail;
> + }
> +
> + /* Create subdev. */
> + if (id == XRT_SUBDEV_GRP) {
> + pdev = platform_device_register_data(parent, xrt_drv_name(XRT_SUBDEV_GRP),
> + inst, pdata, pdata_sz);
> + } else {
> + int rc = xrt_subdev_getres(parent, id, dtb, &res, &res_num);
> +
> + if (rc) {
> + dev_err(parent, "failed to get resource for %s.%d: %d",
> + xrt_drv_name(id), inst, rc);
> + goto fail;
> + }
> + pdev = platform_device_register_resndata(parent, xrt_drv_name(id),
> + inst, res, res_num, pdata, pdata_sz);
> + vfree(res);
> + }

a small optimization

platform_device_register_data is a wrapper to platform_device_register_resndata.

with initial values for res, res_num, just one call need to be made.

> + if (IS_ERR(pdev)) {
> + dev_err(parent, "failed to create subdev for %s inst %d: %ld",
> + xrt_drv_name(id), inst, PTR_ERR(pdev));
> + goto fail;
> + }
> + sdev->xs_pdev = pdev;
> +
> + if (device_attach(DEV(pdev)) != 1) {
> + xrt_err(pdev, "failed to attach");
> + goto fail;
> + }
> +
> + if (sysfs_create_group(&DEV(pdev)->kobj, &xrt_subdev_attrgroup))
> + xrt_err(pdev, "failed to create sysfs group");
no failure ?
> +
> + /*
> + * Create sysfs sym link under root for leaves
> + * under random groups for easy access to them.
> + */
> + if (id != XRT_SUBDEV_GRP) {
> + if (sysfs_create_link(&find_root(pdev)->kobj,
> + &DEV(pdev)->kobj, dev_name(DEV(pdev)))) {
> + xrt_err(pdev, "failed to create sysfs link");
> + }
> + }
> +
> + /* All done, ready to handle req thru cdev. */
> + if (xrt_subdev_cdev_auto_creation(pdev))
> + xleaf_devnode_create(pdev, DEV_DRVDATA(pdev)->xsd_file_ops.xsf_dev_name, NULL);
> +
> + vfree(pdata);
> + return sdev;
> +
> +fail:

Instead of adding checks in the error handling block, add more specific labels and gotos.

I think i have noticed this before, so apply this advice generally.

> + vfree(pdata);
> + if (sdev && !IS_ERR_OR_NULL(sdev->xs_pdev))
> + platform_device_unregister(sdev->xs_pdev);
> + if (inst >= 0)
> + xrt_drv_put_instance(id, inst);
> + xrt_subdev_free(sdev);
> + return NULL;
> +}
> +
> +static void xrt_subdev_destroy(struct xrt_subdev *sdev)
> +{
> + struct platform_device *pdev = sdev->xs_pdev;
> + int inst = pdev->id;
> + struct device *dev = DEV(pdev);
> +
> + /* Take down the device node */
> + if (xrt_subdev_cdev_auto_creation(pdev))
> + xleaf_devnode_destroy(pdev);
> + if (sdev->xs_id != XRT_SUBDEV_GRP)
> + sysfs_remove_link(&find_root(pdev)->kobj, dev_name(dev));
> + sysfs_remove_group(&dev->kobj, &xrt_subdev_attrgroup);
> + platform_device_unregister(pdev);
> + xrt_drv_put_instance(sdev->xs_id, inst);
> + xrt_subdev_free(sdev);
> +}
> +
> +struct platform_device *
> +xleaf_get_leaf(struct platform_device *pdev, xrt_subdev_match_t match_cb, void *match_arg)
> +{
> + int rc;
> + struct xrt_root_ioctl_get_leaf get_leaf = {
> + pdev, match_cb, match_arg, };
> +
> + rc = xrt_subdev_root_request(pdev, XRT_ROOT_GET_LEAF, &get_leaf);
> + if (rc)
> + return NULL;
> + return get_leaf.xpigl_leaf;
> +}
> +EXPORT_SYMBOL_GPL(xleaf_get_leaf);
> +
> +bool xleaf_has_endpoint(struct platform_device *pdev, const char *endpoint_name)
> +{
> + struct resource *res;
> + int i;
whitespace
> +
> + for (i = 0, res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + res;
> + res = platform_get_resource(pdev, IORESOURCE_MEM, ++i)) {
Do not inc i inside the call, do it at the bottom of the loop
> + if (!strncmp(res->name, endpoint_name, strlen(res->name) + 1))
shouldn't you also check the strlen matches ?
> + return true;
> + }
> +
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(xleaf_has_endpoint);
> +
> +int xleaf_put_leaf(struct platform_device *pdev, struct platform_device *leaf)
> +{
> + struct xrt_root_ioctl_put_leaf put_leaf = { pdev, leaf };
> +
> + return xrt_subdev_root_request(pdev, XRT_ROOT_PUT_LEAF, &put_leaf);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_put_leaf);
> +
> +int xleaf_create_group(struct platform_device *pdev, char *dtb)
> +{
> + return xrt_subdev_root_request(pdev, XRT_ROOT_CREATE_GROUP, dtb);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_create_group);
> +
> +int xleaf_destroy_group(struct platform_device *pdev, int instance)
> +{
> + return xrt_subdev_root_request(pdev, XRT_ROOT_REMOVE_GROUP, (void *)(uintptr_t)instance);

Instead of these clunky casts, why not make the type of the args void *

and leave it to the handler to cast.

this would unify the signature of these functions somewhat.

> +}
> +EXPORT_SYMBOL_GPL(xleaf_destroy_group);
> +
> +int xleaf_wait_for_group_bringup(struct platform_device *pdev)
> +{
> + return xrt_subdev_root_request(pdev, XRT_ROOT_WAIT_GROUP_BRINGUP, NULL);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_wait_for_group_bringup);
> +
> +static ssize_t
> +xrt_subdev_get_holders(struct xrt_subdev *sdev, char *buf, size_t len)
> +{
> + const struct list_head *ptr;
> + struct xrt_subdev_holder *h;
> + ssize_t n = 0;
> +
> + list_for_each(ptr, &sdev->xs_holder_list) {
> + h = list_entry(ptr, struct xrt_subdev_holder, xsh_holder_list);
> + n += snprintf(buf + n, len - n, "%s:%d ",
> + dev_name(h->xsh_holder), kref_read(&h->xsh_kref));
> + if (n >= (len - 1))
This is the overrun i mentioned above.
> + break;
> + }
> + return n;
> +}
> +
> +void xrt_subdev_pool_init(struct device *dev, struct xrt_subdev_pool *spool)
> +{
> + INIT_LIST_HEAD(&spool->xsp_dev_list);
> + spool->xsp_owner = dev;
> + mutex_init(&spool->xsp_lock);
> + spool->xsp_closing = false;
> +}
> +
> +static void xrt_subdev_free_holder(struct xrt_subdev_holder *holder)
> +{
> + list_del(&holder->xsh_holder_list);
> + vfree(holder);
> +}
> +
> +static void xrt_subdev_pool_wait_for_holders(struct xrt_subdev_pool *spool, struct xrt_subdev *sdev)
> +{
> + const struct list_head *ptr, *next;
> + char holders[128];
> + struct xrt_subdev_holder *holder;
> + struct mutex *lk = &spool->xsp_lock;
> +
> + WARN_ON(!mutex_is_locked(lk));
> +
> + while (!list_empty(&sdev->xs_holder_list)) {
> + int rc;
> +
> + /* It's most likely a bug if we ever enters this loop. */
> + xrt_subdev_get_holders(sdev, holders, sizeof(holders));
will overrun, error not reported.
> + xrt_err(sdev->xs_pdev, "awaits holders: %s", holders);
> + mutex_unlock(lk);
> + rc = wait_for_completion_killable(&sdev->xs_holder_comp);
> + mutex_lock(lk);
> + if (rc == -ERESTARTSYS) {
> + xrt_err(sdev->xs_pdev, "give up on waiting for holders, clean up now");
> + list_for_each_safe(ptr, next, &sdev->xs_holder_list) {
> + holder = list_entry(ptr, struct xrt_subdev_holder, xsh_holder_list);
> + xrt_subdev_free_holder(holder);
> + }
> + }
> + }
> +}
> +
> +void xrt_subdev_pool_fini(struct xrt_subdev_pool *spool)
> +{
> + struct list_head *dl = &spool->xsp_dev_list;
> + struct mutex *lk = &spool->xsp_lock;
> +
> + mutex_lock(lk);
> +

i am wondering about the locking here.

xsp_closing is only set to true in this function.

the unlocking then relocking in the loop is strange, why do you need to do this ?

> + if (spool->xsp_closing) {
> + mutex_unlock(lk);
> + return;
> + }
> +
> + spool->xsp_closing = true;
> + /* Remove subdev in the reverse order of added. */
> + while (!list_empty(dl)) {
> + struct xrt_subdev *sdev = list_first_entry(dl, struct xrt_subdev, xs_dev_list);
> +
> + xrt_subdev_pool_wait_for_holders(spool, sdev);
> + list_del(&sdev->xs_dev_list);
> + mutex_unlock(lk);
> + xrt_subdev_destroy(sdev);
> + mutex_lock(lk);
> + }
> +
> + mutex_unlock(lk);
> +}
> +
> +static struct xrt_subdev_holder *xrt_subdev_find_holder(struct xrt_subdev *sdev,
> + struct device *holder_dev)
> +{
> + struct list_head *hl = &sdev->xs_holder_list;
> + struct xrt_subdev_holder *holder;
> + const struct list_head *ptr;
> +
> + list_for_each(ptr, hl) {
> + holder = list_entry(ptr, struct xrt_subdev_holder, xsh_holder_list);
> + if (holder->xsh_holder == holder_dev)
> + return holder;
> + }
> + return NULL;
> +}
> +
> +static int xrt_subdev_hold(struct xrt_subdev *sdev, struct device *holder_dev)
> +{
> + struct xrt_subdev_holder *holder = xrt_subdev_find_holder(sdev, holder_dev);
> + struct list_head *hl = &sdev->xs_holder_list;
> +
> + if (!holder) {
> + holder = vzalloc(sizeof(*holder));
> + if (!holder)
> + return -ENOMEM;
> + holder->xsh_holder = holder_dev;
> + kref_init(&holder->xsh_kref);
> + list_add_tail(&holder->xsh_holder_list, hl);
> + } else {
> + kref_get(&holder->xsh_kref);
> + }
> +
> + return 0;
> +}
> +
> +static void xrt_subdev_free_holder_kref(struct kref *kref)
> +{
> + struct xrt_subdev_holder *holder = container_of(kref, struct xrt_subdev_holder, xsh_kref);
> +
> + xrt_subdev_free_holder(holder);
> +}
> +
> +static int
> +xrt_subdev_release(struct xrt_subdev *sdev, struct device *holder_dev)
> +{
> + struct xrt_subdev_holder *holder = xrt_subdev_find_holder(sdev, holder_dev);
> + struct list_head *hl = &sdev->xs_holder_list;
> +
> + if (!holder) {
> + dev_err(holder_dev, "can't release, %s did not hold %s",
> + dev_name(holder_dev), dev_name(DEV(sdev->xs_pdev)));
> + return -EINVAL;
> + }
> + kref_put(&holder->xsh_kref, xrt_subdev_free_holder_kref);
> +
> + /* kref_put above may remove holder from list. */
> + if (list_empty(hl))
> + complete(&sdev->xs_holder_comp);
> + return 0;
> +}
> +
> +int xrt_subdev_pool_add(struct xrt_subdev_pool *spool, enum xrt_subdev_id id,
> + xrt_subdev_root_cb_t pcb, void *pcb_arg, char *dtb)
> +{
> + struct mutex *lk = &spool->xsp_lock;
> + struct list_head *dl = &spool->xsp_dev_list;
> + struct xrt_subdev *sdev;
> + int ret = 0;
> +
> + sdev = xrt_subdev_create(spool->xsp_owner, id, pcb, pcb_arg, dtb);
> + if (sdev) {
> + mutex_lock(lk);
> + if (spool->xsp_closing) {
> + /* No new subdev when pool is going away. */
> + xrt_err(sdev->xs_pdev, "pool is closing");
> + ret = -ENODEV;
> + } else {
> + list_add(&sdev->xs_dev_list, dl);
> + }
> + mutex_unlock(lk);
> + if (ret)
> + xrt_subdev_destroy(sdev);
> + } else {
> + ret = -EINVAL;
> + }
> +
> + ret = ret ? ret : sdev->xs_pdev->id;
> + return ret;
> +}
> +
> +int xrt_subdev_pool_del(struct xrt_subdev_pool *spool, enum xrt_subdev_id id, int instance)
> +{
> + const struct list_head *ptr;
> + struct mutex *lk = &spool->xsp_lock;
> + struct list_head *dl = &spool->xsp_dev_list;
> + struct xrt_subdev *sdev;
> + int ret = -ENOENT;
> +
> + mutex_lock(lk);
> + list_for_each(ptr, dl) {
> + sdev = list_entry(ptr, struct xrt_subdev, xs_dev_list);
> + if (sdev->xs_id != id || sdev->xs_pdev->id != instance)
> + continue;
> + xrt_subdev_pool_wait_for_holders(spool, sdev);
> + list_del(&sdev->xs_dev_list);
> + ret = 0;
> + break;
> + }
> + mutex_unlock(lk);
> + if (ret)
> + return ret;
> +
> + xrt_subdev_destroy(sdev);
> + return 0;
> +}
> +
> +static int xrt_subdev_pool_get_impl(struct xrt_subdev_pool *spool, xrt_subdev_match_t match,
> + void *arg, struct device *holder_dev, struct xrt_subdev **sdevp)
> +{
> + const struct list_head *ptr;
> + struct mutex *lk = &spool->xsp_lock;
> + struct list_head *dl = &spool->xsp_dev_list;
> + struct xrt_subdev *sdev = NULL;
> + int ret = -ENOENT;
> +
> + mutex_lock(lk);
> +
> + if (match == XRT_SUBDEV_MATCH_PREV) {
> + struct platform_device *pdev = (struct platform_device *)arg;
> + struct xrt_subdev *d = NULL;
> +
> + if (!pdev) {
> + sdev = list_empty(dl) ? NULL :
> + list_last_entry(dl, struct xrt_subdev, xs_dev_list);
> + } else {
> + list_for_each(ptr, dl) {
> + d = list_entry(ptr, struct xrt_subdev, xs_dev_list);
> + if (d->xs_pdev != pdev)
> + continue;
> + if (!list_is_first(ptr, dl))
> + sdev = list_prev_entry(d, xs_dev_list);
> + break;
> + }
> + }
> + } else if (match == XRT_SUBDEV_MATCH_NEXT) {
> + struct platform_device *pdev = (struct platform_device *)arg;
> + struct xrt_subdev *d = NULL;
> +
> + if (!pdev) {
> + sdev = list_first_entry_or_null(dl, struct xrt_subdev, xs_dev_list);
> + } else {
> + list_for_each(ptr, dl) {
> + d = list_entry(ptr, struct xrt_subdev, xs_dev_list);
> + if (d->xs_pdev != pdev)
> + continue;
> + if (!list_is_last(ptr, dl))
> + sdev = list_next_entry(d, xs_dev_list);
> + break;
> + }
> + }
> + } else {
> + list_for_each(ptr, dl) {
> + struct xrt_subdev *d = NULL;
> +
> + d = list_entry(ptr, struct xrt_subdev, xs_dev_list);
> + if (d && !match(d->xs_id, d->xs_pdev, arg))
> + continue;
> + sdev = d;
> + break;
> + }
> + }

3 similar blocks of code

This looks like it could be refactored into this else case and minor changes for match_next/prev

> +
> + if (sdev)
> + ret = xrt_subdev_hold(sdev, holder_dev);
> +
> + mutex_unlock(lk);
> +
> + if (!ret)
> + *sdevp = sdev;
> + return ret;
> +}
> +
> +int xrt_subdev_pool_get(struct xrt_subdev_pool *spool, xrt_subdev_match_t match, void *arg,
> + struct device *holder_dev, struct platform_device **pdevp)
> +{
> + int rc;
> + struct xrt_subdev *sdev;
> +
> + rc = xrt_subdev_pool_get_impl(spool, match, arg, holder_dev, &sdev);
> + if (rc) {
> + if (rc != -ENOENT)
> + dev_err(holder_dev, "failed to hold device: %d", rc);
> + return rc;
> + }
> +
> + if (!DEV_IS_PCI(holder_dev)) {
! root_dev()
> + xrt_dbg(to_platform_device(holder_dev), "%s <<==== %s",
> + dev_name(holder_dev), dev_name(DEV(sdev->xs_pdev)));
> + }
> +
> + *pdevp = sdev->xs_pdev;
> + return 0;
> +}
> +
> +static int xrt_subdev_pool_put_impl(struct xrt_subdev_pool *spool, struct platform_device *pdev,
> + struct device *holder_dev)
> +{
> + const struct list_head *ptr;
> + struct mutex *lk = &spool->xsp_lock;
> + struct list_head *dl = &spool->xsp_dev_list;
> + struct xrt_subdev *sdev;
> + int ret = -ENOENT;
> +
> + mutex_lock(lk);
> + list_for_each(ptr, dl) {
> + sdev = list_entry(ptr, struct xrt_subdev, xs_dev_list);
> + if (sdev->xs_pdev != pdev)
> + continue;
Could this and similar looping be avoided by storing sdev in pdev ?
> + ret = xrt_subdev_release(sdev, holder_dev);
> + break;
> + }
> + mutex_unlock(lk);
> +
> + return ret;
> +}
> +
> +int xrt_subdev_pool_put(struct xrt_subdev_pool *spool, struct platform_device *pdev,
> + struct device *holder_dev)
> +{
> + int ret = xrt_subdev_pool_put_impl(spool, pdev, holder_dev);
> +
> + if (ret)
> + return ret;
> +
> + if (!DEV_IS_PCI(holder_dev)) {

! root_dev() or similar.

If you really need to use DEV_IS_PCI, do it only once so when you need to change something you don not have to find all the calls to DEV_IS_PCI.

> + xrt_dbg(to_platform_device(holder_dev), "%s <<==X== %s",
> + dev_name(holder_dev), dev_name(DEV(pdev)));
> + }
> + return 0;
> +}
> +
> +void xrt_subdev_pool_trigger_event(struct xrt_subdev_pool *spool, enum xrt_events e)
> +{
> + struct platform_device *tgt = NULL;
> + struct xrt_subdev *sdev = NULL;
> + struct xrt_event evt;
> +
> + while (!xrt_subdev_pool_get_impl(spool, XRT_SUBDEV_MATCH_NEXT,
> + tgt, spool->xsp_owner, &sdev)) {
> + tgt = sdev->xs_pdev;
> + evt.xe_evt = e;
> + evt.xe_subdev.xevt_subdev_id = sdev->xs_id;
> + evt.xe_subdev.xevt_subdev_instance = tgt->id;
> + xrt_subdev_root_request(tgt, XRT_ROOT_EVENT, &evt);
> + xrt_subdev_pool_put_impl(spool, tgt, spool->xsp_owner);
> + }
> +}
> +
> +void xrt_subdev_pool_handle_event(struct xrt_subdev_pool *spool, struct xrt_event *evt)
> +{
> + struct platform_device *tgt = NULL;
> + struct xrt_subdev *sdev = NULL;
> +
> + while (!xrt_subdev_pool_get_impl(spool, XRT_SUBDEV_MATCH_NEXT,
> + tgt, spool->xsp_owner, &sdev)) {
> + tgt = sdev->xs_pdev;
> + xleaf_ioctl(tgt, XRT_XLEAF_EVENT, evt);
> + xrt_subdev_pool_put_impl(spool, tgt, spool->xsp_owner);
> + }
> +}
> +
> +ssize_t xrt_subdev_pool_get_holders(struct xrt_subdev_pool *spool,
> + struct platform_device *pdev, char *buf, size_t len)
> +{
> + const struct list_head *ptr;
> + struct mutex *lk = &spool->xsp_lock;
> + struct list_head *dl = &spool->xsp_dev_list;
> + struct xrt_subdev *sdev;
> + ssize_t ret = 0;
> +
> + mutex_lock(lk);
> + list_for_each(ptr, dl) {
> + sdev = list_entry(ptr, struct xrt_subdev, xs_dev_list);
> + if (sdev->xs_pdev != pdev)
> + continue;
> + ret = xrt_subdev_get_holders(sdev, buf, len);
> + break;
> + }
> + mutex_unlock(lk);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(xrt_subdev_pool_get_holders);
> +
> +int xleaf_broadcast_event(struct platform_device *pdev, enum xrt_events evt, bool async)
> +{
> + struct xrt_event e = { evt, };
> + u32 cmd = async ? XRT_ROOT_EVENT_ASYNC : XRT_ROOT_EVENT;
> +
> + WARN_ON(evt == XRT_EVENT_POST_CREATION || evt == XRT_EVENT_PRE_REMOVAL);
> + return xrt_subdev_root_request(pdev, cmd, &e);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_broadcast_event);
> +
> +void xleaf_hot_reset(struct platform_device *pdev)
> +{
> + xrt_subdev_root_request(pdev, XRT_ROOT_HOT_RESET, NULL);
> +}
> +EXPORT_SYMBOL_GPL(xleaf_hot_reset);
> +
> +void xleaf_get_barres(struct platform_device *pdev, struct resource **res, uint bar_idx)
> +{
> + struct xrt_root_ioctl_get_res arg = { 0 };
> +
> + if (bar_idx > PCI_STD_RESOURCE_END) {
> + xrt_err(pdev, "Invalid bar idx %d", bar_idx);
> + *res = NULL;
> + return;
> + }
> +
> + xrt_subdev_root_request(pdev, XRT_ROOT_GET_RESOURCE, &arg);
> +
> + *res = &arg.xpigr_res[bar_idx];

is this correct ?

do all res need to be found to return a single one ?

> +}
> +
> +void xleaf_get_root_id(struct platform_device *pdev, unsigned short *vendor, unsigned short *device,
> + unsigned short *subvendor, unsigned short *subdevice)
> +{
> + struct xrt_root_ioctl_get_id id = { 0 };
> +
> + xrt_subdev_root_request(pdev, XRT_ROOT_GET_ID, (void *)&id);
> + if (vendor)
> + *vendor = id.xpigi_vendor_id;
> + if (device)
> + *device = id.xpigi_device_id;
> + if (subvendor)
> + *subvendor = id.xpigi_sub_vendor_id;
> + if (subdevice)
> + *subdevice = id.xpigi_sub_device_id;
not setting anything because user passed in all nulls would make this function a noop.
> +}
> +
> +struct device *xleaf_register_hwmon(struct platform_device *pdev, const char *name, void *drvdata,
> + const struct attribute_group **grps)
> +{
> + struct xrt_root_ioctl_hwmon hm = { true, name, drvdata, grps, };
> +
> + xrt_subdev_root_request(pdev, XRT_ROOT_HWMON, (void *)&hm);
> + return hm.xpih_hwmon_dev;
> +}
> +
> +void xleaf_unregister_hwmon(struct platform_device *pdev, struct device *hwmon)
> +{
> + struct xrt_root_ioctl_hwmon hm = { false, };
> +
> + hm.xpih_hwmon_dev = hwmon;
> + xrt_subdev_root_request(pdev, XRT_ROOT_HWMON, (void *)&hm);
> +}
> diff --git a/drivers/fpga/xrt/lib/subdev_pool.h b/drivers/fpga/xrt/lib/subdev_pool.h
> new file mode 100644
> index 000000000000..50a8490e0e41

apologies for delay, was busy.

If it seems like i forgot a train of thought, i did.

> --- /dev/null
> +++ b/drivers/fpga/xrt/lib/subdev_pool.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Header file for Xilinx Runtime (XRT) driver
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + */
> +
> +#ifndef _XRT_SUBDEV_POOL_H_
> +#define _XRT_SUBDEV_POOL_H_
> +
> +#include "xroot.h"
> +
> +/*
> + * It manages a list of xrt_subdevs for root and group drivers.

'It' does not have a lot of context, better would be

The xrt_subdev_pool struct ..

> + */
> +struct xrt_subdev_pool {
> + struct list_head xsp_dev_list;
> + struct device *xsp_owner;
> + struct mutex xsp_lock; /* pool lock */
Header files should be self contained, a quick look at xroot.h makes me suspicious that device and mutex decls assume the includer has added their headers before this one
> + bool xsp_closing;
If you thing additional state will be needed, you could change this to a bitfield. sizewise with compiler padding i don't think the size would change.
> +};
> +
> +/*
> + * Subdev pool API for root and group drivers only.

'API' makes me think these should go in include/linux/fpga

Do/will these functions get called outside of the drivers/fpga ?

> + */
> +void xrt_subdev_pool_init(struct device *dev,
> + struct xrt_subdev_pool *spool);
> +void xrt_subdev_pool_fini(struct xrt_subdev_pool *spool);
> +int xrt_subdev_pool_get(struct xrt_subdev_pool *spool,
> + xrt_subdev_match_t match,
> + void *arg, struct device *holder_dev,
> + struct platform_device **pdevp);
> +int xrt_subdev_pool_put(struct xrt_subdev_pool *spool,
> + struct platform_device *pdev,
> + struct device *holder_dev);
> +int xrt_subdev_pool_add(struct xrt_subdev_pool *spool,
> + enum xrt_subdev_id id, xrt_subdev_root_cb_t pcb,
> + void *pcb_arg, char *dtb);
> +int xrt_subdev_pool_del(struct xrt_subdev_pool *spool,
> + enum xrt_subdev_id id, int instance);
> +ssize_t xrt_subdev_pool_get_holders(struct xrt_subdev_pool *spool,
> + struct platform_device *pdev,
> + char *buf, size_t len);
> +
> +void xrt_subdev_pool_trigger_event(struct xrt_subdev_pool *spool,
> + enum xrt_events evt);
> +void xrt_subdev_pool_handle_event(struct xrt_subdev_pool *spool,
> + struct xrt_event *evt);
> +
> +#endif /* _XRT_SUBDEV_POOL_H_ */
> diff --git a/drivers/fpga/xrt/lib/xroot.c b/drivers/fpga/xrt/lib/xroot.c
> new file mode 100644
> index 000000000000..3dc7b0243277
> --- /dev/null
> +++ b/drivers/fpga/xrt/lib/xroot.c
> @@ -0,0 +1,598 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Xilinx Alveo FPGA Root Functions
> + *
> + * Copyright (C) 2020-2021 Xilinx, Inc.
> + *
> + * Authors:
> + * Cheng Zhen <maxz@xxxxxxxxxx>
> + */
> +
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/hwmon.h>
> +#include "xroot.h"
> +#include "subdev_pool.h"
> +#include "group.h"
> +#include "metadata.h"
> +
> +#define XROOT_PDEV(xr) ((xr)->pdev)
> +#define XROOT_DEV(xr) (&(XROOT_PDEV(xr)->dev))
> +#define xroot_err(xr, fmt, args...) \
> + dev_err(XROOT_DEV(xr), "%s: " fmt, __func__, ##args)
> +#define xroot_warn(xr, fmt, args...) \
> + dev_warn(XROOT_DEV(xr), "%s: " fmt, __func__, ##args)
> +#define xroot_info(xr, fmt, args...) \
> + dev_info(XROOT_DEV(xr), "%s: " fmt, __func__, ##args)
> +#define xroot_dbg(xr, fmt, args...) \
> + dev_dbg(XROOT_DEV(xr), "%s: " fmt, __func__, ##args)
> +
> +#define XRT_VSEC_ID 0x20

Is this the best place to define some pci magic ?

It looks like the xroot is combination of the root of the device tree and the pci setup for the board.

Can the pci-ness be split and the root mostly handling how the subtrees are organized ?

> +
> +#define XROOT_GRP_FIRST (-1)
> +#define XROOT_GRP_LAST (-2)
> +
> +static int xroot_root_cb(struct device *, void *, u32, void *);
> +
> +struct xroot_evt {
> + struct list_head list;
> + struct xrt_event evt;
> + struct completion comp;
> + bool async;
> +};
> +
> +struct xroot_events {
> + struct mutex evt_lock; /* event lock */
> + struct list_head evt_list;
> + struct work_struct evt_work;
> +};
> +
> +struct xroot_grps {
> + struct xrt_subdev_pool pool;
> + struct work_struct bringup_work;
> + atomic_t bringup_pending;
> + atomic_t bringup_failed;
combine with bitfield
> + struct completion bringup_comp;
> +};
> +
> +struct xroot {
> + struct pci_dev *pdev;
> + struct xroot_events events;
> + struct xroot_grps grps;
> + struct xroot_pf_cb pf_cb;
expand pf_cb, maybe 'physical_function_callback' ?
> +};
> +
> +struct xroot_grp_match_arg {
> + enum xrt_subdev_id id;
> + int instance;
> +};
> +
> +static bool xroot_grp_match(enum xrt_subdev_id id,
> + struct platform_device *pdev, void *arg)
> +{
> + struct xroot_grp_match_arg *a = (struct xroot_grp_match_arg *)arg;
> + return id == a->id && pdev->id == a->instance;

scanning the code i expected to see ... && pdev->instance == a->instance

pdev->id == a->instance looks like a bug, a change to pdev->id element name to pdev->instance or in needed of a comment.

> +}
> +
> +static int xroot_get_group(struct xroot *xr, int instance,
> + struct platform_device **grpp)
> +{
> + int rc = 0;
> + struct xrt_subdev_pool *grps = &xr->grps.pool;
> + struct device *dev = DEV(xr->pdev);
> + struct xroot_grp_match_arg arg = { XRT_SUBDEV_GRP, instance };
> +
> + if (instance == XROOT_GRP_LAST) {
> + rc = xrt_subdev_pool_get(grps, XRT_SUBDEV_MATCH_NEXT,
> + *grpp, dev, grpp);
> + } else if (instance == XROOT_GRP_FIRST) {
> + rc = xrt_subdev_pool_get(grps, XRT_SUBDEV_MATCH_PREV,
> + *grpp, dev, grpp);
For consistency, maybe the suffix of ...MATCH_NEXT/PREV should be changed to LAST/FIRST
> + } else {
> + rc = xrt_subdev_pool_get(grps, xroot_grp_match,
> + &arg, dev, grpp);
> + }
> +
> + if (rc && rc != -ENOENT)
> + xroot_err(xr, "failed to hold group %d: %d", instance, rc);
> + return rc;
> +}
> +
> +static void xroot_put_group(struct xroot *xr, struct platform_device *grp)
> +{
> + int inst = grp->id;
> + int rc = xrt_subdev_pool_put(&xr->grps.pool, grp, DEV(xr->pdev));
> +
> + if (rc)
> + xroot_err(xr, "failed to release group %d: %d", inst, rc);
> +}
> +
> +static int xroot_trigger_event(struct xroot *xr,
> + struct xrt_event *e, bool async)
> +{
> + struct xroot_evt *enew = vzalloc(sizeof(*enew));
> +
> + if (!enew)
> + return -ENOMEM;
> +
> + enew->evt = *e;
> + enew->async = async;
> + init_completion(&enew->comp);
> +
> + mutex_lock(&xr->events.evt_lock);
> + list_add(&enew->list, &xr->events.evt_list);
> + mutex_unlock(&xr->events.evt_lock);
> +
> + schedule_work(&xr->events.evt_work);
> +
> + if (async)
> + return 0;
> +
> + wait_for_completion(&enew->comp);
> + vfree(enew);
> + return 0;
> +}
> +
> +static void
> +xroot_group_trigger_event(struct xroot *xr, int inst, enum xrt_events e)
> +{
> + int ret;
> + struct platform_device *pdev = NULL;
> + struct xrt_event evt = { 0 };
> +
> + WARN_ON(inst < 0);
> + /* Only triggers subdev specific events. */
> + if (e != XRT_EVENT_POST_CREATION && e != XRT_EVENT_PRE_REMOVAL) {
> + xroot_err(xr, "invalid event %d", e);
> + return;
> + }
> +
> + ret = xroot_get_group(xr, inst, &pdev);
> + if (ret)
> + return;
> +
> + /* Triggers event for children, first. */
> + (void)xleaf_ioctl(pdev, XRT_GROUP_TRIGGER_EVENT, (void *)(uintptr_t)e);
These voids are not needed, but maybe error checking is.
> +
> + /* Triggers event for itself. */
> + evt.xe_evt = e;
> + evt.xe_subdev.xevt_subdev_id = XRT_SUBDEV_GRP;
> + evt.xe_subdev.xevt_subdev_instance = inst;
> + (void)xroot_trigger_event(xr, &evt, false);
> +
> + (void)xroot_put_group(xr, pdev);
> +}
> +
> +int xroot_create_group(void *root, char *dtb)
> +{
> + struct xroot *xr = (struct xroot *)root;
> + int ret;
> +
> + atomic_inc(&xr->grps.bringup_pending);
could this state and the error be moved to xrt_sbudev_pool_add where locking happens so atomics are not needed ?
> + ret = xrt_subdev_pool_add(&xr->grps.pool, XRT_SUBDEV_GRP,
> + xroot_root_cb, xr, dtb);
> + if (ret >= 0) {
> + schedule_work(&xr->grps.bringup_work);
> + } else {
> + atomic_dec(&xr->grps.bringup_pending);
> + atomic_inc(&xr->grps.bringup_failed);
> + xroot_err(xr, "failed to create group: %d", ret);
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(xroot_create_group);
> +
> +static int xroot_destroy_single_group(struct xroot *xr, int instance)
> +{
A better name would be 'xroot_destroy_group'
> + struct platform_device *pdev = NULL;
> + int ret;
> +
> + WARN_ON(instance < 0);
> + ret = xroot_get_group(xr, instance, &pdev);
> + if (ret)
> + return ret;
> +
> + xroot_group_trigger_event(xr, instance, XRT_EVENT_PRE_REMOVAL);
> +
> + /* Now tear down all children in this group. */
> + ret = xleaf_ioctl(pdev, XRT_GROUP_FINI_CHILDREN, NULL);
> + (void)xroot_put_group(xr, pdev);
> + if (!ret) {
> + ret = xrt_subdev_pool_del(&xr->grps.pool, XRT_SUBDEV_GRP,
> + instance);
> + }
> +
> + return ret;
> +}
> +
> +static int xroot_destroy_group(struct xroot *xr, int instance)
A better name would be 'xroot_destroy_groups'
> +{
> + struct platform_device *target = NULL;
> + struct platform_device *deps = NULL;
> + int ret;
> +
> + WARN_ON(instance < 0);
> + /*
> + * Make sure target group exists and can't go away before
> + * we remove it's dependents
> + */
> + ret = xroot_get_group(xr, instance, &target);
> + if (ret)
> + return ret;
> +
> + /*
> + * Remove all groups depend on target one.
> + * Assuming subdevs in higher group ID can depend on ones in
> + * lower ID groups, we remove them in the reservse order.
> + */
> + while (xroot_get_group(xr, XROOT_GRP_LAST, &deps) != -ENOENT) {
> + int inst = deps->id;
> +
> + xroot_put_group(xr, deps);
> + if (instance == inst)
> + break;

breaking in the middle does not seem correct.

please add a comment

> + (void)xroot_destroy_single_group(xr, inst);
> + deps = NULL;
> + }
> +
> + /* Now we can remove the target group. */
> + xroot_put_group(xr, target);
> + return xroot_destroy_single_group(xr, instance);
> +}
> +
> +static int xroot_lookup_group(struct xroot *xr,
> + struct xrt_root_ioctl_lookup_group *arg)
> +{
> + int rc = -ENOENT;
> + struct platform_device *grp = NULL;
> +
> + while (rc < 0 && xroot_get_group(xr, XROOT_GRP_LAST, &grp) != -ENOENT) {
> + if (arg->xpilp_match_cb(XRT_SUBDEV_GRP, grp,
> + arg->xpilp_match_arg)) {
> + rc = grp->id;
> + }
> + xroot_put_group(xr, grp);
> + }
> + return rc;
> +}
> +
> +static void xroot_event_work(struct work_struct *work)
> +{
> + struct xroot_evt *tmp;
> + struct xroot *xr = container_of(work, struct xroot, events.evt_work);
> +
> + mutex_lock(&xr->events.evt_lock);
> + while (!list_empty(&xr->events.evt_list)) {
> + tmp = list_first_entry(&xr->events.evt_list,
> + struct xroot_evt, list);
> + list_del(&tmp->list);
> + mutex_unlock(&xr->events.evt_lock);
why is unlocking necessary ?
> +
> + (void)xrt_subdev_pool_handle_event(&xr->grps.pool, &tmp->evt);
> +
> + if (tmp->async)
> + vfree(tmp);
> + else
> + complete(&tmp->comp);
> +
> + mutex_lock(&xr->events.evt_lock);
> + }
> + mutex_unlock(&xr->events.evt_lock);
> +}
> +
> +static void xroot_event_init(struct xroot *xr)
> +{
> + INIT_LIST_HEAD(&xr->events.evt_list);
> + mutex_init(&xr->events.evt_lock);
> + INIT_WORK(&xr->events.evt_work, xroot_event_work);
> +}
> +
> +static void xroot_event_fini(struct xroot *xr)
> +{
> + flush_scheduled_work();
> + WARN_ON(!list_empty(&xr->events.evt_list));
> +}
> +
> +static int xroot_get_leaf(struct xroot *xr, struct xrt_root_ioctl_get_leaf *arg)
> +{
> + int rc = -ENOENT;
> + struct platform_device *grp = NULL;
> +
> + while (rc && xroot_get_group(xr, XROOT_GRP_LAST, &grp) != -ENOENT) {

while (rc) ?

while we see an error on xleaf_ioctl, keep going ?

Seems like would rather have !rc

similar below in put_leaf

> + rc = xleaf_ioctl(grp, XRT_GROUP_GET_LEAF, arg);
> + xroot_put_group(xr, grp);
> + }
> + return rc;
> +}
> +
> +static int xroot_put_leaf(struct xroot *xr, struct xrt_root_ioctl_put_leaf *arg)
> +{
> + int rc = -ENOENT;
> + struct platform_device *grp = NULL;
> +
> + while (rc && xroot_get_group(xr, XROOT_GRP_LAST, &grp) != -ENOENT) {
> + rc = xleaf_ioctl(grp, XRT_GROUP_PUT_LEAF, arg);
> + xroot_put_group(xr, grp);
> + }
> + return rc;
> +}
> +
> +static int xroot_root_cb(struct device *dev, void *parg, u32 cmd, void *arg)
> +{
> + struct xroot *xr = (struct xroot *)parg;
> + int rc = 0;
> +
> + switch (cmd) {
> + /* Leaf actions. */
> + case XRT_ROOT_GET_LEAF: {
> + struct xrt_root_ioctl_get_leaf *getleaf =
> + (struct xrt_root_ioctl_get_leaf *)arg;
> + rc = xroot_get_leaf(xr, getleaf);
> + break;
> + }
> + case XRT_ROOT_PUT_LEAF: {
> + struct xrt_root_ioctl_put_leaf *putleaf =
> + (struct xrt_root_ioctl_put_leaf *)arg;
> + rc = xroot_put_leaf(xr, putleaf);
> + break;
> + }
looking at these two cases without any changes to arg but a cast, i think these and the next pass the void * onto the function and have the function manage the cast.
> + case XRT_ROOT_GET_LEAF_HOLDERS: {
> + struct xrt_root_ioctl_get_holders *holders =
> + (struct xrt_root_ioctl_get_holders *)arg;
> + rc = xrt_subdev_pool_get_holders(&xr->grps.pool,
> + holders->xpigh_pdev,
> + holders->xpigh_holder_buf,
> + holders->xpigh_holder_buf_len);
> + break;
> + }
> +
> + /* Group actions. */
> + case XRT_ROOT_CREATE_GROUP:
> + rc = xroot_create_group(xr, (char *)arg);
> + break;
> + case XRT_ROOT_REMOVE_GROUP:
> + rc = xroot_destroy_group(xr, (int)(uintptr_t)arg);
> + break;
> + case XRT_ROOT_LOOKUP_GROUP: {
> + struct xrt_root_ioctl_lookup_group *getgrp =
> + (struct xrt_root_ioctl_lookup_group *)arg;
> + rc = xroot_lookup_group(xr, getgrp);
> + break;
> + }
> + case XRT_ROOT_WAIT_GROUP_BRINGUP:
> + rc = xroot_wait_for_bringup(xr) ? 0 : -EINVAL;
> + break;
> +
> + /* Event actions. */
> + case XRT_ROOT_EVENT:
> + case XRT_ROOT_EVENT_ASYNC: {
> + bool async = (cmd == XRT_ROOT_EVENT_ASYNC);
> + struct xrt_event *evt = (struct xrt_event *)arg;
> +
> + rc = xroot_trigger_event(xr, evt, async);
> + break;
> + }
> +
> + /* Device info. */
> + case XRT_ROOT_GET_RESOURCE: {
> + struct xrt_root_ioctl_get_res *res =
> + (struct xrt_root_ioctl_get_res *)arg;
> + res->xpigr_res = xr->pdev->resource;
> + break;
> + }
> + case XRT_ROOT_GET_ID: {
> + struct xrt_root_ioctl_get_id *id =
> + (struct xrt_root_ioctl_get_id *)arg;
> +
> + id->xpigi_vendor_id = xr->pdev->vendor;
> + id->xpigi_device_id = xr->pdev->device;
> + id->xpigi_sub_vendor_id = xr->pdev->subsystem_vendor;
> + id->xpigi_sub_device_id = xr->pdev->subsystem_device;
> + break;
> + }
> +
> + /* MISC generic PCIE driver functions. */

misc functions may need a need some place else.

Is there a way to extend the cmd with some additional layer of abstraction ?

> + case XRT_ROOT_HOT_RESET: {
> + xr->pf_cb.xpc_hot_reset(xr->pdev);
> + break;
> + }
> + case XRT_ROOT_HWMON: {
> + struct xrt_root_ioctl_hwmon *hwmon =
> + (struct xrt_root_ioctl_hwmon *)arg;
> +
> + if (hwmon->xpih_register) {
> + hwmon->xpih_hwmon_dev =
> + hwmon_device_register_with_info(DEV(xr->pdev),
> + hwmon->xpih_name,
> + hwmon->xpih_drvdata,
> + NULL,
> + hwmon->xpih_groups);
> + } else {
> + (void)hwmon_device_unregister(hwmon->xpih_hwmon_dev);
> + }
> + break;
> + }
> +
> + default:
> + xroot_err(xr, "unknown IOCTL cmd %d", cmd);
> + rc = -EINVAL;
> + break;
> + }
> +
> + return rc;
> +}
> +
> +static void xroot_bringup_group_work(struct work_struct *work)
> +{
> + struct platform_device *pdev = NULL;
> + struct xroot *xr = container_of(work, struct xroot, grps.bringup_work);
> +
> + while (xroot_get_group(xr, XROOT_GRP_FIRST, &pdev) != -ENOENT) {
> + int r, i;
> +
> + i = pdev->id;
> + r = xleaf_ioctl(pdev, XRT_GROUP_INIT_CHILDREN, NULL);
> + (void)xroot_put_group(xr, pdev);
> + if (r == -EEXIST)
> + continue; /* Already brough up, nothing to do. */
> + if (r)
> + atomic_inc(&xr->grps.bringup_failed);
> +
> + xroot_group_trigger_event(xr, i, XRT_EVENT_POST_CREATION);
> +
> + if (atomic_dec_and_test(&xr->grps.bringup_pending))
> + complete(&xr->grps.bringup_comp);
> + }
> +}
> +
> +static void xroot_grps_init(struct xroot *xr)

Consistency in terms is needed. In the last few lines i see

group, grp, grps, my vote is for group(s)

> +{
> + xrt_subdev_pool_init(DEV(xr->pdev), &xr->grps.pool);
> + INIT_WORK(&xr->grps.bringup_work, xroot_bringup_group_work);
> + atomic_set(&xr->grps.bringup_pending, 0);
> + atomic_set(&xr->grps.bringup_failed, 0);
> + init_completion(&xr->grps.bringup_comp);
> +}
> +
> +static void xroot_grps_fini(struct xroot *xr)
> +{
> + flush_scheduled_work();
> + xrt_subdev_pool_fini(&xr->grps.pool);
> +}
> +
> +int xroot_add_vsec_node(void *root, char *dtb)
> +{
This is the pci part i think needs to move to its own file.
> + struct xroot *xr = (struct xroot *)root;
> + struct device *dev = DEV(xr->pdev);
> + struct xrt_md_endpoint ep = { 0 };
> + int cap = 0, ret = 0;
> + u32 off_low, off_high, vsec_bar, header;
> + u64 vsec_off;
> +
> + while ((cap = pci_find_next_ext_capability(xr->pdev, cap,
> + PCI_EXT_CAP_ID_VNDR))) {
> + pci_read_config_dword(xr->pdev, cap + PCI_VNDR_HEADER, &header);
> + if (PCI_VNDR_HEADER_ID(header) == XRT_VSEC_ID)
> + break;
> + }
> + if (!cap) {
> + xroot_info(xr, "No Vendor Specific Capability.");
> + return -ENOENT;
> + }
> +
> + if (pci_read_config_dword(xr->pdev, cap + 8, &off_low) ||
> + pci_read_config_dword(xr->pdev, cap + 12, &off_high)) {
> + xroot_err(xr, "pci_read vendor specific failed.");
> + return -EINVAL;
> + }
> +
> + ep.ep_name = XRT_MD_NODE_VSEC;
> + ret = xrt_md_add_endpoint(dev, dtb, &ep);
> + if (ret) {
> + xroot_err(xr, "add vsec metadata failed, ret %d", ret);
> + goto failed;
> + }
> +
> + vsec_bar = cpu_to_be32(off_low & 0xf);
> + ret = xrt_md_set_prop(dev, dtb, XRT_MD_NODE_VSEC, NULL,
> + XRT_MD_PROP_BAR_IDX, &vsec_bar, sizeof(vsec_bar));
> + if (ret) {
> + xroot_err(xr, "add vsec bar idx failed, ret %d", ret);
> + goto failed;
> + }
> +
> + vsec_off = cpu_to_be64(((u64)off_high << 32) | (off_low & ~0xfU));
> + ret = xrt_md_set_prop(dev, dtb, XRT_MD_NODE_VSEC, NULL,
> + XRT_MD_PROP_OFFSET, &vsec_off, sizeof(vsec_off));
> + if (ret) {
> + xroot_err(xr, "add vsec offset failed, ret %d", ret);
> + goto failed;
> + }
> +
> +failed:
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(xroot_add_vsec_node);
> +
> +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint)
> +{
> + struct xroot *xr = (struct xroot *)root;
> + struct device *dev = DEV(xr->pdev);
> + struct xrt_md_endpoint ep = { 0 };
> + int ret = 0;
> +
> + ep.ep_name = endpoint;
> + ret = xrt_md_add_endpoint(dev, dtb, &ep);
> + if (ret)
> + xroot_err(xr, "add %s failed, ret %d", endpoint, ret);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(xroot_add_simple_node);
> +
> +bool xroot_wait_for_bringup(void *root)
> +{
> + struct xroot *xr = (struct xroot *)root;
> +
> + wait_for_completion(&xr->grps.bringup_comp);
> + return atomic_xchg(&xr->grps.bringup_failed, 0) == 0;
Is there going to a race in intialization ?
> +}
> +EXPORT_SYMBOL_GPL(xroot_wait_for_bringup);
> +
> +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root)
> +{
> + struct device *dev = DEV(pdev);
> + struct xroot *xr = NULL;
> +
> + dev_info(dev, "%s: probing...", __func__);
> +
> + xr = devm_kzalloc(dev, sizeof(*xr), GFP_KERNEL);
> + if (!xr)
> + return -ENOMEM;
> +
> + xr->pdev = pdev;
> + xr->pf_cb = *cb;
> + xroot_grps_init(xr);
> + xroot_event_init(xr);
> +
> + *root = xr;
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(xroot_probe);
> +
> +void xroot_remove(void *root)
> +{
> + struct xroot *xr = (struct xroot *)root;
> + struct platform_device *grp = NULL;
> +
> + xroot_info(xr, "leaving...");
> +
> + if (xroot_get_group(xr, XROOT_GRP_FIRST, &grp) == 0) {
> + int instance = grp->id;

another instance = id, the variable and element names should be consistent.

earlier (id, instance) is used to uniquely determine a node. if that is so then using the names should be kept seperate.

> +
> + xroot_put_group(xr, grp);
> + (void)xroot_destroy_group(xr, instance);
> + }
> +
> + xroot_event_fini(xr);
> + xroot_grps_fini(xr);
> +}
> +EXPORT_SYMBOL_GPL(xroot_remove);
> +
> +void xroot_broadcast(void *root, enum xrt_events evt)
> +{
> + struct xroot *xr = (struct xroot *)root;
> + struct xrt_event e = { 0 };
> +
> + /* Root pf driver only broadcasts below two events. */
> + if (evt != XRT_EVENT_POST_CREATION && evt != XRT_EVENT_PRE_REMOVAL) {
> + xroot_info(xr, "invalid event %d", evt);
> + return;
> + }
> +
> + e.xe_evt = evt;
> + e.xe_subdev.xevt_subdev_id = XRT_ROOT;
> + e.xe_subdev.xevt_subdev_instance = 0;

see..

id =

instance =

Tom

> + (void)xroot_trigger_event(xr, &e, false);
> +}
> +EXPORT_SYMBOL_GPL(xroot_broadcast);