Re: [PATCH v11 06/11] remoteproc: mediatek: Probe multi-core SCP

From: TingHan Shen (沈廷翰)
Date: Tue May 16 2023 - 00:05:41 EST


Hi Mathieu,

On Mon, 2023-05-15 at 11:07 -0600, Mathieu Poirier wrote:
> External email : Please do not click links or open attachments until you have verified the sender or the content.
>
>
> On Mon, May 15, 2023 at 12:31:04PM +0000, TingHan Shen (沈廷翰) wrote:
> > Hi Mathieu,
> >
> > On Fri, 2023-05-12 at 11:56 -0600, Mathieu Poirier wrote:
> > > External email : Please do not click links or open attachments until you have verified the sender or the content.
> > >
> > >
> > > On Wed, May 10, 2023 at 02:37:44PM +0800, Tinghan Shen wrote:
> > > > The difference of single-core SCP and multi-core SCP device tree is
> > > > the presence of child device nodes described SCP cores. The SCP
> > > > driver populates the platform device and checks the child nodes
> > > > to identify whether it's a single-core SCP or a multi-core SCP.
> > > >
> > > > Add the remoteproc instances of multi-core SCP to the SCP cluster list.
> > > > When the SCP driver is removed, it cleanup resources by walking
> > > > through the cluster list.
> > > >
> > > > Signed-off-by: Tinghan Shen <tinghan.shen@xxxxxxxxxxxx>
> > > > ---
> > > > drivers/remoteproc/mtk_scp.c | 135 ++++++++++++++++++++++++++++++-----
> > > > 1 file changed, 117 insertions(+), 18 deletions(-)
> > > >
> > > > diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c
> > > > index ff73c6dd9637..87215a0e145e 100644
> > > > --- a/drivers/remoteproc/mtk_scp.c
> > > > +++ b/drivers/remoteproc/mtk_scp.c
> > > > @@ -864,7 +864,8 @@ static void scp_remove_rpmsg_subdev(struct mtk_scp *scp)
> > > > }
> > > >
> > > > static struct mtk_scp *scp_rproc_init(struct platform_device *pdev,
> > > > - struct mtk_scp_of_cluster *scp_cluster)
> > > > + struct mtk_scp_of_cluster *scp_cluster,
> > > > + const struct mtk_scp_of_data *of_data)
> > > > {
> > > > struct device *dev = &pdev->dev;
> > > > struct device_node *np = dev->of_node;
> > > > @@ -887,7 +888,7 @@ static struct mtk_scp *scp_rproc_init(struct platform_device *pdev,
> > > > scp = rproc->priv;
> > > > scp->rproc = rproc;
> > > > scp->dev = dev;
> > > > - scp->data = of_device_get_match_data(dev);
> > > > + scp->data = of_data;
> > > > platform_device_add_data(pdev, &scp, sizeof(scp));
> > > >
> > > > scp->reg_base = scp_cluster->reg_base;
> > > > @@ -938,10 +939,6 @@ static struct mtk_scp *scp_rproc_init(struct platform_device *pdev,
> > > > goto remove_subdev;
> > > > }
> > > >
> > > > - ret = rproc_add(rproc);
> > > > - if (ret)
> > > > - goto remove_subdev;
> > > > -
> > >
> > > This should have been done in the previous patch.
> >
> > Ok.
> >
> > >
> > > > return scp;
> > > >
> > > > remove_subdev:
> > > > @@ -956,19 +953,116 @@ static struct mtk_scp *scp_rproc_init(struct platform_device *pdev,
> > > > return ERR_PTR(ret);
> > > > }
> > > >
> > > > +static void scp_rproc_free(struct mtk_scp *scp)
> > > > +{
> > > > + int i;
> > > > +
> > > > + scp_remove_rpmsg_subdev(scp);
> > > > + scp_ipi_unregister(scp, SCP_IPI_INIT);
> > > > + scp_unmap_memory_region(scp);
> > > > + for (i = 0; i < SCP_IPI_MAX; i++)
> > > > + mutex_destroy(&scp->ipi_desc[i].lock);
> > > > + mutex_destroy(&scp->send_lock);
> > > > +}
> > > > +
> > > > +static int scp_is_single_core(struct platform_device *pdev)
> > > > +{
> > > > + struct device *dev = &pdev->dev;
> > > > + struct device_node *np = dev_of_node(dev);
> > > > + struct device_node *child;
> > > > +
> > > > + child = of_get_next_available_child(np, NULL);
> > > > + if (!child)
> > > > + return dev_err_probe(dev, -ENODEV, "No child node\n");
> > > > +
> > > > + of_node_put(child);
> > > > + return of_node_name_eq(child, "cros-ec-rpmsg");
> > > > +}
> > > > +
> > > > static int scp_cluster_init(struct platform_device *pdev)
> > > > {
> > > > - struct mtk_scp *scp;
> > > > + struct mtk_scp *scp, *temp;
> > > > struct mtk_scp_of_cluster *scp_cluster = platform_get_drvdata(pdev);
> > > > struct list_head *cluster = &scp_cluster->mtk_scp_cluster;
> > > >
> > > > - scp = scp_rproc_init(pdev, scp_cluster);
> > > > - if (IS_ERR(scp))
> > > > - return PTR_ERR(scp);
> > > > + struct device *dev = &pdev->dev;
> > > > + struct device_node *np = dev_of_node(dev);
> > > > + struct platform_device *cpdev;
> > > > + struct device_node *child;
> > > > + const struct mtk_scp_of_data **cluster_of_data;
> > > > + int core_id = 0;
> > > > + int ret;
> > > > +
> > > > + ret = scp_is_single_core(pdev);
> > > > + if (ret < 0)
> > > > + return ret;
> > > > +
> > > > + if (ret) {
> > > > + dev_dbg(dev, "single-core scp\n");
> > > > +
> > > > + /* When using the SCP node phandle on exported SCP APIs, the drvdata
> > > > + * is expected to be the mtk_scp object, and as a result, it is intended
> > > > + * to be overwritten for single-core SCP usage.
> > > > + */
> > >
> > > Is this comment still relevant?
> >
> > No, I'll remove it.
> >
> > >
> > > > + scp = scp_rproc_init(pdev, scp_cluster, of_device_get_match_data(dev));
> > > > + if (IS_ERR(scp))
> > > > + return PTR_ERR(scp);
> > > > +
> > > > + list_add_tail(&scp->elem, cluster);
> > >
> > > This is getting messy. Please add two new functions, i.e scp_add_single_core()
> > > and scp_add_multi_core().
> >
> > Ok.
> >
> > >
> > > > + } else {
> > > > + dev_dbg(dev, "multi-core scp\n");
> > > > +
> > > > + cluster_of_data = (const struct mtk_scp_of_data **)of_device_get_match_data(dev);
> > > > +
> > > > + for_each_available_child_of_node(np, child) {
> > > > + if (!cluster_of_data[core_id]) {
> > > > + ret = -EINVAL;
> > > > + dev_err(dev, "Not support core %d\n", core_id);
> > > > + of_node_put(child);
> > > > + goto init_fail;
> > > > + }
> > > > +
> > > > + cpdev = of_find_device_by_node(child);
> > > > + if (!cpdev) {
> > > > + ret = -ENODEV;
> > > > + dev_err(dev, "Not found platform device for core %d\n", core_id);
> > > > + of_node_put(child);
> > > > + goto init_fail;
> > > > + }
> > > > +
> > > > + scp = scp_rproc_init(cpdev, scp_cluster, cluster_of_data[core_id]);
> > > > + if (IS_ERR(scp)) {
> > > > + dev_err(dev, "Failed to initialize core %d rproc\n", core_id);
> > > > + put_device(&cpdev->dev);
> > > > + of_node_put(child);
> > > > + goto init_fail;
> > > > + }
> > > > + list_add_tail(&scp->elem, cluster);
> > > > + put_device(&cpdev->dev);
> > > > +
> > > > + core_id++;
> > > > + }
> > > > + }
> > > >
> > > > - list_add_tail(&scp->elem, cluster);
> > > > + list_for_each_entry_safe_reverse(scp, temp, cluster, elem) {
> > > > + ret = rproc_add(scp->rproc);
> > >
> > > Call rproc_add() in the for_each_available_child_of_node() loop above. That way
> > > it if fails you can call scp_rproc_free() right away and jump to init_fail to
> > > deal with the other cores on the list.
> >
> > Ok, I'll update it in next version.
> >
> > >
> > > > + if (ret)
> > > > + goto add_fail;
> > > > + }
> > > >
> > > > return 0;
> > > > +
> > > > +add_fail:
> > > > + list_for_each_entry_continue(scp, cluster, elem) {
> > > > + rproc_del(scp->rproc);
> > > > + }
> > > > +init_fail:
> > > > + list_for_each_entry_safe_reverse(scp, temp, cluster, elem) {
> > > > + list_del(&scp->elem);
> > > > + scp_rproc_free(scp);
> > > > + }
> > > > +
> > >
> > > There are a serious problems with the code in both labels above. I will let you
> > > think about that and I will stop here for this revision.
> > >
> > > Mathieu
> >
> > May be the problem is the possible null access of *scp?
> > Or, the resources bound with cpdev should be released at here?
>
> The first problem is related to list processing. Function
> list_for_each_entry_safe_reverse() starts at the end of the list and stops if an
> error is encountered. From there we go back the other way with
> list_for_each_entry_continue() and then reverse again for
> list_for_each_entry_safe_reverse(). This back and forth is error prone and very
> hard to maintain.
>
> The second problem is with rproc_del()... Does it need to be there?
>

Ok, I got your point. I agree that the current flow is complex and difficult to understand
at first glance. I also find it burdensome to modify and tune to fulfill the expected behavior
when writing this part. I'll do what you suggest at above to make it more intuitive.

The rproc_del() is included there because the last failed operation is rproc_add().
I thought that I need to reverse the processing order of the list to undo the effect of rproc_add().
Anyway, I'll follow your comment to move the location of rproc_add() at next version,
the rproc_del() will move to somewhere instead of here.



Best regards,
TingHan

> >
> >
> > >
> > > > + return ret;
> > > > }
> > > >
> > > > static int scp_probe(struct platform_device *pdev)
> > > > @@ -1005,6 +1099,10 @@ static int scp_probe(struct platform_device *pdev)
> > > > INIT_LIST_HEAD(&scp_cluster->mtk_scp_cluster);
> > > > platform_set_drvdata(pdev, scp_cluster);
> > > >
> > > > + ret = devm_of_platform_populate(dev);
> > > > + if (ret)
> > > > + return dev_err_probe(dev, ret, "Failed to populate platform devices\n");
> > > > +
> > > > ret = scp_cluster_init(pdev);
> > > > if (ret)
> > > > return ret;
> > > > @@ -1016,17 +1114,11 @@ static int scp_remove(struct platform_device *pdev)
> > > > {
> > > > struct mtk_scp_of_cluster *scp_cluster = platform_get_drvdata(pdev);
> > > > struct mtk_scp *scp, *temp;
> > > > - int i;
> > > >
> > > > list_for_each_entry_safe_reverse(scp, temp, &scp_cluster->mtk_scp_cluster, elem) {
> > > > list_del(&scp->elem);
> > > > rproc_del(scp->rproc);
> > > > - scp_remove_rpmsg_subdev(scp);
> > > > - scp_ipi_unregister(scp, SCP_IPI_INIT);
> > > > - scp_unmap_memory_region(scp);
> > > > - for (i = 0; i < SCP_IPI_MAX; i++)
> > > > - mutex_destroy(&scp->ipi_desc[i].lock);
> > > > - mutex_destroy(&scp->send_lock);
> > > > + scp_rproc_free(scp);
> > > > }
> > > >
> > > > return 0;
> > > > @@ -1106,12 +1198,19 @@ static const struct mtk_scp_of_data mt8195_of_data_c1 = {
> > > > .host_to_scp_int_bit = MT8195_CORE1_HOST_IPC_INT_BIT,
> > > > };
> > > >
> > > > +static const struct mtk_scp_of_data *mt8195_of_data_cores[] = {
> > > > + &mt8195_of_data,
> > > > + &mt8195_of_data_c1,
> > > > + NULL
> > > > +};
> > > > +
> > > > static const struct of_device_id mtk_scp_of_match[] = {
> > > > { .compatible = "mediatek,mt8183-scp", .data = &mt8183_of_data },
> > > > { .compatible = "mediatek,mt8186-scp", .data = &mt8186_of_data },
> > > > { .compatible = "mediatek,mt8188-scp", .data = &mt8188_of_data },
> > > > { .compatible = "mediatek,mt8192-scp", .data = &mt8192_of_data },
> > > > { .compatible = "mediatek,mt8195-scp", .data = &mt8195_of_data },
> > > > + { .compatible = "mediatek,mt8195-scp-dual", .data = &mt8195_of_data_cores },
> > > > {},
> > > > };
> > > > MODULE_DEVICE_TABLE(of, mtk_scp_of_match);
> > > > --
> > > > 2.18.0
> > > >
> >
> > --
> > Best regards,
> > TingHan