Re: [PATCH v2 6/6] firmware: tegra: bpmp: Add support for DRAM MRQ GSCs

From: Thierry Reding
Date: Wed May 10 2023 - 10:33:50 EST


On Wed, May 10, 2023 at 02:31:36PM +0300, Peter De Schrijver wrote:
> Implement support for DRAM MRQ GSCs.
>
> Signed-off-by: Peter De Schrijver <pdeschrijver@xxxxxxxxxx>
> ---
> drivers/firmware/tegra/bpmp-tegra186.c | 214 +++++++++++++++++--------
> drivers/firmware/tegra/bpmp.c | 4 +-
> 2 files changed, 153 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/firmware/tegra/bpmp-tegra186.c b/drivers/firmware/tegra/bpmp-tegra186.c
> index 2e26199041cd..43e2563575fc 100644
> --- a/drivers/firmware/tegra/bpmp-tegra186.c
> +++ b/drivers/firmware/tegra/bpmp-tegra186.c
> @@ -4,8 +4,11 @@
> */
>
> #include <linux/genalloc.h>
> +#include <linux/io.h>
> #include <linux/mailbox_client.h>
> +#include <linux/of_address.h>
> #include <linux/platform_device.h>
> +#include <linux/range.h>

Why do we need range.h?

>
> #include <soc/tegra/bpmp.h>
> #include <soc/tegra/bpmp-abi.h>
> @@ -13,12 +16,13 @@
>
> #include "bpmp-private.h"
>
> +enum tegra_bpmp_mem_type { TEGRA_INVALID, TEGRA_SRAM, TEGRA_RMEM };
> +

This is a strange construct. We can already use the pool pointer to
determine which type of memory is being used. Your usage of this leads
to very unintuitive code when you're error checking, etc. and prevents
you from propagating proper error codes.

> struct tegra186_bpmp {
> struct tegra_bpmp *parent;
>
> struct {
> - struct gen_pool *pool;
> - void __iomem *virt;
> + void *virt;

I think what we really need is a union that contains both an __iomem
annotated pointer and a regular one.

> dma_addr_t phys;
> } tx, rx;
>
> @@ -26,6 +30,12 @@ struct tegra186_bpmp {
> struct mbox_client client;
> struct mbox_chan *channel;
> } mbox;
> +
> + struct {
> + struct gen_pool *tx, *rx;
> + } sram;

Please keep this in the tx/rx structure. This would perhaps be useful if
there was an equivalent "dram" structure, but as it is there's no
advantage in keeping this separate from the other memory-related fields.

> +
> + enum tegra_bpmp_mem_type type;
> };
>
> static inline struct tegra_bpmp *
> @@ -118,8 +128,8 @@ static int tegra186_bpmp_channel_init(struct tegra_bpmp_channel *channel,
> queue_size = tegra_ivc_total_queue_size(message_size);
> offset = queue_size * index;
>
> - iosys_map_set_vaddr_iomem(&rx, priv->rx.virt + offset);
> - iosys_map_set_vaddr_iomem(&tx, priv->tx.virt + offset);
> + iosys_map_set_vaddr_iomem(&rx, (void __iomem *)priv->rx.virt + offset);
> + iosys_map_set_vaddr_iomem(&tx, (void __iomem *)priv->tx.virt + offset);

This completely defies the purpose of using the iosys_map helpers. What
you really want to do is check if we're using SRAM and use the _iomem
variant, otherwise, use the plain one, something like:

if (priv->rx.pool)
iosys_map_set_vaddr_iomem(&rx, priv->rx.sram + offset);
else
iosys_map_set_vaddr(&rx, priv->rx.dram + offset);

And repeat that for TX. I suppose you could also do the iosys_map
assignment for both in the same blocks above since we don't support
mixing SRAM and DRAM modes.

>
> err = tegra_ivc_init(channel->ivc, NULL, &rx, priv->rx.phys + offset, &tx,
> priv->tx.phys + offset, 1, message_size, tegra186_bpmp_ivc_notify,
> @@ -158,64 +168,171 @@ static void mbox_handle_rx(struct mbox_client *client, void *data)
> tegra_bpmp_handle_rx(bpmp);
> }
>
> -static int tegra186_bpmp_init(struct tegra_bpmp *bpmp)
> +static void tegra186_bpmp_channel_deinit(struct tegra_bpmp *bpmp)
> +{
> + int i;

Can be unsigned int. The preferred ordering for variable declarations is
the inverse christmas tree (i.e. sort by length in decreasing order). It
often matches the result of sorting by importance (i.e. the "priv"
pointer is more important than the loop variable).

> + struct tegra186_bpmp *priv = bpmp->priv;
> +
> + for (i = 0; i < bpmp->threaded.count; i++) {
> + if (!bpmp->threaded_channels[i].bpmp)
> + continue;
> +
> + tegra186_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
> + }
> +
> + tegra186_bpmp_channel_cleanup(bpmp->rx_channel);
> + tegra186_bpmp_channel_cleanup(bpmp->tx_channel);
> +
> + if (priv->type == TEGRA_SRAM) {
> + gen_pool_free(priv->sram.tx, (unsigned long)priv->tx.virt, 4096);
> + gen_pool_free(priv->sram.rx, (unsigned long)priv->rx.virt, 4096);
> + } else if (priv->type == TEGRA_RMEM) {
> + memunmap(priv->tx.virt);
> + }

This introduces a bit of an asymmetry because tegra_bpmp_channel_setup()
doesn't actually set up the pool or reserved-memory. Since the memory is
only used for the channels, we can probably move the initialization into
tegra186_bpmp_channel_setup() below.

> +}
> +
> +static int tegra186_bpmp_channel_setup(struct tegra_bpmp *bpmp)

This name could be confusing because we already use the
tegra186_bpmp_channel_ prefix for functions that operate on individual
channels, whereas this function operates on the BPMP object.

Perhaps something like tegra186_bpmp_setup_channels() would better
reflect what this does.

The same goes for tegra186_bpmp_channel_deinit() above. Maybe something
like tegra186_bpmp_cleanup_channels() to make it more obvious that it's
the counterpart of tegra186_bpmp_setup_channels().

> {
> - struct tegra186_bpmp *priv;
> unsigned int i;
> int err;
>
> - priv = devm_kzalloc(bpmp->dev, sizeof(*priv), GFP_KERNEL);
> - if (!priv)
> - return -ENOMEM;
> + err = tegra186_bpmp_channel_init(bpmp->tx_channel, bpmp,
> + bpmp->soc->channels.cpu_tx.offset);
> + if (err < 0)
> + return err;
>
> - bpmp->priv = priv;
> - priv->parent = bpmp;
> + err = tegra186_bpmp_channel_init(bpmp->rx_channel, bpmp,
> + bpmp->soc->channels.cpu_rx.offset);
> + if (err < 0) {
> + tegra186_bpmp_channel_cleanup(bpmp->tx_channel);
> + return err;
> + }
> +
> + for (i = 0; i < bpmp->threaded.count; i++) {
> + unsigned int index = bpmp->soc->channels.thread.offset + i;
>
> - priv->tx.pool = of_gen_pool_get(bpmp->dev->of_node, "shmem", 0);
> - if (!priv->tx.pool) {
> + err = tegra186_bpmp_channel_init(&bpmp->threaded_channels[i],
> + bpmp, index);
> + if (err < 0)
> + break;
> + }
> +
> + if (err < 0)
> + tegra186_bpmp_channel_deinit(bpmp);

See how the name is confusing here? This is very close to the call to
tegra186_bpmp_channel_init() above and the common prefix makes it seem
like this would undo the effects of the above and then immediately
raises the question why it's only undoing all of the above channel
initializations. You then have to actually look at the implementation to
find out that that's exactly what it does.

> +
> + return err;
> +}
> +
> +static void tegra186_bpmp_reset_channels(struct tegra_bpmp *bpmp)
> +{
> + unsigned int i;
> +
> + tegra186_bpmp_channel_reset(bpmp->tx_channel);
> + tegra186_bpmp_channel_reset(bpmp->rx_channel);
> +
> + for (i = 0; i < bpmp->threaded.count; i++)
> + tegra186_bpmp_channel_reset(&bpmp->threaded_channels[i]);
> +}

I think this now matches the tegra186_bpmp_resume() implementation, so
it could be reused in that.

> +
> +static int tegra186_bpmp_sram_init(struct tegra_bpmp *bpmp)
> +{
> + int err;
> + struct tegra186_bpmp *priv = bpmp->priv;
> +
> + priv->sram.tx = of_gen_pool_get(bpmp->dev->of_node, "shmem", 0);
> + if (!priv->sram.tx) {
> dev_err(bpmp->dev, "TX shmem pool not found\n");
> return -EPROBE_DEFER;
> }
>
> - priv->tx.virt = (void __iomem *)gen_pool_dma_alloc(priv->tx.pool, 4096, &priv->tx.phys);
> + priv->tx.virt = gen_pool_dma_alloc(priv->sram.tx, 4096, &priv->tx.phys);
> if (!priv->tx.virt) {
> dev_err(bpmp->dev, "failed to allocate from TX pool\n");
> return -ENOMEM;
> }
>
> - priv->rx.pool = of_gen_pool_get(bpmp->dev->of_node, "shmem", 1);
> - if (!priv->rx.pool) {
> + priv->sram.rx = of_gen_pool_get(bpmp->dev->of_node, "shmem", 1);
> + if (!priv->sram.rx) {
> dev_err(bpmp->dev, "RX shmem pool not found\n");
> err = -EPROBE_DEFER;
> goto free_tx;
> }
>
> - priv->rx.virt = (void __iomem *)gen_pool_dma_alloc(priv->rx.pool, 4096, &priv->rx.phys);
> + priv->rx.virt = gen_pool_dma_alloc(priv->sram.rx, 4096, &priv->rx.phys);
> if (!priv->rx.virt) {
> dev_err(bpmp->dev, "failed to allocate from RX pool\n");
> err = -ENOMEM;
> goto free_tx;
> }
>
> - err = tegra186_bpmp_channel_init(bpmp->tx_channel, bpmp,
> - bpmp->soc->channels.cpu_tx.offset);
> - if (err < 0)
> - goto free_rx;
> + priv->type = TEGRA_SRAM;
>
> - err = tegra186_bpmp_channel_init(bpmp->rx_channel, bpmp,
> - bpmp->soc->channels.cpu_rx.offset);
> - if (err < 0)
> - goto cleanup_tx_channel;
> + return 0;
>
> - for (i = 0; i < bpmp->threaded.count; i++) {
> - unsigned int index = bpmp->soc->channels.thread.offset + i;
> +free_tx:
> + gen_pool_free(priv->sram.tx, (unsigned long)priv->tx.virt, 4096);
>
> - err = tegra186_bpmp_channel_init(&bpmp->threaded_channels[i],
> - bpmp, index);
> + return err;
> +}
> +
> +static enum tegra_bpmp_mem_type tegra186_bpmp_dram_init(struct tegra_bpmp *bpmp)
> +{
> + int err;
> + struct resource res;
> + struct device_node *np;
> + struct tegra186_bpmp *priv = bpmp->priv;
> +
> + np = of_parse_phandle(bpmp->dev->of_node, "memory-region", 0);
> + if (!np)
> + return TEGRA_INVALID;
> +
> + err = of_address_to_resource(np, 0, &res);
> + if (err) {
> + dev_warn(bpmp->dev, "Parsing memory region returned: %d\n", err);
> + return TEGRA_INVALID;
> + }
> +
> + if ((res.end - res.start + 1) < 0x2000) {

resource_size(), and maybe use SZ_8K instead of the literal here.

> + dev_warn(bpmp->dev, "DRAM region less than 0x2000 bytes\n");

Also, better to use a more human-readable string here. While at it,
perhaps we can make this a bit more assertive, maybe something like:

"DRAM region must be larger than 8 KiB"

?

> + return TEGRA_INVALID;
> + }

This doesn't allow the caller to differentiate between potentially fatal
errors and non-fatal ones. For instance, we don't want the absence of a
"memory-region" property to be fatal (because we want to fall back to
use SRAM in that case, or at least attempt to), but if "memory-region"
exists, any of the subsequent errors probably should be fatal. It's
easier to deal with that situation if you return regular error codes
here. The !np check above could return -ENODEV, for example, as a way of
letting the caller know that we don't have DRAM support in DT. For the
of_address_to_resource() failure we can instead propagate the error code
and so on.

Also, I think it'd be better to use a named constant like SZ_8K instead
of the literal 0x2000 above.

> +
> + priv->tx.phys = res.start;
> + priv->rx.phys = res.start + 0x1000;

SZ_4K

> +
> + priv->tx.virt = memremap(priv->tx.phys, res.end - res.start + 1, MEMREMAP_WC);

Another case where we can use resource_size(). Might be a good idea to
introduce a local "size" variable.

> + if (priv->tx.virt == NULL) {
> + dev_warn(bpmp->dev, "DRAM region mapping failed\n");
> + return TEGRA_INVALID;
> + }
> + priv->rx.virt = priv->tx.virt + 0x1000;

SZ_4K

We should probably do the same thing for the SRAM paths, but that should
be a separate patch and can be done at another time.

> +
> + return TEGRA_RMEM;
> +}
> +
> +static int tegra186_bpmp_init(struct tegra_bpmp *bpmp)
> +{
> + struct tegra186_bpmp *priv;
> + int err;
> +
> + priv = devm_kzalloc(bpmp->dev, sizeof(*priv), GFP_KERNEL);
> + if (!priv)
> + return -ENOMEM;
> +
> + bpmp->priv = priv;
> + priv->parent = bpmp;
> +
> + priv->type = tegra186_bpmp_dram_init(bpmp);
> + if (priv->type == TEGRA_INVALID) {
> + err = tegra186_bpmp_sram_init(bpmp);
> if (err < 0)
> - goto cleanup_channels;
> + return err;
> }

As I mentioned previously, I think we can move the block above into
tegra186_bpmp_setup_channels() to make it symmetric with the teardown of
this in tegra186_bpmp_cleanup_channels().

Thierry

>
> + err = tegra186_bpmp_channel_setup(bpmp);
> + if (err < 0)
> + return err;
> +
> /* mbox registration */
> priv->mbox.client.dev = bpmp->dev;
> priv->mbox.client.rx_callback = mbox_handle_rx;
> @@ -226,51 +343,22 @@ static int tegra186_bpmp_init(struct tegra_bpmp *bpmp)
> if (IS_ERR(priv->mbox.channel)) {
> err = PTR_ERR(priv->mbox.channel);
> dev_err(bpmp->dev, "failed to get HSP mailbox: %d\n", err);
> - goto cleanup_channels;
> + tegra186_bpmp_channel_deinit(bpmp);
> + return err;
> }
>
> - tegra186_bpmp_channel_reset(bpmp->tx_channel);
> - tegra186_bpmp_channel_reset(bpmp->rx_channel);
> -
> - for (i = 0; i < bpmp->threaded.count; i++)
> - tegra186_bpmp_channel_reset(&bpmp->threaded_channels[i]);
> + tegra186_bpmp_reset_channels(bpmp);
>
> return 0;
> -
> -cleanup_channels:
> - for (i = 0; i < bpmp->threaded.count; i++) {
> - if (!bpmp->threaded_channels[i].bpmp)
> - continue;
> -
> - tegra186_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
> - }
> -
> - tegra186_bpmp_channel_cleanup(bpmp->rx_channel);
> -cleanup_tx_channel:
> - tegra186_bpmp_channel_cleanup(bpmp->tx_channel);
> -free_rx:
> - gen_pool_free(priv->rx.pool, (unsigned long)priv->rx.virt, 4096);
> -free_tx:
> - gen_pool_free(priv->tx.pool, (unsigned long)priv->tx.virt, 4096);
> -
> - return err;
> }
>
> static void tegra186_bpmp_deinit(struct tegra_bpmp *bpmp)
> {
> struct tegra186_bpmp *priv = bpmp->priv;
> - unsigned int i;
>
> mbox_free_channel(priv->mbox.channel);
>
> - for (i = 0; i < bpmp->threaded.count; i++)
> - tegra186_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
> -
> - tegra186_bpmp_channel_cleanup(bpmp->rx_channel);
> - tegra186_bpmp_channel_cleanup(bpmp->tx_channel);
> -
> - gen_pool_free(priv->rx.pool, (unsigned long)priv->rx.virt, 4096);
> - gen_pool_free(priv->tx.pool, (unsigned long)priv->tx.virt, 4096);
> + tegra186_bpmp_channel_deinit(bpmp);
> }
>
> static int tegra186_bpmp_resume(struct tegra_bpmp *bpmp)
> diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
> index 8b5e5daa9fae..17bd3590aaa2 100644
> --- a/drivers/firmware/tegra/bpmp.c
> +++ b/drivers/firmware/tegra/bpmp.c
> @@ -735,6 +735,8 @@ static int tegra_bpmp_probe(struct platform_device *pdev)
> if (!bpmp->threaded_channels)
> return -ENOMEM;
>
> + platform_set_drvdata(pdev, bpmp);
> +
> err = bpmp->soc->ops->init(bpmp);
> if (err < 0)
> return err;
> @@ -758,8 +760,6 @@ static int tegra_bpmp_probe(struct platform_device *pdev)
>
> dev_info(&pdev->dev, "firmware: %.*s\n", (int)sizeof(tag), tag);
>
> - platform_set_drvdata(pdev, bpmp);
> -
> err = of_platform_default_populate(pdev->dev.of_node, NULL, &pdev->dev);
> if (err < 0)
> goto free_mrq;
> --
> 2.34.1
>

Attachment: signature.asc
Description: PGP signature