Re: [PATCH v3 2/2] Implement SDHCI CQE support for DesignWare SDHCI.

From: Adrian Hunter
Date: Fri Oct 20 2023 - 05:23:45 EST


On 16/10/23 14:42, Sergey Khimich wrote:
> Hello Adrian!
>
> Thanks for the review and comments! There are a few questions I'd like to clarify:
>
> On 09.10.2023 18:39, Adrian Hunter wrote:
>> On 2/10/23 14:33, Sergey Khimich wrote:
>>> From: Sergey Khimich <serghox@xxxxxxxxx>
>>>
>>> For enabling CQE support just set 'supports-cqe' in your DevTree file
>>> for appropriate mmc node.
>>>
>>> Signed-off-by: Sergey Khimich <serghox@xxxxxxxxx>
>>> ---
>>>   drivers/mmc/host/Kconfig            |   1 +
>>>   drivers/mmc/host/sdhci-of-dwcmshc.c | 233 +++++++++++++++++++++++++++-
>>>   2 files changed, 232 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
>>> index 554e67103c1a..f3380b014ca9 100644
>>> --- a/drivers/mmc/host/Kconfig
>>> +++ b/drivers/mmc/host/Kconfig
>>> @@ -233,6 +233,7 @@ config MMC_SDHCI_OF_DWCMSHC
>>>       depends on MMC_SDHCI_PLTFM
>>>       depends on OF
>>>       depends on COMMON_CLK
>>> +    select MMC_CQHCI
>>>       help
>>>         This selects Synopsys DesignWare Cores Mobile Storage Controller
>>>         support.
>>> diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> index 3a3bae6948a8..7d43ae011811 100644
>>> --- a/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
>>> @@ -20,6 +20,7 @@
>>>   #include <linux/sizes.h>
>>>     #include "sdhci-pltfm.h"
>>> +#include "cqhci.h"
>>>     #define SDHCI_DWCMSHC_ARG2_STUFF    GENMASK(31, 16)
>>>   @@ -36,6 +37,9 @@
>>>   #define DWCMSHC_ENHANCED_STROBE        BIT(8)
>>>   #define DWCMSHC_EMMC_ATCTRL        0x40
>>>   +/* DWC IP vendor area 2 pointer */
>>> +#define DWCMSHC_P_VENDOR_AREA2        0xea
>>> +
>>>   /* Rockchip specific Registers */
>>>   #define DWCMSHC_EMMC_DLL_CTRL        0x800
>>>   #define DWCMSHC_EMMC_DLL_RXCLK        0x804
>>> @@ -75,6 +79,10 @@
>>>   #define BOUNDARY_OK(addr, len) \
>>>       ((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1)))
>>>   +#define DWCMSHC_SDHCI_CQE_TRNS_MODE    (SDHCI_TRNS_MULTI | \
>>> +                     SDHCI_TRNS_BLK_CNT_EN | \
>>> +                     SDHCI_TRNS_DMA)
>>> +
>>>   enum dwcmshc_rk_type {
>>>       DWCMSHC_RK3568,
>>>       DWCMSHC_RK3588,
>>> @@ -90,7 +98,8 @@ struct rk35xx_priv {
>>>     struct dwcmshc_priv {
>>>       struct clk    *bus_clk;
>>> -    int vendor_specific_area1; /* P_VENDOR_SPECIFIC_AREA reg */
>>> +    int vendor_specific_area1; /* P_VENDOR_SPECIFIC_AREA1 reg */
>>> +    int vendor_specific_area2; /* P_VENDOR_SPECIFIC_AREA2 reg */
>>>       void *priv; /* pointer to SoC private stuff */
>>>   };
>>>   @@ -210,6 +219,147 @@ static void dwcmshc_hs400_enhanced_strobe(struct mmc_host *mmc,
>>>       sdhci_writel(host, vendor, reg);
>>>   }
>>>   +static u32 dwcmshc_cqe_irq_handler(struct sdhci_host *host, u32 intmask)
>>> +{
>>> +    int cmd_error = 0;
>>> +    int data_error = 0;
>>> +
>>> +    if (!sdhci_cqe_irq(host, intmask, &cmd_error, &data_error))
>>> +        return intmask;
>>> +
>>> +    cqhci_irq(host->mmc, intmask, cmd_error, data_error);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void dwcmshc_sdhci_cqe_enable(struct mmc_host *mmc)
>>> +{
>>> +    struct sdhci_host *host = mmc_priv(mmc);
>>> +    u32 pstate;
>>> +    u8 ctrl;
>>> +    int count = 10;
>>> +
>>> +    /*
>>> +     * CQE gets stuck if it sees Buffer Read Enable bit set, which can be
>>> +     * the case after tuning, so ensure the buffer is drained.
>>> +     */
>>> +    pstate = sdhci_readl(host, SDHCI_PRESENT_STATE);
>>> +    while (pstate & SDHCI_DATA_AVAILABLE) {
>>> +        sdhci_readl(host, SDHCI_BUFFER);
>>> +        pstate = sdhci_readl(host, SDHCI_PRESENT_STATE);
>>> +        if (count-- == 0) {
>>> +            dev_warn(mmc_dev(host->mmc),
>>> +                 "CQE may get stuck because the Buffer Read Enable bit is set\n");
>>> +            break;
>>> +        }
>>> +        mdelay(1);
>>> +    }
>> An alternative, which might be easier, is to do a
>> data reset which may also help allow the device to
>> subsequently enter low power states.
>> Refer commit f8870ae6e2d6be75b1accc2db981169fdfbea7ab
>> and commit 7b7d57fd1b773d25d8358c6017592b4928bf76ce
>
> Thanks, I'll fix it in the next version of the patch.
>
>>
>>> +
>>> +    sdhci_writew(host, DWCMSHC_SDHCI_CQE_TRNS_MODE, SDHCI_TRANSFER_MODE);
>>> +
>>> +    sdhci_cqe_enable(mmc);
>>> +
>>> +    /*
>>> +     * The "DesignWare Cores Mobile Storage Host Controller
>>> +     * DWC_mshc / DWC_mshc_lite Databook" says:
>>> +     * when "Host Version 4 Enable" is 1 in Host Control 2 register,
>>> +     * SDHCI_CTRL_ADMA32 bit means ADMA2 is selected.
>>> +     * Selection of 32-bit/64-bit System Addressing:
>>> +     * either 32-bit or 64-bit system addressing is selected by
>>> +     * 64-bit Addressing bit in Host Control 2 register.
>>> +     *
>>> +     * On the other hand the "DesignWare Cores Mobile Storage Host
>>> +     * Controller DWC_mshc / DWC_mshc_lite User Guide" says, that we have to
>>> +     * set DMA_SEL to ADMA2 _only_ mode in the Host Control 2 register.
>>> +     */
>>> +    ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
>>> +    ctrl &= ~SDHCI_CTRL_DMA_MASK;
>>> +    ctrl |= SDHCI_CTRL_ADMA32;
>>> +    sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
>>> +}
>>> +
>>> +static void dwcmshc_sdhci_cqe_disable(struct mmc_host *mmc, bool recovery)
>>> +{
>>> +    /*
>>> +     * If an ioctl was issued, cqe_disable will be called.
>>> +     * For CQE of sdhci-of-dwcmshc, the previous in-flight cmd will be lost quietly.
>>> +     * So wait for mmc idle state.
>> This sounds like it should be fixed in the mmc block driver.
>> Can you provide an example of when this happens?
> Unfortunately I can't provide an example.
> But this part of the patch is updated by me on the advice of Shawn Lin after his reviewing V1 of the patch.
> Please find his message here:
> https://patchwork.kernel.org/project/linux-mmc/patch/20230825143525.869906-2-serghox@xxxxxxxxx/
>
> Just in case here I also quote the part of his comment that refers to this part of the patch:
> "And another issue was found when Rockchip added CQE support for
> sdhci-of-dwcmshc internally, is that if a ioctl was issued, cqe_disable
> will be called. For CQE of sdhci-of-dwcmshc, the previous in-flight cmd
> will be lost quietly. So a mmc->cqe_ops->cqe_wait_for_idle(mmc) should
> be added before sdhci_cqe_disable(), so you need a dwcmshc specified
> cqe_disable hook in sdhci-of-dwcmshc."

The mmc block driver already does a wait for idle before ioctl commands,
refer to mmc_blk_mq_issue_rq(), case MMC_ISSUE_SYNC.

Without more information we cannot assume the upstream kernel has
a problem with this.

>
>>
>>> +     */
>>> +    mmc->cqe_ops->cqe_wait_for_idle(mmc);
>>> +
>>> +    return sdhci_cqe_disable(mmc, recovery);
>>> +}
>>> +
>>> +static void dwcmshc_cqhci_set_tran_desc(u8 *desc, dma_addr_t addr, int len, bool end,
>>> +                    bool dma64)
>>> +{
>>> +    __le32 *attr = (__le32 __force *)desc;
>>> +
>>> +    *attr = (CQHCI_VALID(1) |
>>> +         CQHCI_END(end ? 1 : 0) |
>>> +         CQHCI_INT(0) |
>>> +         CQHCI_ACT(0x4) |
>>> +         CQHCI_DAT_LENGTH(len));
>>> +
>>> +    if (dma64) {
>>> +        __le64 *dataddr = (__le64 __force *)(desc + 4);
>>> +
>>> +        dataddr[0] = cpu_to_le64(addr);
>>> +    } else {
>>> +        __le32 *dataddr = (__le32 __force *)(desc + 4);
>>> +
>>> +        dataddr[0] = cpu_to_le32(addr);
>>> +    }
>>> +}
>> This is the same as cqhci_set_tran_desc(). Might as well export that
>> instead.
> Thanks, I'll fix it in the next version of the patch.
>>> +
>>> +static void dwcmshc_cqhci_prep_tran_desc(struct mmc_data *data,
>>> +                     struct cqhci_host *cq_host,
>>> +                     u8 *desc, int sg_count)
>>> +{
>>> +    int i, len, tmplen, offset;
>>> +    bool end = false;
>>> +    bool dma64 = cq_host->dma64;
>>> +    dma_addr_t addr;
>>> +    struct scatterlist *sg;
>>> +
>>> +    for_each_sg(data->sg, sg, sg_count, i) {
>>> +        addr = sg_dma_address(sg);
>>> +        len = sg_dma_len(sg);
>>> +
>>> +        /*
>>> +         * According to the "DesignWare Cores Mobile Storage Host Controller
>>> +         * DWC_mshc / DWC_mshc_lite Databook" the host memory data buffer size
>>> +         * and start address must not exceed 128 MB. If it exceeds,
>>> +         * the data buffer must be split using two descriptors.
>>> +         */
>>> +
>>> +        if (likely(BOUNDARY_OK(addr, len))) {
>>> +            if ((i + 1) == sg_count)
>>> +                end = true;
>>> +            dwcmshc_cqhci_set_tran_desc(desc, addr, len, end, dma64);
>>> +            desc += cq_host->trans_desc_len;
>>> +        } else {
>>> +            offset = addr & (SZ_128M - 1);
>>> +            tmplen = SZ_128M - offset;
>>> +            dwcmshc_cqhci_set_tran_desc(desc, addr, tmplen, end, dma64);
>>> +            desc += cq_host->trans_desc_len;
>>> +
>>> +            if ((i + 1) == sg_count)
>>> +                end = true;
>>> +
>>> +            addr += tmplen;
>>> +            len -= tmplen;
>>> +            dwcmshc_cqhci_set_tran_desc(desc, addr, len, end, dma64);
>>> +            desc += cq_host->trans_desc_len;
>>> +        }
>>> +    }
>>> +}
>> Could this be done more like dwcmshc_adma_write_desc()
> Actually I'm not sure what do you mean. I reused checking boundary construction with
> "BOUNDARY_OK" macro and used the same variable names. I would appreciate it if you could clarify
> what does mean "more like dwcmshc_adma_write_desc()"

Provide a hook for cqhci_set_tran_desc() instead of cqhci_prep_tran_desc()
You'll need to check the details, but something like:


diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index b3d7d6d8d654..98e7e9d3030d 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -522,7 +522,10 @@ static int cqhci_prep_tran_desc(struct mmc_request *mrq,

if ((i+1) == sg_count)
end = true;
- cqhci_set_tran_desc(desc, addr, len, end, dma64);
+ if (cq_host->ops->set_tran_desc)
+ cq_host->ops->set_tran_desc(&desc, addr, len, end, dma64);
+ else
+ cqhci_set_tran_desc(desc, addr, len, end, dma64);
desc += cq_host->trans_desc_len;
}

And:

/*
 * Sketch of a per-descriptor hook: write the transfer descriptor(s) for one
 * DMA segment [addr, addr + len).
 *
 * If the segment is empty or stays within one 128M-aligned window (per
 * BOUNDARY_OK), a single descriptor is written at *desc. Otherwise the
 * segment is split at the 128M boundary into two descriptors, and *desc is
 * left pointing at the second one so that the caller's own
 * "desc += trans_desc_len" step moves past both.
 *
 * @desc:  in/out pointer to the current descriptor slot
 * @addr:  DMA address of the segment
 * @len:   length of the segment in bytes
 * @end:   true if this segment is the last one of the request
 * @dma64: passed through unchanged to cqhci_set_tran_desc()
 *
 * NOTE(review): cq_host is used below but is neither a parameter nor a
 * local here; the hook presumably needs a struct cqhci_host * argument
 * (and the call site updated to pass it) -- TODO confirm.
 */
static void dwcmshc_set_tran_desc(u8 **desc, dma_addr_t addr, int len, bool end, bool dma64)
{
int tmplen, offset;

/* Common case: nothing to split, write a single descriptor. */
if (likely(!len || BOUNDARY_OK(addr, len))) {
cqhci_set_tran_desc(*desc, addr, len, end, dma64);
return;
}

/* First piece: from addr up to the next 128M boundary; never the end. */
offset = addr & (SZ_128M - 1);
tmplen = SZ_128M - offset;
cqhci_set_tran_desc(*desc, addr, tmplen, false, dma64);

/* Second piece: the remainder, carrying the caller's end flag. */
addr += tmplen;
len -= tmplen;
/* NOTE(review): cq_host is undeclared in this function -- see header. */
*desc += cq_host->trans_desc_len;
cqhci_set_tran_desc(*desc, addr, len, end, dma64);
}



>>
>>> +
>>> +static void dwcmshc_cqhci_dumpregs(struct mmc_host *mmc)
>>> +{
>>> +    sdhci_dumpregs(mmc_priv(mmc));
>>> +}
>>> +
>>>   static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock)
>>>   {
>>>       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> @@ -345,6 +495,7 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = {
>>>       .get_max_clock        = dwcmshc_get_max_clock,
>>>       .reset            = sdhci_reset,
>>>       .adma_write_desc    = dwcmshc_adma_write_desc,
>>> +    .irq            = dwcmshc_cqe_irq_handler,
>>>   };
>>>     static const struct sdhci_ops sdhci_dwcmshc_rk35xx_ops = {
>>> @@ -379,6 +530,70 @@ static const struct sdhci_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
>>>              SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
>>>   };
>>>   +static const struct cqhci_host_ops dwcmshc_cqhci_ops = {
>>> +    .enable        = dwcmshc_sdhci_cqe_enable,
>>> +    .disable    = dwcmshc_sdhci_cqe_disable,
>>> +    .dumpregs    = dwcmshc_cqhci_dumpregs,
>>> +    .prep_tran_desc    = dwcmshc_cqhci_prep_tran_desc,
>>> +};
>>> +
>>> +static void dwcmshc_cqhci_init(struct sdhci_host *host, struct platform_device *pdev)
>>> +{
>>> +    struct cqhci_host *cq_host;
>>> +    struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
>>> +    struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
>>> +    bool dma64 = false;
>>> +    u16 clk;
>>> +    int err;
>>> +
>>> +    host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD;
>>> +    cq_host = devm_kzalloc(&pdev->dev, sizeof(*cq_host), GFP_KERNEL);
>>> +    if (!cq_host) {
>>> +        dev_err(mmc_dev(host->mmc), "Unable to setup CQE: not enough memory\n");
>>> +        return;
>>> +    }
>>> +
>>> +    /*
>>> +     * For dwcmshc host controller we have to enable internal clock
>>> +     * before access to some registers from Vendor Specific Area 2.
>>> +     */
>>> +    clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> +    clk |= SDHCI_CLOCK_INT_EN;
>>> +    sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
>>> +    clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> +    if (!(clk & SDHCI_CLOCK_INT_EN)) {
>>> +        dev_err(mmc_dev(host->mmc), "Unable to setup CQE: internal clock enable error\n");
>>> +        goto free_cq_host;
>>> +    }
>>> +
>>> +    cq_host->mmio = host->ioaddr + priv->vendor_specific_area2;
>>> +    cq_host->ops = &dwcmshc_cqhci_ops;
>>> +
>>> +    /* Enable using of 128-bit task descriptors */
>>> +    dma64 = host->flags & SDHCI_USE_64_BIT_DMA;
>>> +    if (dma64) {
>>> +        dev_dbg(mmc_dev(host->mmc), "128-bit task descriptors\n");
>>> +        cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
>>> +    }
>>> +    err = cqhci_init(cq_host, host->mmc, dma64);
>>> +    if (err) {
>>> +        dev_err(mmc_dev(host->mmc), "Unable to setup CQE: error %d\n", err);
>>> +        goto int_clok_disable;
>>> +    }
>>> +
>>> +    dev_dbg(mmc_dev(host->mmc), "CQE init done\n");
>>> +
>>> +    return;
>>> +
>>> +int_clok_disable:
>> 'clok' is an odd abbreviation of 'clock'.  Perhaps 'clk' or just 'clock'
> Thanks, I'll fix it in the next version of the patch.
>>
>>> +    clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
>>> +    clk &= ~SDHCI_CLOCK_INT_EN;
>>> +    sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
>>> +
>>> +free_cq_host:
>>> +    devm_kfree(&pdev->dev, cq_host);
>>> +}
>>> +
>>>   static int dwcmshc_rk35xx_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
>>>   {
>>>       int err;
>>> @@ -471,7 +686,7 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>       struct rk35xx_priv *rk_priv = NULL;
>>>       const struct sdhci_pltfm_data *pltfm_data;
>>>       int err;
>>> -    u32 extra;
>>> +    u32 extra, caps;
>>>         pltfm_data = device_get_match_data(&pdev->dev);
>>>       if (!pltfm_data) {
>>> @@ -519,6 +734,8 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>         priv->vendor_specific_area1 =
>>>           sdhci_readl(host, DWCMSHC_P_VENDOR_AREA1) & DWCMSHC_AREA1_MASK;
>>> +    priv->vendor_specific_area2 =
>>> +        sdhci_readw(host, DWCMSHC_P_VENDOR_AREA2);
>>>         host->mmc_host_ops.request = dwcmshc_request;
>>>       host->mmc_host_ops.hs400_enhanced_strobe = dwcmshc_hs400_enhanced_strobe;
>>> @@ -547,6 +764,10 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>           sdhci_enable_v4_mode(host);
>>>   #endif
>>>   +    caps = sdhci_readl(host, SDHCI_CAPABILITIES);
>>> +    if (caps & SDHCI_CAN_64BIT_V4)
>>> +        sdhci_enable_v4_mode(host);
>>> +
>>>       host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
>>>         pm_runtime_get_noresume(dev);
>>> @@ -557,6 +778,14 @@ static int dwcmshc_probe(struct platform_device *pdev)
>>>       if (err)
>>>           goto err_rpm;
>>>   +    /* Setup Command Queue Engine if enabled */
>>> +    if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
>>> +        if (caps & SDHCI_CAN_64BIT_V4)
>>> +            dwcmshc_cqhci_init(host, pdev);
>>> +        else
>>> +            dev_warn(dev, "Cannot enable CQE without V4 mode support\n");
>>> +    }
>>> +
>>>       if (rk_priv)
>>>           dwcmshc_rk35xx_postinit(host, priv);
>>>