Re: [PATCH v15 08/10] irqchip/riscv-aplic: Add support for MSI-mode

From: Anup Patel
Date: Wed Mar 06 2024 - 01:53:10 EST


On Wed, Mar 6, 2024 at 11:13 AM Samuel Holland
<samuel.holland@xxxxxxxxxx> wrote:
>
> Hi Anup,
>
> On 2024-02-25 10:07 PM, Anup Patel wrote:
> > The RISC-V advanced platform-level interrupt controller (APLIC) has
> > two modes of operation: 1) Direct mode and 2) MSI mode.
> > (For more details, refer to https://github.com/riscv/riscv-aia)
> >
> > In APLIC MSI-mode, wired interrupts are forwarded as message signaled
> > interrupts (MSIs) to CPUs via IMSIC.
> >
> > Extend the existing APLIC irqchip driver to support MSI-mode for
> > RISC-V platforms having both wired interrupts and MSIs.
> >
> > Signed-off-by: Anup Patel <apatel@xxxxxxxxxxxxxxxx>
> > ---
> > drivers/irqchip/Kconfig | 6 +
> > drivers/irqchip/Makefile | 1 +
> > drivers/irqchip/irq-riscv-aplic-main.c | 2 +-
> > drivers/irqchip/irq-riscv-aplic-main.h | 8 +
> > drivers/irqchip/irq-riscv-aplic-msi.c | 263 +++++++++++++++++++++++++
> > 5 files changed, 279 insertions(+), 1 deletion(-)
> > create mode 100644 drivers/irqchip/irq-riscv-aplic-msi.c
> >
> > diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> > index dbc8811d3764..806b5fccb3e8 100644
> > --- a/drivers/irqchip/Kconfig
> > +++ b/drivers/irqchip/Kconfig
> > @@ -551,6 +551,12 @@ config RISCV_APLIC
> > depends on RISCV
> > select IRQ_DOMAIN_HIERARCHY
> >
> > +config RISCV_APLIC_MSI
> > + bool
> > + depends on RISCV_APLIC
> > + select GENERIC_MSI_IRQ
> > + default RISCV_APLIC
> > +
> > config RISCV_IMSIC
> > bool
> > depends on RISCV
> > diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> > index 7f8289790ed8..47995fdb2c60 100644
> > --- a/drivers/irqchip/Makefile
> > +++ b/drivers/irqchip/Makefile
> > @@ -96,6 +96,7 @@ obj-$(CONFIG_CSKY_MPINTC) += irq-csky-mpintc.o
> > obj-$(CONFIG_CSKY_APB_INTC) += irq-csky-apb-intc.o
> > obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o
> > obj-$(CONFIG_RISCV_APLIC) += irq-riscv-aplic-main.o irq-riscv-aplic-direct.o
> > +obj-$(CONFIG_RISCV_APLIC_MSI) += irq-riscv-aplic-msi.o
> > obj-$(CONFIG_RISCV_IMSIC) += irq-riscv-imsic-state.o irq-riscv-imsic-early.o irq-riscv-imsic-platform.o
> > obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o
> > obj-$(CONFIG_IMX_IRQSTEER) += irq-imx-irqsteer.o
> > diff --git a/drivers/irqchip/irq-riscv-aplic-main.c b/drivers/irqchip/irq-riscv-aplic-main.c
> > index 160ff99d6979..774a0c97fdab 100644
> > --- a/drivers/irqchip/irq-riscv-aplic-main.c
> > +++ b/drivers/irqchip/irq-riscv-aplic-main.c
> > @@ -187,7 +187,7 @@ static int aplic_probe(struct platform_device *pdev)
> > if (is_of_node(dev->fwnode))
> > msi_mode = of_property_present(to_of_node(dev->fwnode), "msi-parent");
> > if (msi_mode)
> > - rc = -ENODEV;
> > + rc = aplic_msi_setup(dev, regs);
> > else
> > rc = aplic_direct_setup(dev, regs);
> > if (rc)
> > diff --git a/drivers/irqchip/irq-riscv-aplic-main.h b/drivers/irqchip/irq-riscv-aplic-main.h
> > index 4cfbadf37ddc..4393927d8c80 100644
> > --- a/drivers/irqchip/irq-riscv-aplic-main.h
> > +++ b/drivers/irqchip/irq-riscv-aplic-main.h
> > @@ -40,5 +40,13 @@ int aplic_irqdomain_translate(struct irq_fwspec *fwspec, u32 gsi_base,
> > void aplic_init_hw_global(struct aplic_priv *priv, bool msi_mode);
> > int aplic_setup_priv(struct aplic_priv *priv, struct device *dev, void __iomem *regs);
> > int aplic_direct_setup(struct device *dev, void __iomem *regs);
> > +#ifdef CONFIG_RISCV_APLIC_MSI
> > +int aplic_msi_setup(struct device *dev, void __iomem *regs);
> > +#else
> > +static inline int aplic_msi_setup(struct device *dev, void __iomem *regs)
> > +{
> > + return -ENODEV;
> > +}
> > +#endif
> >
> > #endif
> > diff --git a/drivers/irqchip/irq-riscv-aplic-msi.c b/drivers/irqchip/irq-riscv-aplic-msi.c
> > new file mode 100644
> > index 000000000000..b2a25e011bb2
> > --- /dev/null
> > +++ b/drivers/irqchip/irq-riscv-aplic-msi.c
> > @@ -0,0 +1,263 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> > + * Copyright (C) 2022 Ventana Micro Systems Inc.
> > + */
> > +
> > +#include <linux/bitfield.h>
> > +#include <linux/bitops.h>
> > +#include <linux/cpu.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/irqchip.h>
> > +#include <linux/irqchip/riscv-aplic.h>
> > +#include <linux/irqchip/riscv-imsic.h>
> > +#include <linux/module.h>
> > +#include <linux/msi.h>
> > +#include <linux/of_irq.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/printk.h>
> > +#include <linux/smp.h>
> > +
> > +#include "irq-riscv-aplic-main.h"
> > +
> > +static void aplic_msi_irq_unmask(struct irq_data *d)
> > +{
> > + aplic_irq_unmask(d);
> > + irq_chip_unmask_parent(d);
> > +}
> > +
> > +static void aplic_msi_irq_mask(struct irq_data *d)
> > +{
> > + irq_chip_mask_parent(d);
> > + aplic_irq_mask(d);
>
> Surely it's not necessary to mask an interrupt at both the APLIC and the
> receiver of the MSI. This ends up with __imsic_local_sync() in the hot path,
> which adds significant overhead.

It's necessary to mask at both places because __imsic_local_sync()
may happen on another CPU, allowing another MSI to sneak in. We are
also doing the exact same thing for PCI devices.

>
> I would suggest the following:
>
> .irq_mask = aplic_irq_mask,
> .irq_unmask = aplic_irq_unmask,
> .irq_enable = irq_chip_enable_parent,
> .irq_disable = irq_chip_disable_parent,

The x86 and ARM drivers don't do it this way so I am not sure why
we should.

>
> > +}
> > +
> > +static void aplic_msi_irq_eoi(struct irq_data *d)
> > +{
> > + struct aplic_priv *priv = irq_data_get_irq_chip_data(d);
> > + u32 reg_off, reg_mask;
> > +
> > + /*
> > + * EOI handling is required only for level-triggered interrupts
> > + * when APLIC is in MSI mode.
> > + */
> > +
> > + reg_off = APLIC_CLRIP_BASE + ((d->hwirq / APLIC_IRQBITS_PER_REG) * 4);
> > + reg_mask = BIT(d->hwirq % APLIC_IRQBITS_PER_REG);
> > + switch (irqd_get_trigger_type(d)) {
> > + case IRQ_TYPE_LEVEL_LOW:
> > + /*
> > + * If the rectified input value of the source is still low
> > + * then set the interrupt pending bit so that interrupt is
> > + * re-triggered via MSI.
> > + */
> > + if (!(readl(priv->regs + reg_off) & reg_mask))
> > + writel(d->hwirq, priv->regs + APLIC_SETIPNUM_LE);
>
> When a level-low interrupt is active, the rectified input value is high, so this
> case can be combined with the level-high case below.
>
> In fact, there's no need to check the input value at all. The AIA spec mentions
> this interrupt flow explicitly (section 4.9.2, see also section 4.7):
>
> "A second option is for the interrupt service routine to write the APLIC’s
> source identity number for the interrupt to the domain’s setipnum register just
> before exiting. This will cause the interrupt’s pending bit to be set to one
> again if the source is still asserting an interrupt, but not if the source is
> not asserting an interrupt."

Ahh, good catch. I will update it in the next revision.

This would certainly help save one MMIO trap for KVM RISC-V, since
we trap-and-emulate the APLIC.

>
> Unfortunately, QEMU currently gets this wrong, so the input value check is
> necessary for testing this series until QEMU is fixed.

I will send the QEMU patch as well.

>
> > + break;
> > + case IRQ_TYPE_LEVEL_HIGH:
> > + /*
> > + * If the rectified input value of the source is still high
> > + * then set the interrupt pending bit so that interrupt is
> > + * re-triggered via MSI.
> > + */
> > + if (readl(priv->regs + reg_off) & reg_mask)
> > + writel(d->hwirq, priv->regs + APLIC_SETIPNUM_LE);
> > + break;
> > + }
> > +}
> > +
> > +static void aplic_msi_write_msg(struct irq_data *d, struct msi_msg *msg)
> > +{
> > + unsigned int group_index, hart_index, guest_index, val;
> > + struct aplic_priv *priv = irq_data_get_irq_chip_data(d);
> > + struct aplic_msicfg *mc = &priv->msicfg;
> > + phys_addr_t tppn, tbppn, msg_addr;
> > + void __iomem *target;
> > +
> > + /* For zeroed MSI, simply write zero into the target register */
> > + if (!msg->address_hi && !msg->address_lo && !msg->data) {
> > + target = priv->regs + APLIC_TARGET_BASE;
> > + target += (d->hwirq - 1) * sizeof(u32);
> > + writel(0, target);
> > + return;
> > + }
> > +
> > + /* Sanity check on message data */
> > + WARN_ON(msg->data > APLIC_TARGET_EIID_MASK);
> > +
> > + /* Compute target MSI address */
> > + msg_addr = (((u64)msg->address_hi) << 32) | msg->address_lo;
> > + tppn = msg_addr >> APLIC_xMSICFGADDR_PPN_SHIFT;
> > +
> > + /* Compute target HART Base PPN */
> > + tbppn = tppn;
> > + tbppn &= ~APLIC_xMSICFGADDR_PPN_HART(mc->lhxs);
> > + tbppn &= ~APLIC_xMSICFGADDR_PPN_LHX(mc->lhxw, mc->lhxs);
> > + tbppn &= ~APLIC_xMSICFGADDR_PPN_HHX(mc->hhxw, mc->hhxs);
> > + WARN_ON(tbppn != mc->base_ppn);
> > +
> > + /* Compute target group and hart indexes */
> > + group_index = (tppn >> APLIC_xMSICFGADDR_PPN_HHX_SHIFT(mc->hhxs)) &
> > + APLIC_xMSICFGADDR_PPN_HHX_MASK(mc->hhxw);
> > + hart_index = (tppn >> APLIC_xMSICFGADDR_PPN_LHX_SHIFT(mc->lhxs)) &
> > + APLIC_xMSICFGADDR_PPN_LHX_MASK(mc->lhxw);
> > + hart_index |= (group_index << mc->lhxw);
> > + WARN_ON(hart_index > APLIC_TARGET_HART_IDX_MASK);
> > +
> > + /* Compute target guest index */
> > + guest_index = tppn & APLIC_xMSICFGADDR_PPN_HART(mc->lhxs);
> > + WARN_ON(guest_index > APLIC_TARGET_GUEST_IDX_MASK);
> > +
> > + /* Update IRQ TARGET register */
> > + target = priv->regs + APLIC_TARGET_BASE;
> > + target += (d->hwirq - 1) * sizeof(u32);
> > + val = FIELD_PREP(APLIC_TARGET_HART_IDX, hart_index);
> > + val |= FIELD_PREP(APLIC_TARGET_GUEST_IDX, guest_index);
> > + val |= FIELD_PREP(APLIC_TARGET_EIID, msg->data);
> > + writel(val, target);
> > +}
> > +
> > +static void aplic_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
> > +{
> > + arg->desc = desc;
> > + arg->hwirq = (u32)desc->data.icookie.value;
> > +}
> > +
> > +static int aplic_msi_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
> > + unsigned long *hwirq, unsigned int *type)
> > +{
> > + struct msi_domain_info *info = d->host_data;
> > + struct aplic_priv *priv = info->data;
> > +
> > + return aplic_irqdomain_translate(fwspec, priv->gsi_base, hwirq, type);
> > +}
> > +
> > +static const struct msi_domain_template aplic_msi_template = {
> > + .chip = {
> > + .name = "APLIC-MSI",
> > + .irq_mask = aplic_msi_irq_mask,
> > + .irq_unmask = aplic_msi_irq_unmask,
> > + .irq_set_type = aplic_irq_set_type,
> > + .irq_eoi = aplic_msi_irq_eoi,
> > +#ifdef CONFIG_SMP
> > + .irq_set_affinity = irq_chip_set_affinity_parent,
> > +#endif
> > + .irq_write_msi_msg = aplic_msi_write_msg,
> > + .flags = IRQCHIP_SET_TYPE_MASKED |
> > + IRQCHIP_SKIP_SET_WAKE |
> > + IRQCHIP_MASK_ON_SUSPEND,
> > + },
> > +
> > + .ops = {
> > + .set_desc = aplic_msi_set_desc,
> > + .msi_translate = aplic_msi_translate,
> > + },
> > +
> > + .info = {
> > + .bus_token = DOMAIN_BUS_WIRED_TO_MSI,
> > + .flags = MSI_FLAG_USE_DEV_FWNODE,
> > + .handler = handle_fasteoi_irq,
>
> msi_domain_ops_init() requires .handler_name to be set, or .handler is ignored.
> Either that needs to be changed, or .handler_name needs to be provided here.
> Since the handler is not set, currently the EOI logic for level interrupts is
> never run.

That's right; I will update it in the next revision.

Regards,
Anup

>
> Regards,
> Samuel
>
> > + },
> > +};
> > +
> > +int aplic_msi_setup(struct device *dev, void __iomem *regs)
> > +{
> > + const struct imsic_global_config *imsic_global;
> > + struct aplic_priv *priv;
> > + struct aplic_msicfg *mc;
> > + phys_addr_t pa;
> > + int rc;
> > +
> > + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
> > + if (!priv)
> > + return -ENOMEM;
> > +
> > + rc = aplic_setup_priv(priv, dev, regs);
> > + if (rc) {
> > + dev_err(dev, "failed to create APLIC context\n");
> > + return rc;
> > + }
> > + mc = &priv->msicfg;
> > +
> > + /*
> > + * The APLIC outgoing MSI config registers assume target MSI
> > + * controller to be RISC-V AIA IMSIC controller.
> > + */
> > + imsic_global = imsic_get_global_config();
> > + if (!imsic_global) {
> > + dev_err(dev, "IMSIC global config not found\n");
> > + return -ENODEV;
> > + }
> > +
> > + /* Find number of guest index bits (LHXS) */
> > + mc->lhxs = imsic_global->guest_index_bits;
> > + if (APLIC_xMSICFGADDRH_LHXS_MASK < mc->lhxs) {
> > + dev_err(dev, "IMSIC guest index bits big for APLIC LHXS\n");
> > + return -EINVAL;
> > + }
> > +
> > + /* Find number of HART index bits (LHXW) */
> > + mc->lhxw = imsic_global->hart_index_bits;
> > + if (APLIC_xMSICFGADDRH_LHXW_MASK < mc->lhxw) {
> > + dev_err(dev, "IMSIC hart index bits big for APLIC LHXW\n");
> > + return -EINVAL;
> > + }
> > +
> > + /* Find number of group index bits (HHXW) */
> > + mc->hhxw = imsic_global->group_index_bits;
> > + if (APLIC_xMSICFGADDRH_HHXW_MASK < mc->hhxw) {
> > + dev_err(dev, "IMSIC group index bits big for APLIC HHXW\n");
> > + return -EINVAL;
> > + }
> > +
> > + /* Find first bit position of group index (HHXS) */
> > + mc->hhxs = imsic_global->group_index_shift;
> > + if (mc->hhxs < (2 * APLIC_xMSICFGADDR_PPN_SHIFT)) {
> > + dev_err(dev, "IMSIC group index shift should be >= %d\n",
> > + (2 * APLIC_xMSICFGADDR_PPN_SHIFT));
> > + return -EINVAL;
> > + }
> > + mc->hhxs -= (2 * APLIC_xMSICFGADDR_PPN_SHIFT);
> > + if (APLIC_xMSICFGADDRH_HHXS_MASK < mc->hhxs) {
> > + dev_err(dev, "IMSIC group index shift big for APLIC HHXS\n");
> > + return -EINVAL;
> > + }
> > +
> > + /* Compute PPN base */
> > + mc->base_ppn = imsic_global->base_addr >> APLIC_xMSICFGADDR_PPN_SHIFT;
> > + mc->base_ppn &= ~APLIC_xMSICFGADDR_PPN_HART(mc->lhxs);
> > + mc->base_ppn &= ~APLIC_xMSICFGADDR_PPN_LHX(mc->lhxw, mc->lhxs);
> > + mc->base_ppn &= ~APLIC_xMSICFGADDR_PPN_HHX(mc->hhxw, mc->hhxs);
> > +
> > + /* Setup global config and interrupt delivery */
> > + aplic_init_hw_global(priv, true);
> > +
> > + /* Set the APLIC device MSI domain if not available */
> > + if (!dev_get_msi_domain(dev)) {
> > + /*
> > + * The device MSI domain for OF devices is only set at the
> > + * time of populating/creating OF device. If the device MSI
> > + * domain is discovered later after the OF device is created
> > + * then we need to set it explicitly before using any platform
> > + * MSI functions.
> > + *
> > + * In case of APLIC device, the parent MSI domain is always
> > + * IMSIC and the IMSIC MSI domains are created later through
> > + * the platform driver probing so we set it explicitly here.
> > + */
> > + if (is_of_node(dev->fwnode))
> > + of_msi_configure(dev, to_of_node(dev->fwnode));
> > + }
> > +
> > + if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN, &aplic_msi_template,
> > + priv->nr_irqs + 1, priv, priv)) {
> > + dev_err(dev, "failed to create MSI irq domain\n");
> > + return -ENOMEM;
> > + }
> > +
> > + /* Advertise the interrupt controller */
> > + pa = priv->msicfg.base_ppn << APLIC_xMSICFGADDR_PPN_SHIFT;
> > + dev_info(dev, "%d interrupts forwared to MSI base %pa\n", priv->nr_irqs, &pa);
> > +
> > + return 0;
> > +}
>