Re: [PATCH v12 18/25] irqchip: Add RISC-V incoming MSI controller early driver

From: Thomas Gleixner
Date: Fri Feb 16 2024 - 13:41:00 EST


On Sat, Jan 27 2024 at 21:47, Anup Patel wrote:
> +
> +#ifdef CONFIG_SMP
> +static irqreturn_t imsic_local_sync_handler(int irq, void *data)
> +{
> + imsic_local_sync();
> + return IRQ_HANDLED;
> +}
> +
> +static void imsic_ipi_send(unsigned int cpu)
> +{
> + struct imsic_local_config *local =
> + per_cpu_ptr(imsic->global.local, cpu);

Let it stick out. We switched to line length 100 quite some time
ago. Applies to the rest of the series too.

> + writel_relaxed(IMSIC_IPI_ID, local->msi_va);
> +}
> +
> +static void imsic_ipi_starting_cpu(void)
> +{
> + /* Enable IPIs for current CPU. */
> + __imsic_id_set_enable(IMSIC_IPI_ID);
> +
> + /* Enable virtual IPI used for IMSIC ID synchronization */
> + enable_percpu_irq(imsic->ipi_virq, 0);
> +}
> +
> +static void imsic_ipi_dying_cpu(void)
> +{
> + /*
> + * Disable virtual IPI used for IMSIC ID synchronization so
> + * that we don't receive ID synchronization requests.
> + */
> + disable_percpu_irq(imsic->ipi_virq);

Shouldn't this disable the hardware too, i.e.

__imsic_id_clear_enable()

?

> +}
> +
> +static int __init imsic_ipi_domain_init(void)
> +{
> + int virq;
> +
> + /* Create IMSIC IPI multiplexing */
> + virq = ipi_mux_create(IMSIC_NR_IPI, imsic_ipi_send);
> + if (virq <= 0)
> + return (virq < 0) ? virq : -ENOMEM;
> + imsic->ipi_virq = virq;
> +
> + /* First vIRQ is used for IMSIC ID synchronization */
> + virq = request_percpu_irq(imsic->ipi_virq, imsic_local_sync_handler,
> + "riscv-imsic-lsync", imsic->global.local);
> + if (virq)
> + return virq;

Please use a separate 'ret' variable. I had to read this 3 times to make
sense of it.

> + irq_set_status_flags(imsic->ipi_virq, IRQ_HIDDEN);
> + imsic->ipi_lsync_desc = irq_to_desc(imsic->ipi_virq);

What's so special about this particular IPI that it can't be handled
like all the other IPIs?

> +static int __init imsic_early_probe(struct fwnode_handle *fwnode)
> +{
> + int rc;
> + struct irq_domain *domain;

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations

> +
> + /* Find parent domain and register chained handler */
> + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
> + DOMAIN_BUS_ANY);
> + if (!domain) {
> + pr_err("%pfwP: Failed to find INTC domain\n", fwnode);
> + return -ENOENT;
> + }
> + imsic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
> + if (!imsic_parent_irq) {
> + pr_err("%pfwP: Failed to create INTC mapping\n", fwnode);
> + return -ENOENT;
> + }
> + irq_set_chained_handler(imsic_parent_irq, imsic_handle_irq);
> +
> + /* Initialize IPI domain */
> + rc = imsic_ipi_domain_init();
> + if (rc) {
> + pr_err("%pfwP: Failed to initialize IPI domain\n", fwnode);
> + return rc;

Leaves the chained handler around and enabled.

> diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
> +
> +#define imsic_csr_write(__c, __v) \
> +do { \
> + csr_write(CSR_ISELECT, __c); \
> + csr_write(CSR_IREG, __v); \
> +} while (0)

Any reason why these macros can't be inlines?

> +const struct imsic_global_config *imsic_get_global_config(void)
> +{
> + return imsic ? &imsic->global : NULL;
> +}
> +EXPORT_SYMBOL_GPL(imsic_get_global_config);

Why is this exported?

> +#define __imsic_id_read_clear_enabled(__id) \
> + __imsic_eix_read_clear((__id), false)
> +#define __imsic_id_read_clear_pending(__id) \
> + __imsic_eix_read_clear((__id), true)

Please use inlines.

> +void __imsic_eix_update(unsigned long base_id,
> + unsigned long num_id, bool pend, bool val)
> +{
> + unsigned long i, isel, ireg;
> + unsigned long id = base_id, last_id = base_id + num_id;
> +
> + while (id < last_id) {
> + isel = id / BITS_PER_LONG;
> + isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
> + isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;
> +
> + ireg = 0;
> + for (i = id & (__riscv_xlen - 1);
> + (id < last_id) && (i < __riscv_xlen); i++) {
> + ireg |= BIT(i);
> + id++;
> + }

This lacks a comment what this is doing.

> +
> + /*
> + * The IMSIC EIEx and EIPx registers are indirectly
> + * accessed via using ISELECT and IREG CSRs so we
> + * need to access these CSRs without getting preempted.
> + *
> + * All existing users of this function call this
> + * function with local IRQs disabled so we don't
> + * need to do anything special here.
> + */
> + if (val)
> + imsic_csr_set(isel, ireg);
> + else
> + imsic_csr_clear(isel, ireg);
> + }
> +}
> +
> +void imsic_local_sync(void)
> +{
> + struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> + struct imsic_local_config *mlocal;
> + struct imsic_vector *mvec;
> + unsigned long flags;
> + int i;
> +
> + raw_spin_lock_irqsave(&lpriv->ids_lock, flags);
> + for (i = 1; i <= imsic->global.nr_ids; i++) {
> + if (i == IMSIC_IPI_ID)
> + continue;
> +
> + if (test_bit(i, lpriv->ids_enabled_bitmap))
> + __imsic_id_set_enable(i);
> + else
> + __imsic_id_clear_enable(i);
> +
> + mvec = lpriv->ids_move[i];
> + lpriv->ids_move[i] = NULL;
> + if (mvec) {
> + if (__imsic_id_read_clear_pending(i)) {
> + mlocal = per_cpu_ptr(imsic->global.local,
> + mvec->cpu);
> + writel_relaxed(mvec->local_id, mlocal->msi_va);
> + }
> +
> + imsic_vector_free(&lpriv->vectors[i]);
> + }

Again an uncommented piece of magic which you will have forgotten what
it does 3 month down the road :)

> +
> + }
> + raw_spin_unlock_irqrestore(&lpriv->ids_lock, flags);
> +}
> +
> +void imsic_local_delivery(bool enable)
> +{
> + if (enable) {
> + imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_ENABLE_EITHRESHOLD);
> + imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_ENABLE_EIDELIVERY);
> + return;
> + }
> +
> + imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_DISABLE_EIDELIVERY);
> + imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_DISABLE_EITHRESHOLD);
> +}
> +
> +#ifdef CONFIG_SMP
> +static void imsic_remote_sync(unsigned int cpu)
> +{
> + /*
> + * We simply inject ID synchronization IPI to a target CPU
> + * if it is not same as the current CPU. The ipi_send_mask()
> + * implementation of IPI mux will inject ID synchronization
> + * IPI only for CPUs that have enabled it so offline CPUs
> + * won't receive IPI. An offline CPU will unconditionally
> + * synchronize IDs through imsic_starting_cpu() when the
> + * CPU is brought up.
> + */
> + if (cpu_online(cpu)) {
> + if (cpu != smp_processor_id())
> + __ipi_send_mask(imsic->ipi_lsync_desc, cpumask_of(cpu));

Still wondering why this can't use the regular API. There might be a
reason, but then it wants to be documented.

> + else
> + imsic_local_sync();
> + }
> +}
> +#else
> +static inline void imsic_remote_sync(unsigned int cpu)
> +{
> + imsic_local_sync();
> +}
> +#endif
> +
> +void imsic_vector_mask(struct imsic_vector *vec)
> +{
> + struct imsic_local_priv *lpriv;
> + unsigned long flags;
> +
> + lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> + if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> + return;
> +
> + raw_spin_lock_irqsave(&lpriv->ids_lock, flags);

AFAICT, this is used from an irqchip callback:

static void imsic_irq_mask(struct irq_data *d)
{
imsic_vector_mask(irq_data_get_irq_chip_data(d));
}

So no need to use irqsave() here. Those callbacks run always with
interrupts disabled when called from the core.

> +void imsic_vector_move(struct imsic_vector *old_vec,
> + struct imsic_vector *new_vec)
> +{
> + struct imsic_local_priv *old_lpriv, *new_lpriv;
> + unsigned long flags, flags1;
> +
> + if (WARN_ON(old_vec->cpu == new_vec->cpu))
> + return;
> +
> + old_lpriv = per_cpu_ptr(imsic->lpriv, old_vec->cpu);
> + if (WARN_ON(&old_lpriv->vectors[old_vec->local_id] != old_vec))
> + return;
> +
> + new_lpriv = per_cpu_ptr(imsic->lpriv, new_vec->cpu);
> + if (WARN_ON(&new_lpriv->vectors[new_vec->local_id] != new_vec))
> + return;
> +
> + raw_spin_lock_irqsave(&old_lpriv->ids_lock, flags);
> + raw_spin_lock_irqsave(&new_lpriv->ids_lock, flags1);

Lockdep should yell at you for this, rightfully so. And not only because
of the missing nested() annotation.

Assume there are two CPUs setting affinity for two different interrupts.

CPU0 moves an interrupt to CPU1 and CPU1 moves another interrupt to
CPU0. The resulting lock order is:

CPU0 CPU1
lock(lpriv[CPU0]); lock(lpriv[CPU1]);
lock(lpriv[CPU1]); lock(lpriv[CPU0]);

a classic ABBA deadlock.

You need to take those locks always in the same order. Look at
double_raw_lock() in kernel/sched/sched.h.

> + /* Unmask the new vector entry */
> + if (test_bit(old_vec->local_id, old_lpriv->ids_enabled_bitmap))
> + bitmap_set(new_lpriv->ids_enabled_bitmap,
> + new_vec->local_id, 1);

Either make that one line or please add brackets. See:

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#bracket-rules

> +static int __init imsic_local_init(void)
> +{
> + struct imsic_global_config *global = &imsic->global;
> + struct imsic_local_priv *lpriv;
> + struct imsic_vector *vec;
> + int cpu, i;
> +
> + /* Allocate per-CPU private state */
> + imsic->lpriv = alloc_percpu(typeof(*(imsic->lpriv)));
> + if (!imsic->lpriv)
> + return -ENOMEM;
> +
> + /* Setup per-CPU private state */
> + for_each_possible_cpu(cpu) {
> + lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> +
> + raw_spin_lock_init(&lpriv->ids_lock);
> +
> + /* Allocate enabled bitmap */
> + lpriv->ids_enabled_bitmap = bitmap_zalloc(global->nr_ids + 1,
> + GFP_KERNEL);
> + if (!lpriv->ids_enabled_bitmap) {
> + imsic_local_cleanup();
> + return -ENOMEM;
> + }
> +
> + /* Allocate move array */
> + lpriv->ids_move = kcalloc(global->nr_ids + 1,
> + sizeof(*lpriv->ids_move), GFP_KERNEL);
> + if (!lpriv->ids_move) {
> + imsic_local_cleanup();
> + return -ENOMEM;
> + }
> +
> + /* Allocate vector array */
> + lpriv->vectors = kcalloc(global->nr_ids + 1,
> + sizeof(*lpriv->vectors), GFP_KERNEL);
> + if (!lpriv->vectors) {
> + imsic_local_cleanup();
> + return -ENOMEM;

Third instance of the same pattern. goto cleanup; perhaps?

> +struct imsic_vector *imsic_vector_alloc(unsigned int hwirq,
> + const struct cpumask *mask)
> +{
> + struct imsic_vector *vec = NULL;
> + struct imsic_local_priv *lpriv;
> + unsigned long flags;
> + unsigned int cpu;
> + int local_id;
> +
> + raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> + local_id = irq_matrix_alloc(imsic->matrix, mask, false, &cpu);
> + raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> + if (local_id < 0)
> + return NULL;
> +
> + lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> + vec = &lpriv->vectors[local_id];
> + vec->hwirq = hwirq;
> +
> + return vec;
> +}

..

> +int imsic_hwirq_alloc(void)
> +{
> + int ret;
> + unsigned long flags;
> +
> + raw_spin_lock_irqsave(&imsic->hwirqs_lock, flags);
> + ret = bitmap_find_free_region(imsic->hwirqs_used_bitmap,
> + imsic->nr_hwirqs, 0);
> + raw_spin_unlock_irqrestore(&imsic->hwirqs_lock, flags);
> +
> + return ret;
> +}

This part is just to create a unique hwirq number, right?

> +
> + /* Find number of guest index bits in MSI address */
> + rc = of_property_read_u32(to_of_node(fwnode),
> + "riscv,guest-index-bits",
> + &global->guest_index_bits);
> + if (rc)
> + global->guest_index_bits = 0;

So here you get the index bits, but then 50 lines further down you do
sanity checking. Wouldn't it make sense to do that right here?

Same for the other bits.

> +
> +/*
> + * The IMSIC driver uses 1 IPI for ID synchronization and
> + * arch/riscv/kernel/smp.c require 6 IPIs so we fix the
> + * total number of IPIs to 8.
> + */
> +#define IMSIC_IPI_ID 1
> +#define IMSIC_NR_IPI 8
> +
> +struct imsic_vector {
> + /* Fixed details of the vector */
> + unsigned int cpu;
> + unsigned int local_id;
> + /* Details saved by driver in the vector */
> + unsigned int hwirq;
> +};
> +
> +struct imsic_local_priv {
> + /* Local state of interrupt identities */
> + raw_spinlock_t ids_lock;
> + unsigned long *ids_enabled_bitmap;
> + struct imsic_vector **ids_move;
> +
> + /* Local vector table */
> + struct imsic_vector *vectors;

Please make those structs tabular:

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#struct-declarations-and-initializers

> +void __imsic_eix_update(unsigned long base_id,
> + unsigned long num_id, bool pend, bool val);
> +
> +#define __imsic_id_set_enable(__id) \
> + __imsic_eix_update((__id), 1, false, true)
> +#define __imsic_id_clear_enable(__id) \
> + __imsic_eix_update((__id), 1, false, false)

inlines please.

Thanks,

tglx