Re: [PATCH v3 2/2] vfio/cdx: add interrupt support

From: Alex Williamson
Date: Fri Mar 01 2024 - 12:45:58 EST


On Mon, 26 Feb 2024 14:18:13 +0530
Nipun Gupta <nipun.gupta@xxxxxxx> wrote:

> Support the following ioctls for CDX devices:
> - VFIO_DEVICE_GET_IRQ_INFO
> - VFIO_DEVICE_SET_IRQS
>
> This allows user to set an eventfd for cdx device interrupts and
> trigger this interrupt eventfd from userspace.
> All CDX device interrupts are MSIs. The MSIs are allocated from the
> CDX-MSI domain.
>
> Signed-off-by: Nipun Gupta <nipun.gupta@xxxxxxx>
> Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@xxxxxxx>
> ---
>
> This patch depends on CDX MSI support patch:
> https://lore.kernel.org/lkml/20240226082816.100872-1-nipun.gupta@xxxxxxx/
>
> Changes v2->v3:
> - Use generic MSI alloc/free APIs instead of CDX MSI APIs
> - Rebased on Linux 6.8-rc6
>
> Changes v1->v2:
> - Rebased on Linux 6.7-rc1
>
> drivers/vfio/cdx/Makefile | 2 +-
> drivers/vfio/cdx/intr.c | 211 +++++++++++++++++++++++++++++++++++++
> drivers/vfio/cdx/main.c | 60 ++++++++++-
> drivers/vfio/cdx/private.h | 18 ++++
> 4 files changed, 289 insertions(+), 2 deletions(-)
> create mode 100644 drivers/vfio/cdx/intr.c
>
> diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
> index cd4a2e6fe609..df92b320122a 100644
> --- a/drivers/vfio/cdx/Makefile
> +++ b/drivers/vfio/cdx/Makefile
> @@ -5,4 +5,4 @@
>
> obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
>
> -vfio-cdx-objs := main.o
> +vfio-cdx-objs := main.o intr.o
> diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c
> new file mode 100644
> index 000000000000..4637b57d0242
> --- /dev/null
> +++ b/drivers/vfio/cdx/intr.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/eventfd.h>
> +#include <linux/msi.h>
> +#include <linux/interrupt.h>
> +
> +#include "linux/cdx/cdx_bus.h"
> +#include "private.h"
> +
> +static irqreturn_t vfio_cdx_msihandler(int irq_no, void *arg)
> +{
> + struct eventfd_ctx *trigger = arg;
> +
> + eventfd_signal(trigger);
> + return IRQ_HANDLED;
> +}
> +
> +static int vfio_cdx_msi_enable(struct vfio_cdx_device *vdev, int nvec)
> +{
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct device *dev = vdev->vdev.dev;
> + int msi_idx, ret;
> +
> + vdev->cdx_irqs = kcalloc(nvec, sizeof(struct vfio_cdx_irq), GFP_KERNEL);
> + if (!vdev->cdx_irqs)
> + return -ENOMEM;
> +
> + ret = cdx_enable_msi(cdx_dev);
> + if (ret) {
> + kfree(vdev->cdx_irqs);
> + return ret;
> + }
> +
> + /* Allocate cdx MSIs */
> + ret = msi_domain_alloc_irqs(dev, MSI_DEFAULT_DOMAIN, nvec);
> + if (ret) {
> + cdx_disable_msi(cdx_dev);
> + kfree(vdev->cdx_irqs);
> + return ret;
> + }
> +
> + for (msi_idx = 0; msi_idx < nvec; msi_idx++)
> + vdev->cdx_irqs[msi_idx].irq_no = msi_get_virq(dev, msi_idx);
> +
> + vdev->irq_count = nvec;
> + vdev->config_msi = 1;
> +
> + return 0;
> +}
> +
> +static int vfio_cdx_msi_set_vector_signal(struct vfio_cdx_device *vdev,
> + int vector, int fd)
> +{
> + struct eventfd_ctx *trigger;
> + int irq_no, ret;
> +
> + if (vector < 0 || vector >= vdev->irq_count)
> + return -EINVAL;
> +
> + irq_no = vdev->cdx_irqs[vector].irq_no;
> +
> + if (vdev->cdx_irqs[vector].trigger) {
> + free_irq(irq_no, vdev->cdx_irqs[vector].trigger);
> + kfree(vdev->cdx_irqs[vector].name);
> + eventfd_ctx_put(vdev->cdx_irqs[vector].trigger);
> + vdev->cdx_irqs[vector].trigger = NULL;
> + }
> +
> + if (fd < 0)
> + return 0;
> +
> + vdev->cdx_irqs[vector].name = kasprintf(GFP_KERNEL, "vfio-msi[%d](%s)",
> + vector, dev_name(vdev->vdev.dev));
> + if (!vdev->cdx_irqs[vector].name)
> + return -ENOMEM;
> +
> + trigger = eventfd_ctx_fdget(fd);
> + if (IS_ERR(trigger)) {
> + kfree(vdev->cdx_irqs[vector].name);
> + return PTR_ERR(trigger);
> + }
> +
> + ret = request_irq(irq_no, vfio_cdx_msihandler, 0,
> + vdev->cdx_irqs[vector].name, trigger);
> + if (ret) {
> + kfree(vdev->cdx_irqs[vector].name);
> + eventfd_ctx_put(trigger);
> + return ret;
> + }
> +
> + vdev->cdx_irqs[vector].trigger = trigger;
> +
> + return 0;
> +}
> +
> +static int vfio_cdx_msi_set_block(struct vfio_cdx_device *vdev,
> + unsigned int start, unsigned int count,
> + int32_t *fds)
> +{
> + int i, j, ret = 0;
> +
> + if (start >= vdev->irq_count || start + count > vdev->irq_count)
> + return -EINVAL;
> +
> + for (i = 0, j = start; i < count && !ret; i++, j++) {
> + int fd = fds ? fds[i] : -1;
> +
> + ret = vfio_cdx_msi_set_vector_signal(vdev, j, fd);
> + }
> +
> + if (ret) {
> + for (--j; j >= (int)start; j--)
> + vfio_cdx_msi_set_vector_signal(vdev, j, -1);
> + }
> +
> + return ret;
> +}
> +
> +static void vfio_cdx_msi_disable(struct vfio_cdx_device *vdev)
> +{
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct device *dev = vdev->vdev.dev;
> +
> + vfio_cdx_msi_set_block(vdev, 0, vdev->irq_count, NULL);
> +
> + if (!vdev->config_msi)
> + return;
> +
> + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
> + cdx_disable_msi(cdx_dev);
> + kfree(vdev->cdx_irqs);
> +
> + vdev->cdx_irqs = NULL;
> + vdev->irq_count = 0;
> + vdev->config_msi = 0;
> +}
> +
> +static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev,
> + unsigned int index, unsigned int start,
> + unsigned int count, u32 flags,
> + void *data)
> +{
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + int i;
> +
> + if (start + count > cdx_dev->num_msi)
> + return -EINVAL;
> +
> + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
> + vfio_cdx_msi_disable(vdev);
> + return 0;
> + }
> +
> + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
> + s32 *fds = data;
> + int ret;
> +
> + if (vdev->config_msi)
> + return vfio_cdx_msi_set_block(vdev, start, count,
> + fds);
> + ret = vfio_cdx_msi_enable(vdev, cdx_dev->num_msi);
> + if (ret)
> + return ret;
> +
> + ret = vfio_cdx_msi_set_block(vdev, start, count, fds);
> + if (ret)
> + vfio_cdx_msi_disable(vdev);
> +
> + return ret;
> + }
> +
> + for (i = start; i < start + count; i++) {
> + if (!vdev->cdx_irqs[i].trigger)
> + continue;
> + if (flags & VFIO_IRQ_SET_DATA_NONE)
> + eventfd_signal(vdev->cdx_irqs[i].trigger);
> + }
> +
> + return 0;
> +}
> +
> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
> + u32 flags, unsigned int index,
> + unsigned int start, unsigned int count,
> + void *data)
> +{
> + if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
> + return vfio_cdx_set_msi_trigger(vdev, index, start,
> + count, flags, data);
> + else
> + return -EINVAL;
> +}
> +
> +/* Free All IRQs for the given device */
> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev)
> +{
> + /*
> + * Device does not support any interrupt or the interrupts
> + * were not configured
> + */
> + if (!vdev->cdx_irqs)
> + return;
> +
> + vfio_cdx_set_msi_trigger(vdev, 1, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL);
> +}
> diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c
> index 9cff8d75789e..eb1c5879150b 100644
> --- a/drivers/vfio/cdx/main.c
> +++ b/drivers/vfio/cdx/main.c
> @@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)
>
> kfree(vdev->regions);
> cdx_dev_reset(core_vdev->dev);
> + vfio_cdx_irqs_cleanup(vdev);
> }
>
> static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
> @@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
> info.flags |= VFIO_DEVICE_FLAGS_RESET;
>
> info.num_regions = cdx_dev->res_count;
> - info.num_irqs = 0;
> + info.num_irqs = cdx_dev->num_msi ? 1 : 0;
>
> return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> }
> @@ -152,6 +153,59 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
> return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> }
>
> +static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev,
> + struct vfio_irq_info __user *arg)
> +{
> + unsigned long minsz = offsetofend(struct vfio_irq_info, count);
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct vfio_irq_info info;
> +
> + if (copy_from_user(&info, arg, minsz))
> + return -EFAULT;
> +
> + if (info.argsz < minsz)
> + return -EINVAL;
> +
> + if (info.index >= 1)
> + return -EINVAL;
> +
> + info.flags = VFIO_IRQ_INFO_EVENTFD;


I think the way you're using this you'd also need the
VFIO_IRQ_INFO_NORESIZE to indicate the MSI range cannot be expanded
from the initial setting. Thanks,

Alex

> + info.count = cdx_dev->num_msi;
> +
> + return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> +}
> +
> +static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev,
> + struct vfio_irq_set __user *arg)
> +{
> + unsigned long minsz = offsetofend(struct vfio_irq_set, count);
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct vfio_irq_set hdr;
> + size_t data_size = 0;
> + u8 *data = NULL;
> + int ret = 0;
> +
> + if (copy_from_user(&hdr, arg, minsz))
> + return -EFAULT;
> +
> + ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi,
> + 1, &data_size);
> + if (ret)
> + return ret;
> +
> + if (data_size) {
> + data = memdup_user(arg->data, data_size);
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> + }
> +
> + ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
> + hdr.start, hdr.count, data);
> + kfree(data);
> +
> + return ret;
> +}
> +
> static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
> unsigned int cmd, unsigned long arg)
> {
> @@ -164,6 +218,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
> return vfio_cdx_ioctl_get_info(vdev, uarg);
> case VFIO_DEVICE_GET_REGION_INFO:
> return vfio_cdx_ioctl_get_region_info(vdev, uarg);
> + case VFIO_DEVICE_GET_IRQ_INFO:
> + return vfio_cdx_ioctl_get_irq_info(vdev, uarg);
> + case VFIO_DEVICE_SET_IRQS:
> + return vfio_cdx_ioctl_set_irqs(vdev, uarg);
> case VFIO_DEVICE_RESET:
> return cdx_dev_reset(core_vdev->dev);
> default:
> diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h
> index 8e9d25913728..7a8477ae4652 100644
> --- a/drivers/vfio/cdx/private.h
> +++ b/drivers/vfio/cdx/private.h
> @@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index)
> return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
> }
>
> +struct vfio_cdx_irq {
> + u32 flags;
> + u32 count;
> + int irq_no;
> + struct eventfd_ctx *trigger;
> + char *name;
> +};
> +
> struct vfio_cdx_region {
> u32 flags;
> u32 type;
> @@ -25,6 +33,16 @@ struct vfio_cdx_device {
> struct vfio_cdx_region *regions;
> u32 flags;
> #define BME_SUPPORT BIT(0)
> + struct vfio_cdx_irq *cdx_irqs;
> + u32 irq_count;
> + u32 config_msi;
> };
>
> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
> + u32 flags, unsigned int index,
> + unsigned int start, unsigned int count,
> + void *data);
> +
> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev);
> +
> #endif /* VFIO_CDX_PRIVATE_H */