Re: [PATCH v2 6/7] vfio/platform: Create persistent IRQ handlers

From: Eric Auger
Date: Mon Mar 11 2024 - 05:27:21 EST


Hi Alex,

On 3/9/24 00:05, Alex Williamson wrote:
> The vfio-platform SET_IRQS ioctl currently allows loopback triggering of
> an interrupt before a signaling eventfd has been configured by the user,
> which thereby allows a NULL pointer dereference.
>
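For readers following along: the deref is reachable because the DATA_NONE
loopback in vfio_platform_set_irq_trigger() calls the handler directly, and
the pre-patch handler assumed a configured trigger. A condensed sketch of the
pre-patch paths (paraphrased from the removed lines in the diff below, not
verbatim):

	/* handler: signals the eventfd unconditionally */
	static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
	{
		struct vfio_platform_irq *irq_ctx = dev_id;

		eventfd_signal(irq_ctx->trigger);  /* NULL if no eventfd configured yet */
		return IRQ_HANDLED;
	}

	/* SET_IRQS loopback: reachable before any eventfd is set */
	if (flags & VFIO_IRQ_SET_DATA_NONE)
		handler(irq->hwirq, irq);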
> Rather than register the IRQ relative to a valid trigger, register all
> IRQs in a disabled state in the device open path. This allows mask
> operations on the IRQ to nest within the overall enable state governed
> by a valid eventfd signal. This decouples @masked, protected by the
> @lock spinlock, from @trigger, protected via the @igate mutex.
>
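As a refresher on the nesting this relies on: the genirq core keeps a per-IRQ
disable depth, so the mask/unmask pair can nest inside the trigger set/clear
pair. A schematic of the depth counting (illustrative only, not the patch
code):

	request_irq(hwirq, handler, IRQF_NO_AUTOEN, name, ctx); /* depth = 1 */
	enable_irq(hwirq);   /* eventfd configured: depth 1 -> 0, IRQ live */
	disable_irq(hwirq);  /* MASK:   depth 0 -> 1 */
	enable_irq(hwirq);   /* UNMASK: depth 1 -> 0 */
	disable_irq(hwirq);  /* eventfd cleared: depth 0 -> 1, parked again */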
> In doing so, it's guaranteed that changes to @trigger cannot race the
> IRQ handlers because the IRQ handler is synchronously disabled before
> modifying the trigger, and loopback triggering of the IRQ via ioctl is
> safe due to serialization with trigger changes via igate.
>
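Schematically, the ordering that makes the swap safe (condensed from
vfio_set_trigger() below; the igate locking is shown inline here for clarity,
though in the driver it is taken by the ioctl caller):

	mutex_lock(&vdev->igate);
	disable_irq(irq->hwirq);   /* synchronous: waits out in-flight handlers */
	eventfd_ctx_put(irq->trigger);
	irq->trigger = NULL;       /* handler cannot run and observe this */
	/* ... optionally install a new eventfd and enable_irq() ... */
	mutex_unlock(&vdev->igate);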
> For compatibility, request_irq() failures remain local to the SET_IRQS
> ioctl rather than becoming a fatal error in the open device path.
> This allows, for example, a userspace driver with polling mode support
> to continue to work regardless of moving the request_irq() call site.
> This necessarily blocks all SET_IRQS access to the failed index.
>
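The compat mechanism leans on the standard ERR_PTR()/IS_ERR() encoding,
stashing the request_irq() error in the otherwise-unused name pointer
(condensed from the init and ioctl hunks below):

	ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, name, ctx);
	if (ret) {
		kfree(name);
		vdev->irqs[i].name = ERR_PTR(ret);  /* record the error, don't fail open */
	}

	/* later, gating every SET_IRQS access to that index: */
	if (IS_ERR(vdev->irqs[index].name))
		return PTR_ERR(vdev->irqs[index].name);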
> Cc: Eric Auger <eric.auger@xxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd")
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
Reviewed-by: Eric Auger <eric.auger@xxxxxxxxxx>
Eric
> ---
> drivers/vfio/platform/vfio_platform_irq.c | 100 +++++++++++++++-------
> 1 file changed, 68 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c
> index e5dcada9e86c..ef41ecef83af 100644
> --- a/drivers/vfio/platform/vfio_platform_irq.c
> +++ b/drivers/vfio/platform/vfio_platform_irq.c
> @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev,
> return 0;
> }
>
> +/*
> + * The trigger eventfd is guaranteed valid in the interrupt path
> + * and protected by the igate mutex when triggered via ioctl.
> + */
> +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx)
> +{
> + if (likely(irq_ctx->trigger))
> + eventfd_signal(irq_ctx->trigger);
> +}
> +
> static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
> {
> struct vfio_platform_irq *irq_ctx = dev_id;
> @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
> spin_unlock_irqrestore(&irq_ctx->lock, flags);
>
> if (ret == IRQ_HANDLED)
> - eventfd_signal(irq_ctx->trigger);
> + vfio_send_eventfd(irq_ctx);
>
> return ret;
> }
> @@ -164,52 +174,40 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
> {
> struct vfio_platform_irq *irq_ctx = dev_id;
>
> - eventfd_signal(irq_ctx->trigger);
> + vfio_send_eventfd(irq_ctx);
>
> return IRQ_HANDLED;
> }
>
> static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
> - int fd, irq_handler_t handler)
> + int fd)
> {
> struct vfio_platform_irq *irq = &vdev->irqs[index];
> struct eventfd_ctx *trigger;
> - int ret;
>
> if (irq->trigger) {
> - irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
> - free_irq(irq->hwirq, irq);
> - kfree(irq->name);
> + disable_irq(irq->hwirq);
> eventfd_ctx_put(irq->trigger);
> irq->trigger = NULL;
> }
>
> if (fd < 0) /* Disable only */
> return 0;
> - irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)",
> - irq->hwirq, vdev->name);
> - if (!irq->name)
> - return -ENOMEM;
>
> trigger = eventfd_ctx_fdget(fd);
> - if (IS_ERR(trigger)) {
> - kfree(irq->name);
> + if (IS_ERR(trigger))
> return PTR_ERR(trigger);
> - }
>
> irq->trigger = trigger;
>
> - irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN);
> - ret = request_irq(irq->hwirq, handler, 0, irq->name, irq);
> - if (ret) {
> - kfree(irq->name);
> - eventfd_ctx_put(trigger);
> - irq->trigger = NULL;
> - return ret;
> - }
> -
> - if (!irq->masked)
> - enable_irq(irq->hwirq);
> + /*
> + * irq->masked effectively provides nested disables within the overall
> + * enable relative to trigger. Specifically, request_irq() is called
> + * with NO_AUTOEN, therefore the IRQ is initially disabled. The user
> + * may only further disable the IRQ with a MASK operation because
> + * irq->masked is initially false.
> + */
> + enable_irq(irq->hwirq);
>
> return 0;
> }
> @@ -228,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
> handler = vfio_irq_handler;
>
> if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
> - return vfio_set_trigger(vdev, index, -1, handler);
> + return vfio_set_trigger(vdev, index, -1);
>
> if (start != 0 || count != 1)
> return -EINVAL;
> @@ -236,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
> if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
> int32_t fd = *(int32_t *)data;
>
> - return vfio_set_trigger(vdev, index, fd, handler);
> + return vfio_set_trigger(vdev, index, fd);
> }
>
> if (flags & VFIO_IRQ_SET_DATA_NONE) {
> @@ -260,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
> unsigned start, unsigned count, uint32_t flags,
> void *data) = NULL;
>
> + /*
> + * For compatibility, errors from request_irq() are local to the
> + * SET_IRQS path and reflected in the name pointer. This allows,
> + * for example, polling mode fallback for an exclusive IRQ failure.
> + */
> + if (IS_ERR(vdev->irqs[index].name))
> + return PTR_ERR(vdev->irqs[index].name);
> +
> switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
> case VFIO_IRQ_SET_ACTION_MASK:
> func = vfio_platform_set_irq_mask;
> @@ -280,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
>
> int vfio_platform_irq_init(struct vfio_platform_device *vdev)
> {
> - int cnt = 0, i;
> + int cnt = 0, i, ret = 0;
>
> while (vdev->get_irq(vdev, cnt) >= 0)
> cnt++;
> @@ -292,29 +298,54 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev)
>
> for (i = 0; i < cnt; i++) {
> int hwirq = vdev->get_irq(vdev, i);
> + irq_handler_t handler = vfio_irq_handler;
>
> - if (hwirq < 0)
> + if (hwirq < 0) {
> + ret = -EINVAL;
> goto err;
> + }
>
> spin_lock_init(&vdev->irqs[i].lock);
>
> vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD;
>
> - if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK)
> + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) {
> vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE
> | VFIO_IRQ_INFO_AUTOMASKED;
> + handler = vfio_automasked_irq_handler;
> + }
>
> vdev->irqs[i].count = 1;
> vdev->irqs[i].hwirq = hwirq;
> vdev->irqs[i].masked = false;
> + vdev->irqs[i].name = kasprintf(GFP_KERNEL_ACCOUNT,
> + "vfio-irq[%d](%s)", hwirq,
> + vdev->name);
> + if (!vdev->irqs[i].name) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN,
> + vdev->irqs[i].name, &vdev->irqs[i]);
> + if (ret) {
> + kfree(vdev->irqs[i].name);
> + vdev->irqs[i].name = ERR_PTR(ret);
> + }
> }
>
> vdev->num_irqs = cnt;
>
> return 0;
> err:
> + for (--i; i >= 0; i--) {
> + if (!IS_ERR(vdev->irqs[i].name)) {
> + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
> + kfree(vdev->irqs[i].name);
> + }
> + }
> kfree(vdev->irqs);
> - return -EINVAL;
> + return ret;
> }
>
> void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
> @@ -324,7 +355,12 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
> for (i = 0; i < vdev->num_irqs; i++) {
> vfio_virqfd_disable(&vdev->irqs[i].mask);
> vfio_virqfd_disable(&vdev->irqs[i].unmask);
> - vfio_set_trigger(vdev, i, -1, NULL);
> + if (!IS_ERR(vdev->irqs[i].name)) {
> + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
> + if (vdev->irqs[i].trigger)
> + eventfd_ctx_put(vdev->irqs[i].trigger);
> + kfree(vdev->irqs[i].name);
> + }
> }
>
> vdev->num_irqs = 0;