[RFC PATCH 2/2] kvm/eventfd: Use priority waitqueue to catch events before userspace

From: David Woodhouse
Date: Mon Oct 26 2020 - 14:42:53 EST


From: David Woodhouse <dwmw@xxxxxxxxxxxx>

As far as I can tell, when we use posted interrupts we silently cut off
the events from userspace, if it's listening on the same eventfd that
feeds the irqfd.

I like that behaviour. Let's do it all the time, even without posted
interrupts. It makes it much easier to handle IRQ remapping invalidation
without having to constantly add/remove the fd from the userspace poll
set. We can just leave userspace polling on it, and the bypass will...
well... bypass it.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
virt/kvm/eventfd.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d6408bb497dc..39443e2f72bf 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -191,6 +191,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
struct kvm *kvm = irqfd->kvm;
unsigned seq;
int idx;
+ int ret = 0;

if (flags & EPOLLIN) {
idx = srcu_read_lock(&kvm->irq_srcu);
@@ -204,6 +205,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
false) == -EWOULDBLOCK)
schedule_work(&irqfd->inject);
srcu_read_unlock(&kvm->irq_srcu, idx);
+ ret = 1;
}

if (flags & EPOLLHUP) {
@@ -227,7 +229,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
}

- return 0;
+ return ret;
}

static void
@@ -236,7 +238,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
{
struct kvm_kernel_irqfd *irqfd =
container_of(pt, struct kvm_kernel_irqfd, pt);
- add_wait_queue(wqh, &irqfd->wait);
+ add_wait_queue_priority(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
--
2.26.2