[v4 12/16] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

From: Feng Wu
Date: Thu Jun 11 2015 - 07:05:56 EST


This patch adds the kvm-vfio interface for VT-d Posted-Interrupts.
When a guest updates the MSI/MSI-X information of an assigned device,
QEMU uses the KVM_DEV_VFIO_DEVICE_POST_IRQ attribute to set up the
IRTE for VT-d PI. A userspace program can also use
KVM_DEV_VFIO_DEVICE_UNPOST_IRQ to switch back to IRQ remapping mode.
This patch implements both IRQ attributes.

Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
include/linux/kvm_host.h | 22 +++++++++
virt/kvm/vfio.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 148 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f591f7c..69f8711 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1073,6 +1073,28 @@ extern struct kvm_device_ops kvm_xics_ops;
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
extern struct kvm_device_ops kvm_arm_vgic_v3_ops;

+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST
+/**
+ * kvm_arch_vfio_update_pi_irte() - set or unset the IRTE for Posted-Interrupts
+ * @kvm:       kvm instance
+ * @host_irq:  host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set:       true to set PI, false to unset it
+ *
+ * Return: 0 on success, < 0 on failure.
+ */
+int kvm_arch_vfio_update_pi_irte(struct kvm *kvm,
+				 unsigned int host_irq,
+				 uint32_t guest_irq,
+				 bool set);
+#else /* !__KVM_HAVE_ARCH_KVM_VFIO_POST */
+/* Fallback when the architecture provides no hook: do nothing, return 0. */
+static inline int
+kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+			     uint32_t guest_irq, bool set)
+{
+	return 0;
+}
+#endif /* __KVM_HAVE_ARCH_KVM_VFIO_POST */
+
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT

static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 80a45e4..547fc51 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include <asm/irq_remapping.h>
#include "vfio.h"

struct kvm_vfio_group {
@@ -276,12 +277,128 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
return -ENXIO;
}

+/*
+ * Report how many interrupts @pdev exposes for the VFIO PCI IRQ index
+ * @irq_type.
+ *
+ * INTx yields 1 when the PCI_INTERRUPT_PIN config byte is non-zero.
+ * MSI and MSI-X forward the result of pci_msi_vec_count() and
+ * pci_msix_vec_count() unchanged.  Everything else yields 0.
+ */
+static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
+{
+	u8 intx_pin;
+
+	switch (irq_type) {
+	case VFIO_PCI_INTX_IRQ_INDEX:
+		pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &intx_pin);
+		return intx_pin ? 1 : 0;
+	case VFIO_PCI_MSI_IRQ_INDEX:
+		return pci_msi_vec_count(pdev);
+	case VFIO_PCI_MSIX_IRQ_INDEX:
+		return pci_msix_vec_count(pdev);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * kvm_vfio_control_pi - set or unset posting for a range of device interrupts
+ *
+ * @kdev: kvm-vfio device; kdev->kvm is handed to the arch hook
+ * @argp: userspace pointer to a struct kvm_vfio_dev_irq header that is
+ *        immediately followed by one u32 gsi per requested interrupt
+ * @set:  forwarded unchanged to kvm_arch_vfio_update_pi_irte()
+ *
+ * The header is copied in and validated, the VFIO device behind pi_info.fd
+ * is resolved to a PCI device, and for every MSI descriptor whose entry
+ * number lies in [start, start + count) the arch hook is called with the
+ * descriptor's host irq and the matching gsi.
+ *
+ * Processing stops at the first failing arch-hook call; entries that were
+ * already updated are not rolled back.
+ *
+ * Returns 0 on success, < 0 on failure.
+ */
+static int kvm_vfio_control_pi(struct kvm_device *kdev,
+			       int32_t __user *argp, bool set)
+{
+	struct kvm_vfio_dev_irq pi_info;
+	uint32_t *gsi;
+	unsigned long minsz;
+	struct vfio_device *vdev;
+	struct msi_desc *entry;
+	struct device *dev;
+	struct pci_dev *pdev;
+	int i, max, ret;
+
+	minsz = offsetofend(struct kvm_vfio_dev_irq, count);
+
+	/* Only the fixed-size header is read here; the gsi array comes later. */
+	if (copy_from_user(&pi_info, (void __user *)argp, minsz))
+		return -EFAULT;
+
+	if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS)
+		return -EINVAL;
+
+	vdev = kvm_vfio_get_vfio_device(pi_info.fd);
+	if (IS_ERR(vdev))
+		return PTR_ERR(vdev);
+
+	dev = kvm_vfio_external_base_device(vdev);
+	if (!dev || !dev_is_pci(dev)) {
+		ret = -EFAULT;
+		goto put_vfio_device;
+	}
+
+	pdev = to_pci_dev(dev);
+
+	/* A negative value here is an error from the vector-count helpers. */
+	max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
+	if (max <= 0) {
+		ret = -EFAULT;
+		goto put_vfio_device;
+	}
+
+	/*
+	 * start and count come straight from userspace.  Compare count with
+	 * the room left after start rather than testing "start + count > max":
+	 * that sum can wrap around and slip past the limit.  Doing this check
+	 * first also bounds count by the device's vector count before it is
+	 * multiplied by sizeof(u32).
+	 * NOTE(review): assumes start/count are unsigned 32-bit fields; the
+	 * struct definition is not part of this patch - confirm.
+	 */
+	if (pi_info.start >= max || pi_info.count > max - pi_info.start ||
+	    pi_info.argsz - minsz < pi_info.count * sizeof(u32)) {
+		ret = -EINVAL;
+		goto put_vfio_device;
+	}
+
+	/* The gsi array sits directly behind the header in user memory. */
+	gsi = memdup_user((void __user *)((unsigned long)argp + minsz),
+			  pi_info.count * sizeof(u32));
+	if (IS_ERR(gsi)) {
+		ret = PTR_ERR(gsi);
+		goto put_vfio_device;
+	}
+
+#ifdef CONFIG_PCI_MSI
+	for (i = 0; i < pi_info.count; i++) {
+		/* Find the descriptor(s) for vector start + i, skip the rest. */
+		list_for_each_entry(entry, &pdev->msi_list, list) {
+			if (entry->msi_attrib.entry_nr != pi_info.start + i)
+				continue;
+
+			ret = kvm_arch_vfio_update_pi_irte(kdev->kvm,
+							   entry->irq,
+							   gsi[i],
+							   set);
+			if (ret)
+				goto free_gsi;
+		}
+	}
+#endif
+
+	ret = 0;
+
+free_gsi:
+	kfree(gsi);
+
+put_vfio_device:
+	kvm_vfio_put_vfio_device(vdev);
+	return ret;
+}
+
+/*
+ * Handle a write to an attribute of the KVM_DEV_VFIO_DEVICE group.
+ *
+ * @arg is treated as a userspace address and passed on to
+ * kvm_vfio_control_pi() for the POST_IRQ / UNPOST_IRQ attributes, which are
+ * only compiled in when __KVM_HAVE_ARCH_KVM_VFIO_POST is defined.
+ * Any other attribute yields -ENXIO.
+ */
+static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
+{
+	int32_t __user *uarg = (int32_t __user *)(unsigned long)arg;
+
+	switch (attr) {
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST
+	case KVM_DEV_VFIO_DEVICE_POST_IRQ:
+		return kvm_vfio_control_pi(kdev, uarg, true);
+	case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ:
+		return kvm_vfio_control_pi(kdev, uarg, false);
+#endif
+	default:
+		return -ENXIO;
+	}
+}
+
static int kvm_vfio_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
switch (attr->group) {
case KVM_DEV_VFIO_GROUP:
return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+ case KVM_DEV_VFIO_DEVICE:
+ return kvm_vfio_set_device(dev, attr->attr, attr->addr);
}

return -ENXIO;
@@ -299,6 +416,15 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
}

break;
+ case KVM_DEV_VFIO_DEVICE:
+ switch (attr->attr) {
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST
+ case KVM_DEV_VFIO_DEVICE_POST_IRQ:
+ case KVM_DEV_VFIO_DEVICE_UNPOST_IRQ:
+ return irq_remapping_cap(IRQ_POSTING_CAP) ? 0 : -ENXIO;
+#endif
+ }
+ break;
}

return -ENXIO;
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/