[PATCH] Revert a bunch of virtio commits

From: Laura Abbott
Date: Wed Mar 22 2017 - 18:41:27 EST


07ec51480b5e ("virtio_pci: use shared interrupts for virtqueues")
is linked to a bunch of issues. Unfortunately we can't just revert it by
itself. Revert it and dependency patches as well.

Revert "virtio: provide a method to get the IRQ affinity mask for a virtqueue"

This reverts commit bbaba479563910aaa51e59bb9027a09e396d3a3c.

Revert "virtio-console: avoid DMA from stack"

This reverts commit c4baad50297d84bde1a7ad45e50c73adae4a2192.

Revert "vhost: introduce O(1) vq metadata cache"

This reverts commit f889491380582b4ba2981cf0b0d7d6a40fb30ab7.

Conflicts:
drivers/vhost/vhost.c

Revert "virtio_scsi: use virtio IRQ affinity"

This reverts commit 0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0.

Revert "virtio_blk: use virtio IRQ affinity"

This reverts commit ad71473d9c43725c917fc5a86d54ceb7001ee28c.

Revert "blk-mq: provide a default queue mapping for virtio device"

This reverts commit 73473427bb551686e4b68ecd99bfd27e6635286a.

Revert "virtio: allow drivers to request IRQ affinity when creating VQs"

This reverts commit fb5e31d970ce8b4941f03ed765d7dbefc39f22d9.

Revert "virtio_pci: simplify MSI-X setup"

This reverts commit 52a61516125fa9a21b3bdf4f90928308e2e5573f.

Revert "virtio_pci: don't duplicate the msix_enable flag in struct pci_dev"

This reverts commit 53a020c661741f3b87ad3ac6fa545088aaebac9b.

Revert "virtio_pci: use shared interrupts for virtqueues"

This reverts commit 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507.
---
block/Kconfig | 5 -
block/Makefile | 1 -
block/blk-mq-virtio.c | 54 ------
drivers/block/virtio_blk.c | 14 +-
drivers/char/virtio_console.c | 14 +-
drivers/crypto/virtio/virtio_crypto_core.c | 2 +-
drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +-
drivers/misc/mic/vop/vop_main.c | 2 +-
drivers/net/caif/caif_virtio.c | 3 +-
drivers/net/virtio_net.c | 2 +-
drivers/remoteproc/remoteproc_virtio.c | 3 +-
drivers/rpmsg/virtio_rpmsg_bus.c | 2 +-
drivers/s390/virtio/kvm_virtio.c | 3 +-
drivers/s390/virtio/virtio_ccw.c | 3 +-
drivers/scsi/virtio_scsi.c | 127 +++++++++++--
drivers/vhost/vhost.c | 136 +++-----------
drivers/vhost/vhost.h | 8 -
drivers/virtio/virtio_balloon.c | 3 +-
drivers/virtio/virtio_input.c | 3 +-
drivers/virtio/virtio_mmio.c | 3 +-
drivers/virtio/virtio_pci_common.c | 287 +++++++++++++++--------------
drivers/virtio/virtio_pci_common.h | 25 ++-
drivers/virtio/virtio_pci_legacy.c | 3 +-
drivers/virtio/virtio_pci_modern.c | 11 +-
include/linux/blk-mq-virtio.h | 10 -
include/linux/cpuhotplug.h | 1 +
include/linux/virtio_config.h | 12 +-
include/uapi/linux/virtio_pci.h | 2 +-
net/vmw_vsock/virtio_transport.c | 3 +-
29 files changed, 337 insertions(+), 407 deletions(-)
delete mode 100644 block/blk-mq-virtio.c
delete mode 100644 include/linux/blk-mq-virtio.h

diff --git a/block/Kconfig b/block/Kconfig
index e9f780f..a2a92e5 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -189,9 +189,4 @@ config BLK_MQ_PCI
depends on BLOCK && PCI
default y

-config BLK_MQ_VIRTIO
- bool
- depends on BLOCK && VIRTIO
- default y
-
source block/Kconfig.iosched
diff --git a/block/Makefile b/block/Makefile
index 081bb68..2ad7c30 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -25,7 +25,6 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
-obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c
deleted file mode 100644
index c3afbca..0000000
--- a/block/blk-mq-virtio.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016 Christoph Hellwig.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-#include <linux/device.h>
-#include <linux/blk-mq.h>
-#include <linux/blk-mq-virtio.h>
-#include <linux/virtio_config.h>
-#include <linux/module.h>
-#include "blk-mq.h"
-
-/**
- * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device
- * @set: tagset to provide the mapping for
- * @vdev: virtio device associated with @set.
- * @first_vec: first interrupt vectors to use for queues (usually 0)
- *
- * This function assumes the virtio device @vdev has at least as many available
- * interrupt vetors as @set has queues. It will then queuery the vector
- * corresponding to each queue for it's affinity mask and built queue mapping
- * that maps a queue to the CPUs that have irq affinity for the corresponding
- * vector.
- */
-int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
- struct virtio_device *vdev, int first_vec)
-{
- const struct cpumask *mask;
- unsigned int queue, cpu;
-
- if (!vdev->config->get_vq_affinity)
- goto fallback;
-
- for (queue = 0; queue < set->nr_hw_queues; queue++) {
- mask = vdev->config->get_vq_affinity(vdev, first_vec + queue);
- if (!mask)
- goto fallback;
-
- for_each_cpu(cpu, mask)
- set->mq_map[cpu] = queue;
- }
-
- return 0;
-fallback:
- return blk_mq_map_queues(set);
-}
-EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8..024b473 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -5,7 +5,6 @@
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
@@ -13,7 +12,6 @@
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
-#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>

#define PART_BITS 4
@@ -428,7 +426,6 @@ static int init_vq(struct virtio_blk *vblk)
struct virtqueue **vqs;
unsigned short num_vqs;
struct virtio_device *vdev = vblk->vdev;
- struct irq_affinity desc = { 0, };

err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
struct virtio_blk_config, num_queues,
@@ -455,8 +452,7 @@ static int init_vq(struct virtio_blk *vblk)
}

/* Discover virtqueues and write information to configuration. */
- err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
- &desc);
+ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
if (err)
goto out;

@@ -590,18 +586,10 @@ static int virtblk_init_request(void *data, struct request *rq,
return 0;
}

-static int virtblk_map_queues(struct blk_mq_tag_set *set)
-{
- struct virtio_blk *vblk = set->driver_data;
-
- return blk_mq_virtio_map_queues(set, vblk->vdev, 0);
-}
-
static struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.complete = virtblk_request_done,
.init_request = virtblk_init_request,
- .map_queues = virtblk_map_queues,
};

static unsigned int virtblk_queue_depth;
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index e9b7e0b..17857be 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1136,8 +1136,6 @@ static int put_chars(u32 vtermno, const char *buf, int count)
{
struct port *port;
struct scatterlist sg[1];
- void *data;
- int ret;

if (unlikely(early_put_chars))
return early_put_chars(vtermno, buf, count);
@@ -1146,14 +1144,8 @@ static int put_chars(u32 vtermno, const char *buf, int count)
if (!port)
return -EPIPE;

- data = kmemdup(buf, count, GFP_ATOMIC);
- if (!data)
- return -ENOMEM;
-
- sg_init_one(sg, data, count);
- ret = __send_to_port(port, sg, 1, count, data, false);
- kfree(data);
- return ret;
+ sg_init_one(sg, buf, count);
+ return __send_to_port(port, sg, 1, count, (void *)buf, false);
}

/*
@@ -1947,7 +1939,7 @@ static int init_vqs(struct ports_device *portdev)
/* Find the queues. */
err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
io_callbacks,
- (const char **)io_names, NULL);
+ (const char **)io_names);
if (err)
goto free;

diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 21472e4..b5b1533 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -120,7 +120,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
}

ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
- names, NULL);
+ names);
if (ret)
goto err_find;

diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 4918668..30f989a 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -176,7 +176,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
#endif

ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
- callbacks, names, NULL);
+ callbacks, names);
if (ret) {
DRM_ERROR("failed to find virt queues\n");
goto err_vqs;
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index c2e29d7..1a2b67f3 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -374,7 +374,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ const char * const names[])
{
struct _vop_vdev *vdev = to_vopvdev(dev);
struct vop_device *vpdev = vdev->vpdev;
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index bc0eb47..b306210 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -679,8 +679,7 @@ static int cfv_probe(struct virtio_device *vdev)
goto err;

/* Get the TX virtio ring. This is a "guest side vring". */
- err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names,
- NULL);
+ err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names);
if (err)
goto err;

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ea9890d..e9d7e2b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2080,7 +2080,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
}

ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
- names, NULL);
+ names);
if (ret)
goto err_find;

diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 0142cc3..364411f 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -137,8 +137,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev)
static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char * const names[],
- struct irq_affinity *desc)
+ const char * const names[])
{
int i, ret;

diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 5e66e08..3090b0d 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
init_waitqueue_head(&vrp->sendq);

/* We expect two virtqueues, rx and tx (and in this order) */
- err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
+ err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names);
if (err)
goto free_vrp;

diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 2ce0b3e..5e5c11f 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -255,8 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev)
static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char * const names[],
- struct irq_affinity *desc)
+ const char * const names[])
{
struct kvm_device *kdev = to_kvmdev(vdev);
int i;
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 0ed209f..648373c 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -628,8 +628,7 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char * const names[],
- struct irq_affinity *desc)
+ const char * const names[])
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
unsigned long *indicatorp = NULL;
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 939c47d..c680d76 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -18,7 +18,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mempool.h>
-#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
@@ -30,7 +29,6 @@
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_tcq.h>
#include <linux/seqlock.h>
-#include <linux/blk-mq-virtio.h>

#define VIRTIO_SCSI_MEMPOOL_SZ 64
#define VIRTIO_SCSI_EVENT_LEN 8
@@ -110,6 +108,7 @@ struct virtio_scsi {
bool affinity_hint_set;

struct hlist_node node;
+ struct hlist_node node_dead;

/* Protected by event_vq lock */
bool stop_events;
@@ -119,6 +118,7 @@ struct virtio_scsi {
struct virtio_scsi_vq req_vqs[];
};

+static enum cpuhp_state virtioscsi_online;
static struct kmem_cache *virtscsi_cmd_cache;
static mempool_t *virtscsi_cmd_pool;

@@ -766,13 +766,6 @@ static void virtscsi_target_destroy(struct scsi_target *starget)
kfree(tgt);
}

-static int virtscsi_map_queues(struct Scsi_Host *shost)
-{
- struct virtio_scsi *vscsi = shost_priv(shost);
-
- return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2);
-}
-
static struct scsi_host_template virtscsi_host_template_single = {
.module = THIS_MODULE,
.name = "Virtio SCSI HBA",
@@ -808,7 +801,6 @@ static struct scsi_host_template virtscsi_host_template_multi = {
.use_clustering = ENABLE_CLUSTERING,
.target_alloc = virtscsi_target_alloc,
.target_destroy = virtscsi_target_destroy,
- .map_queues = virtscsi_map_queues,
.track_queue_depth = 1,
};

@@ -825,6 +817,80 @@ static struct scsi_host_template virtscsi_host_template_multi = {
virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \
} while(0)

+static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
+{
+ int i;
+ int cpu;
+
+ /* In multiqueue mode, when the number of cpu is equal
+ * to the number of request queues, we let the qeueues
+ * to be private to one cpu by setting the affinity hint
+ * to eliminate the contention.
+ */
+ if ((vscsi->num_queues == 1 ||
+ vscsi->num_queues != num_online_cpus()) && affinity) {
+ if (vscsi->affinity_hint_set)
+ affinity = false;
+ else
+ return;
+ }
+
+ if (affinity) {
+ i = 0;
+ for_each_online_cpu(cpu) {
+ virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu);
+ i++;
+ }
+
+ vscsi->affinity_hint_set = true;
+ } else {
+ for (i = 0; i < vscsi->num_queues; i++) {
+ if (!vscsi->req_vqs[i].vq)
+ continue;
+
+ virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1);
+ }
+
+ vscsi->affinity_hint_set = false;
+ }
+}
+
+static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
+{
+ get_online_cpus();
+ __virtscsi_set_affinity(vscsi, affinity);
+ put_online_cpus();
+}
+
+static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi,
+ node);
+ __virtscsi_set_affinity(vscsi, true);
+ return 0;
+}
+
+static int virtscsi_cpu_notif_add(struct virtio_scsi *vi)
+{
+ int ret;
+
+ ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead);
+ if (ret)
+ cpuhp_state_remove_instance(virtioscsi_online, &vi->node);
+ return ret;
+}
+
+static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi)
+{
+ cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD,
+ &vi->node_dead);
+}
+
static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
struct virtqueue *vq)
{
@@ -834,8 +900,14 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,

static void virtscsi_remove_vqs(struct virtio_device *vdev)
{
+ struct Scsi_Host *sh = virtio_scsi_host(vdev);
+ struct virtio_scsi *vscsi = shost_priv(sh);
+
+ virtscsi_set_affinity(vscsi, false);
+
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
+
vdev->config->del_vqs(vdev);
}

@@ -848,7 +920,6 @@ static int virtscsi_init(struct virtio_device *vdev,
vq_callback_t **callbacks;
const char **names;
struct virtqueue **vqs;
- struct irq_affinity desc = { .pre_vectors = 2 };

num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE;
vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL);
@@ -870,8 +941,7 @@ static int virtscsi_init(struct virtio_device *vdev,
}

/* Discover virtqueues and write information to configuration. */
- err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
- &desc);
+ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
if (err)
goto out;

@@ -937,6 +1007,10 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto virtscsi_init_failed;

+ err = virtscsi_cpu_notif_add(vscsi);
+ if (err)
+ goto scsi_add_host_failed;
+
cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
@@ -991,6 +1065,9 @@ static void virtscsi_remove(struct virtio_device *vdev)
virtscsi_cancel_event_work(vscsi);

scsi_remove_host(shost);
+
+ virtscsi_cpu_notif_remove(vscsi);
+
virtscsi_remove_vqs(vdev);
scsi_host_put(shost);
}
@@ -998,6 +1075,10 @@ static void virtscsi_remove(struct virtio_device *vdev)
#ifdef CONFIG_PM_SLEEP
static int virtscsi_freeze(struct virtio_device *vdev)
{
+ struct Scsi_Host *sh = virtio_scsi_host(vdev);
+ struct virtio_scsi *vscsi = shost_priv(sh);
+
+ virtscsi_cpu_notif_remove(vscsi);
virtscsi_remove_vqs(vdev);
return 0;
}
@@ -1012,6 +1093,11 @@ static int virtscsi_restore(struct virtio_device *vdev)
if (err)
return err;

+ err = virtscsi_cpu_notif_add(vscsi);
+ if (err) {
+ vdev->config->del_vqs(vdev);
+ return err;
+ }
virtio_device_ready(vdev);

if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1066,6 +1152,16 @@ static int __init init(void)
pr_err("mempool_create() for virtscsi_cmd_pool failed\n");
goto error;
}
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "scsi/virtio:online",
+ virtscsi_cpu_online, NULL);
+ if (ret < 0)
+ goto error;
+ virtioscsi_online = ret;
+ ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead",
+ NULL, virtscsi_cpu_online);
+ if (ret)
+ goto error;
ret = register_virtio_driver(&virtio_scsi_driver);
if (ret < 0)
goto error;
@@ -1081,12 +1177,17 @@ static int __init init(void)
kmem_cache_destroy(virtscsi_cmd_cache);
virtscsi_cmd_cache = NULL;
}
+ if (virtioscsi_online)
+ cpuhp_remove_multi_state(virtioscsi_online);
+ cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
return ret;
}

static void __exit fini(void)
{
unregister_virtio_driver(&virtio_scsi_driver);
+ cpuhp_remove_multi_state(virtioscsi_online);
+ cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
mempool_destroy(virtscsi_cmd_pool);
kmem_cache_destroy(virtscsi_cmd_cache);
}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f0ba362..c323bce 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -284,22 +284,6 @@ void vhost_poll_queue(struct vhost_poll *poll)
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);

-static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
-{
- int j;
-
- for (j = 0; j < VHOST_NUM_ADDRS; j++)
- vq->meta_iotlb[j] = NULL;
-}
-
-static void vhost_vq_meta_reset(struct vhost_dev *d)
-{
- int i;
-
- for (i = 0; i < d->nvqs; ++i)
- __vhost_vq_meta_reset(d->vqs[i]);
-}
-
static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
@@ -330,7 +314,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->busyloop_timeout = 0;
vq->umem = NULL;
vq->iotlb = NULL;
- __vhost_vq_meta_reset(vq);
}

static int vhost_worker(void *data)
@@ -710,18 +693,6 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
return 1;
}

-static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
- u64 addr, unsigned int size,
- int type)
-{
- const struct vhost_umem_node *node = vq->meta_iotlb[type];
-
- if (!node)
- return NULL;
-
- return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
-}
-
/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -764,14 +735,8 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
+ /* TODO: more fast path */
struct iov_iter t;
- void __user *uaddr = vhost_vq_meta_fetch(vq,
- (u64)(uintptr_t)to, size,
- VHOST_ADDR_DESC);
-
- if (uaddr)
- return __copy_to_user(uaddr, from, size);
-
ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_WO);
@@ -799,14 +764,8 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
- void __user *uaddr = vhost_vq_meta_fetch(vq,
- (u64)(uintptr_t)from, size,
- VHOST_ADDR_DESC);
+ /* TODO: more fast path */
struct iov_iter f;
-
- if (uaddr)
- return __copy_from_user(to, uaddr, size);
-
ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_RO);
@@ -826,12 +785,17 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
return ret;
}

-static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
- void __user *addr, unsigned int size,
- int type)
+static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+ void __user *addr, unsigned size)
{
int ret;

+ /* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+ /* TODO: more fast path */
ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_RO);
@@ -852,32 +816,14 @@ static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
return vq->iotlb_iov[0].iov_base;
}

-/* This function should be called after iotlb
- * prefetch, which means we're sure that vq
- * could be access through iotlb. So -EAGAIN should
- * not happen in this case.
- */
-static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
- void *addr, unsigned int size,
- int type)
-{
- void __user *uaddr = vhost_vq_meta_fetch(vq,
- (u64)(uintptr_t)addr, size, type);
- if (uaddr)
- return uaddr;
-
- return __vhost_get_user_slow(vq, addr, size, type);
-}
-
-#define vhost_put_user(vq, x, ptr) \
+#define vhost_put_user(vq, x, ptr) \
({ \
int ret = -EFAULT; \
if (!vq->iotlb) { \
ret = __put_user(x, ptr); \
} else { \
__typeof__(ptr) to = \
- (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
- sizeof(*ptr), VHOST_ADDR_USED); \
+ (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
if (to != NULL) \
ret = __put_user(x, to); \
else \
@@ -886,16 +832,14 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
ret; \
})

-#define vhost_get_user(vq, x, ptr, type) \
+#define vhost_get_user(vq, x, ptr) \
({ \
int ret; \
if (!vq->iotlb) { \
ret = __get_user(x, ptr); \
} else { \
__typeof__(ptr) from = \
- (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
- sizeof(*ptr), \
- type); \
+ (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
if (from != NULL) \
ret = __get_user(x, from); \
else \
@@ -904,12 +848,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
ret; \
})

-#define vhost_get_avail(vq, x, ptr) \
- vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
-
-#define vhost_get_used(vq, x, ptr) \
- vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
-
static void vhost_dev_lock_vqs(struct vhost_dev *d)
{
int i = 0;
@@ -1015,7 +953,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
ret = -EFAULT;
break;
}
- vhost_vq_meta_reset(dev);
if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
msg->iova + msg->size - 1,
msg->uaddr, msg->perm)) {
@@ -1025,7 +962,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
vhost_iotlb_notify_vq(dev, msg);
break;
case VHOST_IOTLB_INVALIDATE:
- vhost_vq_meta_reset(dev);
vhost_del_umem_range(dev->iotlb, msg->iova,
msg->iova + msg->size - 1);
break;
@@ -1169,26 +1105,12 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
sizeof *used + num * sizeof *used->ring + s);
}

-static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
- const struct vhost_umem_node *node,
- int type)
-{
- int access = (type == VHOST_ADDR_USED) ?
- VHOST_ACCESS_WO : VHOST_ACCESS_RO;
-
- if (likely(node->perm & access))
- vq->meta_iotlb[type] = node;
-}
-
static int iotlb_access_ok(struct vhost_virtqueue *vq,
- int access, u64 addr, u64 len, int type)
+ int access, u64 addr, u64 len)
{
const struct vhost_umem_node *node;
struct vhost_umem *umem = vq->iotlb;
- u64 s = 0, size, orig_addr = addr;
-
- if (vhost_vq_meta_fetch(vq, addr, len, type))
- return true;
+ u64 s = 0, size;

while (len > s) {
node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1205,10 +1127,6 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
}

size = node->size - addr + node->start;
-
- if (orig_addr == addr && size >= len)
- vhost_vq_meta_update(vq, node, type);
-
s += size;
addr += size;
}
@@ -1225,15 +1143,13 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
return 1;

return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
- num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
+ num * sizeof *vq->desc) &&
iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
sizeof *vq->avail +
- num * sizeof(*vq->avail->ring) + s,
- VHOST_ADDR_AVAIL) &&
+ num * sizeof *vq->avail->ring + s) &&
iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
sizeof *vq->used +
- num * sizeof(*vq->used->ring) + s,
- VHOST_ADDR_USED);
+ num * sizeof *vq->used->ring + s);
}
EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);

@@ -1814,7 +1730,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
r = -EFAULT;
goto err;
}
- r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
+ r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
if (r) {
vq_err(vq, "Can't access used idx at %p\n",
&vq->used->idx);
@@ -2018,7 +1934,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
last_avail_idx = vq->last_avail_idx;

if (vq->avail_idx == vq->last_avail_idx) {
- if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
+ if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
vq_err(vq, "Failed to access avail idx at %p\n",
&vq->avail->idx);
return -EFAULT;
@@ -2045,7 +1961,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,

/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
- if (unlikely(vhost_get_avail(vq, ring_head,
+ if (unlikely(vhost_get_user(vq, ring_head,
&vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
vq_err(vq, "Failed to read head: idx %d address %p\n",
last_avail_idx,
@@ -2261,7 +2177,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
* with the barrier that the Guest executes when enabling
* interrupts. */
smp_mb();
- if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
+ if (vhost_get_user(vq, flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
return true;
}
@@ -2288,7 +2204,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
* interrupts. */
smp_mb();

- if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
+ if (vhost_get_user(vq, event, vhost_used_event(vq))) {
vq_err(vq, "Failed to get used event idx");
return true;
}
@@ -2335,7 +2251,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (vq->avail_idx != vq->last_avail_idx)
return false;

- r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
+ r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
if (unlikely(r))
return false;
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
@@ -2371,7 +2287,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
smp_mb();
- r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
+ r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
if (r) {
vq_err(vq, "Failed to check avail idx at %p: %d\n",
&vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index f55671d..a9cbbb1 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -76,13 +76,6 @@ struct vhost_umem {
int numem;
};

-enum vhost_uaddr_type {
- VHOST_ADDR_DESC = 0,
- VHOST_ADDR_AVAIL = 1,
- VHOST_ADDR_USED = 2,
- VHOST_NUM_ADDRS = 3,
-};
-
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -93,7 +86,6 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
- const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct file *call;
struct file *error;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 4e11915..a610061 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -414,8 +414,7 @@ static int init_vqs(struct virtio_balloon *vb)
* optionally stat.
*/
nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
- err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
- NULL);
+ err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
if (err)
return err;

diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index 79f1293..350a2a5 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -173,8 +173,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
static const char * const names[] = { "events", "status" };
int err;

- err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names,
- NULL);
+ err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
if (err)
return err;
vi->evt = vqs[0];
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 78343b8..08357d7 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -446,8 +446,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char * const names[],
- struct irq_affinity *desc)
+ const char * const names[])
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index df548a6..a3376731 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -33,8 +33,10 @@ void vp_synchronize_vectors(struct virtio_device *vdev)
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i;

- synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0));
- for (i = 1; i < vp_dev->msix_vectors; i++)
+ if (vp_dev->intx_enabled)
+ synchronize_irq(vp_dev->pci_dev->irq);
+
+ for (i = 0; i < vp_dev->msix_vectors; ++i)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
}

@@ -97,10 +99,77 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
return vp_vring_interrupt(irq, opaque);
}

-static void vp_remove_vqs(struct virtio_device *vdev)
+static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
+ bool per_vq_vectors)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ const char *name = dev_name(&vp_dev->vdev.dev);
+ unsigned i, v;
+ int err = -ENOMEM;
+
+ vp_dev->msix_vectors = nvectors;
+
+ vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+ GFP_KERNEL);
+ if (!vp_dev->msix_names)
+ goto error;
+ vp_dev->msix_affinity_masks
+ = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+ GFP_KERNEL);
+ if (!vp_dev->msix_affinity_masks)
+ goto error;
+ for (i = 0; i < nvectors; ++i)
+ if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+ GFP_KERNEL))
+ goto error;
+
+ err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors,
+ PCI_IRQ_MSIX);
+ if (err < 0)
+ goto error;
+ vp_dev->msix_enabled = 1;
+
+ /* Set the vector used for configuration */
+ v = vp_dev->msix_used_vectors;
+ snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+ "%s-config", name);
+ err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
+ vp_config_changed, 0, vp_dev->msix_names[v],
+ vp_dev);
+ if (err)
+ goto error;
+ ++vp_dev->msix_used_vectors;
+
+ v = vp_dev->config_vector(vp_dev, v);
+ /* Verify we had enough resources to assign the vector */
+ if (v == VIRTIO_MSI_NO_VECTOR) {
+ err = -EBUSY;
+ goto error;
+ }
+
+ if (!per_vq_vectors) {
+ /* Shared vector for all VQs */
+ v = vp_dev->msix_used_vectors;
+ snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+ "%s-virtqueues", name);
+ err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
+ vp_vring_interrupt, 0, vp_dev->msix_names[v],
+ vp_dev);
+ if (err)
+ goto error;
+ ++vp_dev->msix_used_vectors;
+ }
+ return 0;
+error:
+ return err;
+}
+
+/* the config->del_vqs() implementation */
+void vp_del_vqs(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq, *n;
+ int i;

list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
if (vp_dev->msix_vector_map) {
@@ -112,170 +181,117 @@ static void vp_remove_vqs(struct virtio_device *vdev)
}
vp_dev->del_vq(vq);
}
-}

-/* the config->del_vqs() implementation */
-void vp_del_vqs(struct virtio_device *vdev)
-{
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- int i;
-
- if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs)))
- return;
+ if (vp_dev->intx_enabled) {
+ free_irq(vp_dev->pci_dev->irq, vp_dev);
+ vp_dev->intx_enabled = 0;
+ }

- vp_remove_vqs(vdev);
+ for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+ free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);

- if (vp_dev->pci_dev->msix_enabled) {
- for (i = 0; i < vp_dev->msix_vectors; i++)
+ for (i = 0; i < vp_dev->msix_vectors; i++)
+ if (vp_dev->msix_affinity_masks[i])
free_cpumask_var(vp_dev->msix_affinity_masks[i]);

+ if (vp_dev->msix_enabled) {
/* Disable the vector used for configuration */
vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);

- kfree(vp_dev->msix_affinity_masks);
- kfree(vp_dev->msix_names);
- kfree(vp_dev->msix_vector_map);
+ pci_free_irq_vectors(vp_dev->pci_dev);
+ vp_dev->msix_enabled = 0;
}

- free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
- pci_free_irq_vectors(vp_dev->pci_dev);
+ vp_dev->msix_vectors = 0;
+ vp_dev->msix_used_vectors = 0;
+ kfree(vp_dev->msix_names);
+ vp_dev->msix_names = NULL;
+ kfree(vp_dev->msix_affinity_masks);
+ vp_dev->msix_affinity_masks = NULL;
+ kfree(vp_dev->msix_vector_map);
+ vp_dev->msix_vector_map = NULL;
}

static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[],
+ bool per_vq_vectors)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- const char *name = dev_name(&vp_dev->vdev.dev);
- int i, err = -ENOMEM, allocated_vectors, nvectors;
- unsigned flags = PCI_IRQ_MSIX;
- bool shared = false;
u16 msix_vec;
-
- if (desc) {
- flags |= PCI_IRQ_AFFINITY;
- desc->pre_vectors++; /* virtio config vector */
- }
-
- nvectors = 1;
- for (i = 0; i < nvqs; i++)
- if (callbacks[i])
- nvectors++;
-
- /* Try one vector per queue first. */
- err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
- nvectors, flags, desc);
- if (err < 0) {
- /* Fallback to one vector for config, one shared for queues. */
- shared = true;
- err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
- PCI_IRQ_MSIX);
- if (err < 0)
- return err;
- }
- if (err < 0)
- return err;
-
- vp_dev->msix_vectors = nvectors;
- vp_dev->msix_names = kmalloc_array(nvectors,
- sizeof(*vp_dev->msix_names), GFP_KERNEL);
- if (!vp_dev->msix_names)
- goto out_free_irq_vectors;
-
- vp_dev->msix_affinity_masks = kcalloc(nvectors,
- sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL);
- if (!vp_dev->msix_affinity_masks)
- goto out_free_msix_names;
-
- for (i = 0; i < nvectors; ++i) {
- if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
- GFP_KERNEL))
- goto out_free_msix_affinity_masks;
+ int i, err, nvectors, allocated_vectors;
+
+ if (per_vq_vectors) {
+ /* Best option: one for change interrupt, one per vq. */
+ nvectors = 1;
+ for (i = 0; i < nvqs; ++i)
+ if (callbacks[i])
+ ++nvectors;
+ } else {
+ /* Second best: one for change, shared for all vqs. */
+ nvectors = 2;
}

- /* Set the vector used for configuration */
- snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names),
- "%s-config", name);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
- 0, vp_dev->msix_names[0], vp_dev);
+ err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
if (err)
- goto out_free_msix_affinity_masks;
+ goto error_find;

- /* Verify we had enough resources to assign the vector */
- if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) {
- err = -EBUSY;
- goto out_free_config_irq;
+ if (per_vq_vectors) {
+ vp_dev->msix_vector_map = kmalloc_array(nvqs,
+ sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
+ if (!vp_dev->msix_vector_map)
+ goto error_find;
}

- vp_dev->msix_vector_map = kmalloc_array(nvqs,
- sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
- if (!vp_dev->msix_vector_map)
- goto out_disable_config_irq;
-
- allocated_vectors = 1; /* vector 0 is the config interrupt */
+ allocated_vectors = vp_dev->msix_used_vectors;
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
continue;
}

- if (callbacks[i])
- msix_vec = allocated_vectors;
- else
+ if (!callbacks[i])
msix_vec = VIRTIO_MSI_NO_VECTOR;
-
+ else if (per_vq_vectors)
+ msix_vec = allocated_vectors++;
+ else
+ msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
- goto out_remove_vqs;
+ goto error_find;
}

+ if (!per_vq_vectors)
+ continue;
+
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
continue;
}

- snprintf(vp_dev->msix_names[i + 1],
- sizeof(*vp_dev->msix_names), "%s-%s",
+ /* allocate per-vq irq if available and necessary */
+ snprintf(vp_dev->msix_names[msix_vec],
+ sizeof *vp_dev->msix_names,
+ "%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
- vring_interrupt, IRQF_SHARED,
- vp_dev->msix_names[i + 1], vqs[i]);
+ vring_interrupt, 0,
+ vp_dev->msix_names[msix_vec],
+ vqs[i]);
if (err) {
/* don't free this irq on error */
vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
- goto out_remove_vqs;
+ goto error_find;
}
vp_dev->msix_vector_map[i] = msix_vec;
-
- /*
- * Use a different vector for each queue if they are available,
- * else share the same vector for all VQs.
- */
- if (!shared)
- allocated_vectors++;
}
-
return 0;

-out_remove_vqs:
- vp_remove_vqs(vdev);
- kfree(vp_dev->msix_vector_map);
-out_disable_config_irq:
- vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
-out_free_config_irq:
- free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
-out_free_msix_affinity_masks:
- for (i = 0; i < nvectors; i++) {
- if (vp_dev->msix_affinity_masks[i])
- free_cpumask_var(vp_dev->msix_affinity_masks[i]);
- }
- kfree(vp_dev->msix_affinity_masks);
-out_free_msix_names:
- kfree(vp_dev->msix_names);
-out_free_irq_vectors:
- pci_free_irq_vectors(vp_dev->pci_dev);
+error_find:
+ vp_del_vqs(vdev);
return err;
}

@@ -289,8 +305,9 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
dev_name(&vdev->dev), vp_dev);
if (err)
- return err;
+ goto out_del_vqs;

+ vp_dev->intx_enabled = 1;
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
@@ -300,28 +317,33 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
- goto out_remove_vqs;
+ goto out_del_vqs;
}
}

return 0;
-
-out_remove_vqs:
- vp_remove_vqs(vdev);
- free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
+out_del_vqs:
+ vp_del_vqs(vdev);
return err;
}

/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[])
{
int err;

- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc);
+ /* Try MSI-X with one vector per queue. */
+ err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true);
if (!err)
return 0;
+ /* Fallback: MSI-X with one vector for config, one shared for queues. */
+ err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false);
+ if (!err)
+ return 0;
+ /* Finally fall back to regular interrupts. */
return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
}

@@ -345,7 +367,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
if (!vq->callback)
return -EINVAL;

- if (vp_dev->pci_dev->msix_enabled) {
+ if (vp_dev->msix_enabled) {
int vec = vp_dev->msix_vector_map[vq->index];
struct cpumask *mask = vp_dev->msix_affinity_masks[vec];
unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec);
@@ -361,17 +383,6 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
return 0;
}

-const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
-{
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- unsigned int *map = vp_dev->msix_vector_map;
-
- if (!map || map[index] == VIRTIO_MSI_NO_VECTOR)
- return NULL;
-
- return pci_irq_get_affinity(vp_dev->pci_dev, map[index]);
-}
-
#ifdef CONFIG_PM_SLEEP
static int virtio_pci_freeze(struct device *dev)
{
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index ac8c9d7..2038887 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -64,12 +64,18 @@ struct virtio_pci_device {
/* the IO mapping for the PCI config space */
void __iomem *ioaddr;

+ /* MSI-X support */
+ int msix_enabled;
+ int intx_enabled;
cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
char (*msix_names)[256];
- /* Total Number of MSI-X vectors (including per-VQ ones). */
- int msix_vectors;
+ /* Number of available vectors */
+ unsigned msix_vectors;
+ /* Vectors allocated, excluding per-vq vectors if any */
+ unsigned msix_used_vectors;
+
/* Map of per-VQ MSI-X vectors, may be NULL */
unsigned *msix_vector_map;

@@ -83,6 +89,14 @@ struct virtio_pci_device {
u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
};

+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+ VP_MSIX_CONFIG_VECTOR = 0,
+ VP_MSIX_VQ_VECTOR = 1,
+};
+
/* Convert a generic virtio device to our structure */
static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{
@@ -97,8 +111,9 @@ bool vp_notify(struct virtqueue *vq);
void vp_del_vqs(struct virtio_device *vdev);
/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc);
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[]);
const char *vp_bus_name(struct virtio_device *vdev);

/* Setup the affinity for a virtqueue:
@@ -108,8 +123,6 @@ const char *vp_bus_name(struct virtio_device *vdev);
*/
int vp_set_vq_affinity(struct virtqueue *vq, int cpu);

-const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index);
-
#if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
int virtio_pci_legacy_probe(struct virtio_pci_device *);
void virtio_pci_legacy_remove(struct virtio_pci_device *);
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index f7362c5..47292da 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq)

iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);

- if (vp_dev->pci_dev->msix_enabled) {
+ if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */
@@ -190,7 +190,6 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
.set_vq_affinity = vp_set_vq_affinity,
- .get_vq_affinity = vp_get_vq_affinity,
};

/* the PCI probing function */
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 7bc3004..00e6fc1 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -384,12 +384,13 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
}

static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc)
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[])
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
- int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+ int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);

if (rc)
return rc;
@@ -411,7 +412,7 @@ static void del_vq(struct virtqueue *vq)

vp_iowrite16(vq->index, &vp_dev->common->queue_select);

- if (vp_dev->pci_dev->msix_enabled) {
+ if (vp_dev->msix_enabled) {
vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
&vp_dev->common->queue_msix_vector);
/* Flush the write out to device */
@@ -437,7 +438,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
.set_vq_affinity = vp_set_vq_affinity,
- .get_vq_affinity = vp_get_vq_affinity,
};

static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -453,7 +453,6 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
.set_vq_affinity = vp_set_vq_affinity,
- .get_vq_affinity = vp_get_vq_affinity,
};

/**
diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
deleted file mode 100644
index b1ef6e1..0000000
--- a/include/linux/blk-mq-virtio.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _LINUX_BLK_MQ_VIRTIO_H
-#define _LINUX_BLK_MQ_VIRTIO_H
-
-struct blk_mq_tag_set;
-struct virtio_device;
-
-int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
- struct virtio_device *vdev, int first_vec);
-
-#endif /* _LINUX_BLK_MQ_VIRTIO_H */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 62d240e..bb790c4 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -26,6 +26,7 @@ enum cpuhp_state {
CPUHP_ARM_OMAP_WAKE_DEAD,
CPUHP_IRQ_POLL_DEAD,
CPUHP_BLOCK_SOFTIRQ_DEAD,
+ CPUHP_VIRT_SCSI_DEAD,
CPUHP_ACPI_CPUDRV_DEAD,
CPUHP_S390_PFAULT_DEAD,
CPUHP_BLK_MQ_DEAD,
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 8355bab..26c155b 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -7,8 +7,6 @@
#include <linux/virtio_byteorder.h>
#include <uapi/linux/virtio_config.h>

-struct irq_affinity;
-
/**
* virtio_config_ops - operations for configuring a virtio device
* @get: read the value of a configuration field
@@ -58,7 +56,6 @@ struct irq_affinity;
* This returns a pointer to the bus name a la pci_name from which
* the caller can then copy.
* @set_vq_affinity: set the affinity for a virtqueue.
- * @get_vq_affinity: get the affinity for a virtqueue (optional).
*/
typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops {
@@ -71,15 +68,14 @@ struct virtio_config_ops {
void (*set_status)(struct virtio_device *vdev, u8 status);
void (*reset)(struct virtio_device *vdev);
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], struct irq_affinity *desc);
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[]);
void (*del_vqs)(struct virtio_device *);
u64 (*get_features)(struct virtio_device *vdev);
int (*finalize_features)(struct virtio_device *vdev);
const char *(*bus_name)(struct virtio_device *vdev);
int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
- const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev,
- int index);
};

/* If driver didn't advertise the feature, it will never appear. */
@@ -173,7 +169,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
vq_callback_t *callbacks[] = { c };
const char *names[] = { n };
struct virtqueue *vq;
- int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+ int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
if (err < 0)
return ERR_PTR(err);
return vq;
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 15b4385..90007a1 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -79,7 +79,7 @@
* configuration space */
#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20)
/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
-#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled)
+#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)

/* Virtio ABI version, this must match exactly */
#define VIRTIO_PCI_ABI_VERSION 0
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 9d24c0e..6788264 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -532,8 +532,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->vdev = vdev;

ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names,
- NULL);
+ vsock->vqs, callbacks, names);
if (ret < 0)
goto out;

--
2.7.4


--Bn2rw/3z4jIqBvZU--