Re: [Qemu-devel] [PATCH v2 2/2] vfio : add aer process

From: Alex Williamson
Date: Fri Jul 29 2016 - 13:12:24 EST


On Tue, 19 Jul 2016 15:32:43 +0800
Zhou Jie <zhoujie2011@xxxxxxxxxxxxxx> wrote:

> From: Chen Fan <chen.fan.fnst@xxxxxxxxxxxxxx>
>
> During aer err occurs and resume do following to
> protect device from being accessed.
> 1. Make config space read only.
> 2. Disable INTx/MSI Interrupt.
> 3. Do nothing for bar regions.
>
> Signed-off-by: Zhou Jie <zhoujie2011@xxxxxxxxxxxxxx>
> ---
> drivers/vfio/pci/vfio_pci.c | 30 ++++++++++++++++++++++++++++++
> drivers/vfio/pci/vfio_pci_private.h | 2 ++
> include/uapi/linux/vfio.h | 2 ++
> 3 files changed, 34 insertions(+)
>
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 2d12b03..dd96b60 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -318,6 +318,7 @@ static int vfio_pci_open(void *device_data)
> return -ENODEV;
>
> mutex_lock(&driver_lock);
> + init_completion(&vdev->aer_error_completion);
>
> if (!vdev->refcnt) {
> ret = vfio_pci_enable(vdev);
> @@ -571,6 +572,16 @@ static long vfio_pci_ioctl(void *device_data,
> struct vfio_pci_device *vdev = device_data;
> unsigned long minsz;
>
> + if (vdev->aer_error_in_progress && (cmd == VFIO_DEVICE_SET_IRQS ||
> + cmd == VFIO_DEVICE_RESET || cmd == VFIO_DEVICE_PCI_HOT_RESET)) {
> + int ret;
> + ret = wait_for_completion_interruptible(
> + &vdev->aer_error_completion);
> + if (ret) {
> + return ret;
> + }

No brackets necessary.

> + }
> +
> if (cmd == VFIO_DEVICE_GET_INFO) {
> struct vfio_device_info info;
>
> @@ -587,6 +598,10 @@ static long vfio_pci_ioctl(void *device_data,
> if (vdev->reset_works)
> info.flags |= VFIO_DEVICE_FLAGS_RESET;
>
> + info.flags |= VFIO_DEVICE_FLAGS_AERPROCESS;
> + if (vdev->aer_error_in_progress)
> + info.flags |= VFIO_DEVICE_FLAGS_INAERPROCESS;
> +
> info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
> info.num_irqs = VFIO_PCI_NUM_IRQS;
>
> @@ -996,6 +1011,14 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
>
> switch (index) {
> case VFIO_PCI_CONFIG_REGION_INDEX:
> + if (vdev->aer_error_in_progress && iswrite) {
> + int ret;
> + ret = wait_for_completion_interruptible(
> + &vdev->aer_error_completion);
> + if (ret) {
> + return ret;
> + }
> + }
> return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
>
> case VFIO_PCI_ROM_REGION_INDEX:
> @@ -1226,6 +1249,10 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
>
> mutex_lock(&vdev->igate);
>
> + vdev->aer_error_in_progress = true;
> + vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
> + VFIO_IRQ_SET_ACTION_TRIGGER,
> + vdev->irq_type, 0, 0, NULL);
> if (vdev->err_trigger)
> eventfd_signal(vdev->err_trigger, 1);
>
> @@ -1252,6 +1279,9 @@ static void vfio_pci_aer_resume(struct pci_dev *pdev)
> }
>
> mutex_lock(&vdev->igate);
> +
> + vdev->aer_error_in_progress = false;
> + complete_all(&vdev->aer_error_completion);
> if (vdev->resume_trigger)
> eventfd_signal(vdev->resume_trigger, 1);
>
> diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
> index 80d4ddd..2f151f5 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -84,6 +84,8 @@ struct vfio_pci_device {
> bool has_vga;
> bool needs_reset;
> bool nointx;
> + bool aer_error_in_progress;
> + struct completion aer_error_completion;
> struct pci_saved_state *pci_saved_state;
> int refcnt;
> struct eventfd_ctx *err_trigger;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 34ab138..276ce50 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -198,6 +198,8 @@ struct vfio_device_info {
> #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
> #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */
> #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
> +#define VFIO_DEVICE_FLAGS_AERPROCESS (1 << 4) /* support aer error progress */
> +#define VFIO_DEVICE_FLAGS_INAERPROCESS (1 << 5)/* status in aer error progress */
> __u32 num_regions; /* Max region index + 1 */
> __u32 num_irqs; /* Max IRQ index + 1 */
> };

Clearly this has only been tested for a single instance of an AER error
event and resume per device. Are the things you're intending to block
actually blocked for subsequent events? Note how complete_all() fills
the done field to let all current and future waiters go through and
nowhere is there a call to reinit_completion() to drain that path.
Thanks,

Alex