Re: [PATCH net-next 8/8] vhost: event suppression for packed ring

From: Wei Xu
Date: Wed Jul 04 2018 - 00:14:03 EST


On Tue, Jul 03, 2018 at 01:38:04PM +0800, Jason Wang wrote:
> This patch introduces support for event suppression. This is done by
> have a two areas: device area and driver area. One side could then try
> to disable or enable (delayed) notification from other side by using a
> boolean hint or event index interface in the areas.
>
> For more information, please refer Virtio spec.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
> ---
> drivers/vhost/vhost.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++----
> drivers/vhost/vhost.h | 10 ++-
> 2 files changed, 185 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 0f3f07c..cccbc82 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -1115,10 +1115,15 @@ static int vq_access_ok_packed(struct vhost_virtqueue *vq, unsigned int num,
> struct vring_used __user *used)
> {
> struct vring_desc_packed *packed = (struct vring_desc_packed *)desc;
> + struct vring_packed_desc_event *driver_event =
> + (struct vring_packed_desc_event *)avail;
> + struct vring_packed_desc_event *device_event =
> + (struct vring_packed_desc_event *)used;
>
> - /* TODO: check device area and driver area */
> return access_ok(VERIFY_READ, packed, num * sizeof(*packed)) &&
> - access_ok(VERIFY_WRITE, packed, num * sizeof(*packed));
> + access_ok(VERIFY_WRITE, packed, num * sizeof(*packed)) &&

R/W parameter doesn't make sense to most architectures and the comment in x86
says WRITE is a superset of READ, is it possible to converge them here?

/**
* access_ok: - Checks if a user space pointer is valid
* @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
* %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
* to write to a block, it is always safe to read from it.
* @addr: User space pointer to start of block to check
* @size: Size of block to check
*
* Context: User context only. This function may sleep if pagefaults are
* enabled.
*
* Checks if a pointer to a block of memory in user space is valid.
*
* Returns true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid.
*
* Note that, depending on architecture, this function probably just
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
#define access_ok(type, addr, size)
......

Thanks,
Wei

> + access_ok(VERIFY_READ, driver_event, sizeof(*driver_event)) &&
> + access_ok(VERIFY_WRITE, device_event, sizeof(*device_event));
> }
>
> static int vq_access_ok_split(struct vhost_virtqueue *vq, unsigned int num,
> @@ -1193,14 +1198,27 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
> return true;
> }
>
> -int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
> +int vq_iotlb_prefetch_packed(struct vhost_virtqueue *vq)
> +{
> + int num = vq->num;
> +
> + return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
> + num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
> + iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->desc,
> + num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
> + iotlb_access_ok(vq, VHOST_ACCESS_RO,
> + (u64)(uintptr_t)vq->driver_event,
> + sizeof(*vq->driver_event), VHOST_ADDR_AVAIL) &&
> + iotlb_access_ok(vq, VHOST_ACCESS_WO,
> + (u64)(uintptr_t)vq->device_event,
> + sizeof(*vq->device_event), VHOST_ADDR_USED);
> +}
> +
> +int vq_iotlb_prefetch_split(struct vhost_virtqueue *vq)
> {
> size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
> unsigned int num = vq->num;
>
> - if (!vq->iotlb)
> - return 1;
> -
> return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
> num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
> iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
> @@ -1212,6 +1230,17 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
> num * sizeof(*vq->used->ring) + s,
> VHOST_ADDR_USED);
> }
> +
> +int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
> +{
> + if (!vq->iotlb)
> + return 1;
> +
> + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
> + return vq_iotlb_prefetch_packed(vq);
> + else
> + return vq_iotlb_prefetch_split(vq);
> +}
> EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
>
> /* Can we log writes? */
> @@ -1771,6 +1800,50 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
> return 0;
> }
>
> +static int vhost_update_device_flags(struct vhost_virtqueue *vq,
> + __virtio16 device_flags)
> +{
> + void __user *flags;
> +
> + if (vhost_put_user(vq, device_flags, &vq->device_event->flags,
> + VHOST_ADDR_USED) < 0)
> + return -EFAULT;
> + if (unlikely(vq->log_used)) {
> + /* Make sure the flag is seen before log. */
> + smp_wmb();
> + /* Log used flag write. */
> + flags = &vq->device_event->flags;
> + log_write(vq->log_base, vq->log_addr +
> + (flags - (void __user *)vq->device_event),
> + sizeof(vq->device_event->flags));
> + if (vq->log_ctx)
> + eventfd_signal(vq->log_ctx, 1);
> + }
> + return 0;
> +}
> +
> +static int vhost_update_device_off_wrap(struct vhost_virtqueue *vq,
> + __virtio16 device_off_wrap)
> +{
> + void __user *off_wrap;
> +
> + if (vhost_put_user(vq, device_off_wrap, &vq->device_event->off_wrap,
> + VHOST_ADDR_USED) < 0)
> + return -EFAULT;
> + if (unlikely(vq->log_used)) {
> + /* Make sure the flag is seen before log. */
> + smp_wmb();
> + /* Log used flag write. */
> + off_wrap = &vq->device_event->off_wrap;
> + log_write(vq->log_base, vq->log_addr +
> + (off_wrap - (void __user *)vq->device_event),
> + sizeof(vq->device_event->off_wrap));
> + if (vq->log_ctx)
> + eventfd_signal(vq->log_ctx, 1);
> + }
> + return 0;
> +}
> +
> static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
> {
> if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
> @@ -2756,16 +2829,13 @@ int vhost_add_used_n(struct vhost_virtqueue *vq,
> }
> EXPORT_SYMBOL_GPL(vhost_add_used_n);
>
> -static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
> +static bool vhost_notify_split(struct vhost_dev *dev,
> + struct vhost_virtqueue *vq)
> {
> __u16 old, new;
> __virtio16 event;
> bool v;
>
> - /* TODO: check driver area */
> - if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
> - return true;
> -
> /* Flush out used index updates. This is paired
> * with the barrier that the Guest executes when enabling
> * interrupts. */
> @@ -2798,6 +2868,64 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
> return vring_need_event(vhost16_to_cpu(vq, event), new, old);
> }
>
> +static bool vhost_notify_packed(struct vhost_dev *dev,
> + struct vhost_virtqueue *vq)
> +{
> + __virtio16 event_off_wrap, event_flags;
> + __u16 old, new, off_wrap;
> + bool v;
> +
> + /* Flush out used descriptors updates. This is paired
> + * with the barrier that the Guest executes when enabling
> + * interrupts.
> + */
> + smp_mb();
> +
> + if (vhost_get_avail(vq, event_flags,
> + &vq->driver_event->flags) < 0) {
> + vq_err(vq, "Failed to get driver desc_event_flags");
> + return true;
> + }
> +
> + if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX))
> + return event_flags !=
> + cpu_to_vhost16(vq, RING_EVENT_FLAGS_DISABLE);
> +
> + old = vq->signalled_used;
> + v = vq->signalled_used_valid;
> + new = vq->signalled_used = vq->last_used_idx;
> + vq->signalled_used_valid = true;
> +
> + if (event_flags != cpu_to_vhost16(vq, RING_EVENT_FLAGS_DESC))
> + return event_flags !=
> + cpu_to_vhost16(vq, RING_EVENT_FLAGS_DISABLE);
> +
> + /* Read desc event flags before event_off and event_wrap */
> + smp_rmb();
> +
> + if (vhost_get_avail(vq, event_off_wrap,
> + &vq->driver_event->off_wrap) < 0) {
> + vq_err(vq, "Failed to get driver desc_event_off/wrap");
> + return true;
> + }
> +
> + off_wrap = vhost16_to_cpu(vq, event_off_wrap);
> +
> + if (unlikely(!v))
> + return true;
> +
> + return vhost_vring_packed_need_event(vq, vq->last_used_wrap_counter,
> + off_wrap, new, old);
> +}
> +
> +static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
> +{
> + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
> + return vhost_notify_packed(dev, vq);
> + else
> + return vhost_notify_split(dev, vq);
> +}
> +
> /* This actually signals the guest, using eventfd. */
> void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
> {
> @@ -2875,10 +3003,34 @@ static bool vhost_enable_notify_packed(struct vhost_dev *dev,
> struct vhost_virtqueue *vq)
> {
> struct vring_desc_packed *d = vq->desc_packed + vq->avail_idx;
> - __virtio16 flags;
> + __virtio16 flags = cpu_to_vhost16(vq, RING_EVENT_FLAGS_ENABLE);
> int ret;
>
> - /* TODO: enable notification through device area */
> + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
> + return false;
> + vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
> +
> + if (vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
> + __virtio16 off_wrap = cpu_to_vhost16(vq, vq->avail_idx |
> + vq->avail_wrap_counter << 15);
> +
> + ret = vhost_update_device_off_wrap(vq, off_wrap);
> + if (ret) {
> + vq_err(vq, "Failed to write to off warp at %p: %d\n",
> + &vq->device_event->off_wrap, ret);
> + return false;
> + }
> + /* Make sure off_wrap is wrote before flags */
> + smp_wmb();
> + flags = cpu_to_vhost16(vq, RING_EVENT_FLAGS_DESC);
> + }
> +
> + ret = vhost_update_device_flags(vq, flags);
> + if (ret) {
> + vq_err(vq, "Failed to enable notification at %p: %d\n",
> + &vq->device_event->flags, ret);
> + return false;
> + }
>
> /* They could have slipped one in as we were doing that: make
> * sure it's written, then check again.
> @@ -2945,7 +3097,18 @@ EXPORT_SYMBOL_GPL(vhost_enable_notify);
> static void vhost_disable_notify_packed(struct vhost_dev *dev,
> struct vhost_virtqueue *vq)
> {
> - /* TODO: disable notification through device area */
> + __virtio16 flags;
> + int r;
> +
> + if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
> + return;
> + vq->used_flags |= VRING_USED_F_NO_NOTIFY;
> +
> + flags = cpu_to_vhost16(vq, RING_EVENT_FLAGS_DISABLE);
> + r = vhost_update_device_flags(vq, flags);
> + if (r)
> + vq_err(vq, "Failed to enable notification at %p: %d\n",
> + &vq->device_event->flags, r);
> }
>
> static void vhost_disable_notify_split(struct vhost_dev *dev,
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index db09958..d071daf 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -96,8 +96,14 @@ struct vhost_virtqueue {
> struct vring_desc __user *desc;
> struct vring_desc_packed __user *desc_packed;
> };
> - struct vring_avail __user *avail;
> - struct vring_used __user *used;
> + union {
> + struct vring_avail __user *avail;
> + struct vring_packed_desc_event __user *driver_event;
> + };
> + union {
> + struct vring_used __user *used;
> + struct vring_packed_desc_event __user *device_event;
> + };
> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> struct file *kick;
> struct eventfd_ctx *call_ctx;
> --
> 2.7.4
>