Re: [PATCH net-next v2 2/5] virtio_ring: support creating packed ring

From: Tiwei Bie
Date: Sun Sep 09 2018 - 22:29:52 EST


On Fri, Sep 07, 2018 at 10:03:24AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 11, 2018 at 10:27:08AM +0800, Tiwei Bie wrote:
> > This commit introduces the support for creating packed ring.
> > All split ring specific functions are added _split suffix.
> > Some necessary stubs for packed ring are also added.
> >
> > Signed-off-by: Tiwei Bie <tiwei.bie@xxxxxxxxx>
>
> I'd rather have a patch just renaming split functions, then
> add all packed stuff in as a separate patch on top.

Sure, I will do that.

>
>
> > ---
> > drivers/virtio/virtio_ring.c | 801 +++++++++++++++++++++++------------
> > include/linux/virtio_ring.h | 8 +-
> > 2 files changed, 546 insertions(+), 263 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 814b395007b2..c4f8abc7445a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -60,11 +60,15 @@ struct vring_desc_state {
> > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
> > };
> >
> > +struct vring_desc_state_packed {
> > + int next; /* The next desc state. */
>
> So this can go away with IN_ORDER?

Yes. If IN_ORDER is negotiated, next won't be needed anymore.
Currently, IN_ORDER isn't included in this patch set, because
some changes for split ring are needed to make sure that it
will use the descs in the expected order. After that,
optimizations can be done for both of split ring and packed
ring respectively.

>
> > +};
> > +
> > struct vring_virtqueue {
> > struct virtqueue vq;
> >
> > - /* Actual memory layout for this queue */
> > - struct vring vring;
> > + /* Is this a packed ring? */
> > + bool packed;
> >
> > /* Can we use weak barriers? */
> > bool weak_barriers;
> > @@ -86,11 +90,39 @@ struct vring_virtqueue {
> > /* Last used index we've seen. */
> > u16 last_used_idx;
> >
> > - /* Last written value to avail->flags */
> > - u16 avail_flags_shadow;
> > + union {
> > + /* Available for split ring */
> > + struct {
> > + /* Actual memory layout for this queue. */
> > + struct vring vring;
> >
> > - /* Last written value to avail->idx in guest byte order */
> > - u16 avail_idx_shadow;
> > + /* Last written value to avail->flags */
> > + u16 avail_flags_shadow;
> > +
> > + /* Last written value to avail->idx in
> > + * guest byte order. */
> > + u16 avail_idx_shadow;
> > + };
>
> Name this field split so it's easier to detect misuse of e.g.
> packed fields in split code?

Good point, I'll do that.

>
> > +
> > + /* Available for packed ring */
> > + struct {
> > + /* Actual memory layout for this queue. */
> > + struct vring_packed vring_packed;
> > +
> > + /* Driver ring wrap counter. */
> > + bool avail_wrap_counter;
> > +
> > + /* Device ring wrap counter. */
> > + bool used_wrap_counter;
> > +
> > + /* Index of the next avail descriptor. */
> > + u16 next_avail_idx;
> > +
> > + /* Last written value to driver->flags in
> > + * guest byte order. */
> > + u16 event_flags_shadow;
> > + };
> > + };
[...]
> > +
> > +/*
> > + * The layout for the packed ring is a continuous chunk of memory
> > + * which looks like this.
> > + *
> > + * struct vring_packed {
> > + * // The actual descriptors (16 bytes each)
> > + * struct vring_packed_desc desc[num];
> > + *
> > + * // Padding to the next align boundary.
> > + * char pad[];
> > + *
> > + * // Driver Event Suppression
> > + * struct vring_packed_desc_event driver;
> > + *
> > + * // Device Event Suppression
> > + * struct vring_packed_desc_event device;
> > + * };
> > + */
>
> Why not just allocate event structures separately?
> Is it a win to have them share a cache line for some reason?

Will do that.

>
> > +static inline void vring_init_packed(struct vring_packed *vr, unsigned int num,
> > + void *p, unsigned long align)
> > +{
> > + vr->num = num;
> > + vr->desc = p;
> > + vr->driver = (void *)ALIGN(((uintptr_t)p +
> > + sizeof(struct vring_packed_desc) * num), align);
> > + vr->device = vr->driver + 1;
> > +}
>
> What's all this about alignment? Where does it come from?

It comes from the `vring_align` parameter of vring_create_virtqueue()
and vring_new_virtqueue():

https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L1061
https://github.com/torvalds/linux/blob/a49a9dcce802/drivers/virtio/virtio_ring.c#L1123

Should I just ignore it in packed ring?

CCW defined this:

#define KVM_VIRTIO_CCW_RING_ALIGN 4096

I'm not familiar with CCW. Currently, in this patch set, packed ring
isn't enabled on CCW dues to some legacy accessors are not implemented
in packed ring yet.

>
> > +
> > +static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
> > +{
> > + return ((sizeof(struct vring_packed_desc) * num + align - 1)
> > + & ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
> > +}
[...]
> > @@ -1129,10 +1388,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
> > void (*callback)(struct virtqueue *vq),
> > const char *name)
> > {
> > - struct vring vring;
> > - vring_init(&vring, num, pages, vring_align);
> > - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
> > - notify, callback, name);
> > + union vring_union vring;
> > + bool packed;
> > +
> > + packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
> > + if (packed)
> > + vring_init_packed(&vring.vring_packed, num, pages, vring_align);
> > + else
> > + vring_init(&vring.vring_split, num, pages, vring_align);
>
>
> vring_init in the UAPI header is more or less a bug.
> I'd just stop using it, keep it around for legacy userspace.

Got it. I'd like to do that. Thanks.

>
> > +
> > + return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
> > + context, notify, callback, name);
> > }
> > EXPORT_SYMBOL_GPL(vring_new_virtqueue);
> >
> > @@ -1142,7 +1408,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
> >
> > if (vq->we_own_ring) {
> > vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
> > - vq->vring.desc, vq->queue_dma_addr);
> > + vq->packed ? (void *)vq->vring_packed.desc :
> > + (void *)vq->vring.desc,
> > + vq->queue_dma_addr);
> > }
> > list_del(&_vq->list);
> > kfree(vq);
> > @@ -1184,7 +1452,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
> >
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > - return vq->vring.num;
> > + return vq->packed ? vq->vring_packed.num : vq->vring.num;
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
> >
> > @@ -1227,6 +1495,10 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
> >
> > BUG_ON(!vq->we_own_ring);
> >
> > + if (vq->packed)
> > + return vq->queue_dma_addr + ((char *)vq->vring_packed.driver -
> > + (char *)vq->vring_packed.desc);
> > +
> > return vq->queue_dma_addr +
> > ((char *)vq->vring.avail - (char *)vq->vring.desc);
> > }
> > @@ -1238,11 +1510,16 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
> >
> > BUG_ON(!vq->we_own_ring);
> >
> > + if (vq->packed)
> > + return vq->queue_dma_addr + ((char *)vq->vring_packed.device -
> > + (char *)vq->vring_packed.desc);
> > +
> > return vq->queue_dma_addr +
> > ((char *)vq->vring.used - (char *)vq->vring.desc);
> > }
> > EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
> >
> > +/* Only available for split ring */
> > const struct vring *virtqueue_get_vring(struct virtqueue *vq)
> > {
> > return &to_vvq(vq)->vring;
> > diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> > index fab02133a919..992142b35f55 100644
> > --- a/include/linux/virtio_ring.h
> > +++ b/include/linux/virtio_ring.h
> > @@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
> > struct virtio_device;
> > struct virtqueue;
> >
> > +union vring_union {
> > + struct vring vring_split;
> > + struct vring_packed vring_packed;
> > +};
> > +
> > /*
> > * Creates a virtqueue and allocates the descriptor ring. If
> > * may_reduce_num is set, then this may allocate a smaller ring than
> > @@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
> >
> > /* Creates a virtqueue with a custom layout. */
> > struct virtqueue *__vring_new_virtqueue(unsigned int index,
> > - struct vring vring,
> > + union vring_union vring,
> > + bool packed,
> > struct virtio_device *vdev,
> > bool weak_barriers,
> > bool ctx,
> > --
> > 2.18.0