Re: [RFC PATCH v3 05/17] vsock/virtio: MSG_ZEROCOPY flag support

From: Arseniy Krasnov
Date: Tue May 23 2023 - 01:50:46 EST




On 22.05.2023 16:09, Arseniy Krasnov wrote:
>
>
> On 22.05.2023 16:11, Simon Horman wrote:
>> On Mon, May 22, 2023 at 10:39:38AM +0300, Arseniy Krasnov wrote:
>>> This adds handling of MSG_ZEROCOPY flag on transmission path: if this
>>> flag is set and zerocopy transmission is possible, then non-linear skb
>>> will be created and filled with the pages of user's buffer. Pages of
>>> user's buffer are locked in memory by 'get_user_pages()'.
>>>
>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@xxxxxxxxxxxxxx>
>>> ---
>>> net/vmw_vsock/virtio_transport_common.c | 305 +++++++++++++++++++-----
>>> 1 file changed, 243 insertions(+), 62 deletions(-)
>>>
>>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>> index 9854f48a0544..5acf824afe41 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -37,73 +37,161 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
>>> return container_of(t, struct virtio_transport, transport);
>>> }
>>>
>>> -/* Returns a new packet on success, otherwise returns NULL.
>>> - *
>>> - * If NULL is returned, errp is set to a negative errno.
>>> - */
>>> -static struct sk_buff *
>>> -virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
>>> - size_t len,
>>> - u32 src_cid,
>>> - u32 src_port,
>>> - u32 dst_cid,
>>> - u32 dst_port)
>>> -{
>>> - const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
>>> - struct virtio_vsock_hdr *hdr;
>>> - struct sk_buff *skb;
>>> - void *payload;
>>> - int err;
>>> +static bool virtio_transport_can_zcopy(struct virtio_vsock_pkt_info *info,
>>> + size_t max_to_send)
>>> +{
>>> + struct iov_iter *iov_iter;
>>> + size_t max_skb_cap;
>>> + size_t bytes;
>>> + int i;
>>>
>>> - skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
>>> - if (!skb)
>>> - return NULL;
>>> + if (!info->msg)
>>> + return false;
>>>
>>> - hdr = virtio_vsock_hdr(skb);
>>> - hdr->type = cpu_to_le16(info->type);
>>> - hdr->op = cpu_to_le16(info->op);
>>> - hdr->src_cid = cpu_to_le64(src_cid);
>>> - hdr->dst_cid = cpu_to_le64(dst_cid);
>>> - hdr->src_port = cpu_to_le32(src_port);
>>> - hdr->dst_port = cpu_to_le32(dst_port);
>>> - hdr->flags = cpu_to_le32(info->flags);
>>> - hdr->len = cpu_to_le32(len);
>>> + if (!(info->flags & MSG_ZEROCOPY) && !info->msg->msg_ubuf)
>>> + return false;
>>>
>>> - if (info->msg && len > 0) {
>>> - payload = skb_put(skb, len);
>>> - err = memcpy_from_msg(payload, info->msg, len);
>>> - if (err)
>>> - goto out;
>>> + iov_iter = &info->msg->msg_iter;
>>> +
>>> + if (iter_is_ubuf(iov_iter)) {
>>> + if (offset_in_page(iov_iter->ubuf))
>>> + return false;
>>> +
>>> + return true;
>>> + }
>>> +
>>> + if (!iter_is_iovec(iov_iter))
>>> + return false;
>>> +
>>> + if (iov_iter->iov_offset)
>>> + return false;
>>> +
>>> + /* We can't send whole iov. */
>>> + if (iov_iter->count > max_to_send)
>>> + return false;
>>> +
>>> + for (bytes = 0, i = 0; i < iov_iter->nr_segs; i++) {
>>> + const struct iovec *iovec;
>>> + int pages_in_elem;
>>> +
>>> + iovec = &iov_iter->__iov[i];
>>> +
>>> + /* Base must be page aligned. */
>>> + if (offset_in_page(iovec->iov_base))
>>> + return false;
>>>
>>> - if (msg_data_left(info->msg) == 0 &&
>>> - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
>>> - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
>>> + /* Only last element could have non page aligned size. */
>>> + if (i != (iov_iter->nr_segs - 1)) {
>>> + if (offset_in_page(iovec->iov_len))
>>> + return false;
>>>
>>> - if (info->msg->msg_flags & MSG_EOR)
>>> - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
>>> + pages_in_elem = iovec->iov_len >> PAGE_SHIFT;
>>> + } else {
>>> + pages_in_elem = round_up(iovec->iov_len, PAGE_SIZE);
>>> + pages_in_elem >>= PAGE_SHIFT;
>>> }
>>> +
>>> + bytes += (pages_in_elem * PAGE_SIZE);

^^^
This alignment (base and length) checks are not needed, because virtio supports unaligned buffers. I'll remove
it in v4.

Thanks, Arseniy

>>> }
>>
>> Hi Arseniy,
>>
>> bytes is set but the loop above, but seems otherwise unused in this function.
>>
>>>
>>> - if (info->reply)
>>> - virtio_vsock_skb_set_reply(skb);
>>> + /* How many bytes we can pack to single skb. Maximum packet
>>> + * buffer size is needed to allow vhost handle such packets,
>>> + * otherwise they will be dropped.
>>> + */
>>> + max_skb_cap = min((unsigned int)(MAX_SKB_FRAGS * PAGE_SIZE),
>>> + (unsigned int)VIRTIO_VSOCK_MAX_PKT_BUF_SIZE);
>>
>> Likewise, max_skb_cap seems to be set but unused in this function.
>>
>
> Exactly! Seems I forgot to remove it since v2. Thanks for this and above!
>
>>>
>>> - trace_virtio_transport_alloc_pkt(src_cid, src_port,
>>> - dst_cid, dst_port,
>>> - len,
>>> - info->type,
>>> - info->op,
>>> - info->flags);
>>> + return true;
>>> +}
>>
>> ...