Re: [RFC PATCH v2 2/8] vhost/vsock: rework packet allocation logic

From: Stefano Garzarella
Date: Thu Jun 09 2022 - 04:38:55 EST


On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
For packets received from virtio RX queue, use buddy
allocator instead of 'kmalloc()' to be able to insert
such pages to user provided vma. Single call to
'copy_from_iter()' replaced with per-page loop.

Signed-off-by: Arseniy Krasnov <AVKrasnov@xxxxxxxxxxxxxx>
---
drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
1 file changed, 69 insertions(+), 12 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index e6c9d41db1de..0dc2229f18f7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -58,6 +58,7 @@ struct vhost_vsock {

u32 guest_cid;
bool seqpacket_allow;
+ bool zerocopy_rx_on;

This is per-device, so a single socket can change the behaviour of all the sockets of this device.

Can we do something better?

Maybe we can allocate the header, copy it, find the socket and check if zero-copy is enabled or not for that socket.

Of course we should change or extend virtio_transport_recv_pkt() to avoid to find the socket again.


};

static u32 vhost_transport_get_local_cid(void)
@@ -357,6 +358,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
unsigned int out, unsigned int in)
{
struct virtio_vsock_pkt *pkt;
+ struct vhost_vsock *vsock;
struct iov_iter iov_iter;
size_t nbytes;
size_t len;
@@ -393,20 +395,75 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
return NULL;
}

- pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
- if (!pkt->buf) {
- kfree(pkt);
- return NULL;
- }
-
pkt->buf_len = pkt->len;
+ vsock = container_of(vq->dev, struct vhost_vsock, dev);

- nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
- if (nbytes != pkt->len) {
- vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
- pkt->len, nbytes);
- virtio_transport_free_pkt(pkt);
- return NULL;
+ if (!vsock->zerocopy_rx_on) {
+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+
+ if (!pkt->buf) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ pkt->slab_buf = true;
+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+ pkt->len, nbytes);
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+ } else {
+ struct page *buf_page;
+ ssize_t pkt_len;
+ int page_idx;
+
+ /* This creates memory overrun, as we allocate
+ * at least one page for each packet.
+ */
+ buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
+
+ if (buf_page == NULL) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ pkt->buf = page_to_virt(buf_page);
+
+ page_idx = 0;
+ pkt_len = pkt->len;
+
+ /* As allocated pages are not mapped, process
+ * pages one by one.
+ */
+ while (pkt_len > 0) {
+ void *mapped;
+ size_t to_copy;
+
+ mapped = kmap(buf_page + page_idx);
+
+ if (mapped == NULL) {
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+
+ to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
+
+ nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
+ if (nbytes != to_copy) {
+ vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
+ to_copy, nbytes);
+ kunmap(mapped);
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+
+ kunmap(mapped);
+
+ pkt_len -= to_copy;
+ page_idx++;
+ }
}

return pkt;
--
2.25.1