Re: [RFC PATCH 6/6] vdpa_sim: add support for user VA

From: Stefano Garzarella
Date: Fri Dec 16 2022 - 03:14:09 EST


On Fri, Dec 16, 2022 at 03:26:46PM +0800, Jason Wang wrote:
On Thu, Dec 15, 2022 at 12:31 AM Stefano Garzarella <sgarzare@xxxxxxxxxx> wrote:

The new "use_va" module parameter (default: false) is used in
vdpa_alloc_device() to inform the vDPA framework that the device
supports VA.

vringh is initialized to use VA only when "use_va" is true and the
user's mm has been bound. So, only when the bus supports user VA
(e.g. vhost-vdpa).

vdpasim_mm_work_fn work is used to attach the kthread to the user
address space when the .bind_mm callback is invoked, and to detach
it when the device is reset.

One thing in my mind is that the current datapath is running under
spinlock which prevents us from using iov_iter (which may have page
faults).

We need to get rid of the spinlock first.

Right! I already have a patch for that since I used for the vdpa-blk software device in-kernel PoC where I had the same issue.

I'll add it to the series!



Signed-off-by: Stefano Garzarella <sgarzare@xxxxxxxxxx>
---
drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 +
drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++-
2 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 07ef53ea375e..1b010e5c0445 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -55,6 +55,7 @@ struct vdpasim {
struct vdpasim_virtqueue *vqs;
struct kthread_worker *worker;
struct kthread_work work;
+ struct mm_struct *mm_bound;
struct vdpasim_dev_attr dev_attr;
/* spinlock to synchronize virtqueue state */
spinlock_t lock;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 36a1d2e0a6ba..6e07cedef30c 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");

+static bool use_va;
+module_param(use_va, bool, 0444);
+MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA");
+
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
#define VDPASIM_VENDOR_ID 0

+struct vdpasim_mm_work {
+ struct kthread_work work;
+ struct task_struct *owner;
+ struct mm_struct *mm;
+ bool bind;
+ int ret;
+};
+
+static void vdpasim_mm_work_fn(struct kthread_work *work)
+{
+ struct vdpasim_mm_work *mm_work =
+ container_of(work, struct vdpasim_mm_work, work);
+
+ mm_work->ret = 0;
+
+ if (mm_work->bind) {
+ kthread_use_mm(mm_work->mm);
+#if 0
+ if (mm_work->owner)
+ mm_work->ret = cgroup_attach_task_all(mm_work->owner,
+ current);
+#endif
+ } else {
+#if 0
+ //TODO: check it
+ cgroup_release(current);
+#endif
+ kthread_unuse_mm(mm_work->mm);
+ }
+}
+
+static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
+ struct vdpasim_mm_work *mm_work)
+{
+ struct kthread_work *work = &mm_work->work;
+
+ kthread_init_work(work, vdpasim_mm_work_fn);
+ kthread_queue_work(vdpasim->worker, work);
+
+ spin_unlock(&vdpasim->lock);
+ kthread_flush_work(work);
+ spin_lock(&vdpasim->lock);
+}
+
+static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
+ struct mm_struct *new_mm,
+ struct task_struct *owner)
+{
+ struct vdpasim_mm_work mm_work;
+
+ mm_work.owner = owner;
+ mm_work.mm = new_mm;
+ mm_work.bind = true;
+
+ vdpasim_worker_queue_mm(vdpasim, &mm_work);
+

Should we wait for the work to be finished?

Yep, I'm waiting inside vdpasim_worker_queue_mm() calling kthread_flush_work().

If we will use mutex, I think we can avoid the lock release around that call.


+ if (!mm_work.ret)
+ vdpasim->mm_bound = new_mm;
+
+ return mm_work.ret;
+}
+
+static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
+{
+ struct vdpasim_mm_work mm_work;
+
+ if (!vdpasim->mm_bound)
+ return;
+
+ mm_work.mm = vdpasim->mm_bound;
+ mm_work.bind = false;
+
+ vdpasim_worker_queue_mm(vdpasim, &mm_work);
+
+ vdpasim->mm_bound = NULL;
+}
static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
{
return container_of(vdpa, struct vdpasim, vdpa);
@@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring)
static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ bool va_enabled = use_va && vdpasim->mm_bound;

- vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
+ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
+ va_enabled,
(struct vring_desc *)(uintptr_t)vq->desc_addr,
(struct vring_avail *)
(uintptr_t)vq->driver_addr,
@@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
{
int i;

+ //TODO: should we cancel the works?
+ vdpasim_worker_unbind_mm(vdpasim);

We probably don't need this since it's the virtio level reset so we
need to keep the mm bound in this case. Otherwise we may break the
guest. It should be the responsibility of the driver to call
config_ops->unbind if it needs to do that.

Got it, my biggest concern was when we go from a vhost-vdpa virtio-vdpa, but as you said, in vhost-vdpa I can call unbind before releasing the device.

Thanks,
Stefano