Re: [PATCH RFC 1/3] vdpa/mlx5: Implement susupend virtqueue callback

From: Eugenio Perez Martin
Date: Mon Jun 20 2022 - 07:11:10 EST


On Mon, Jun 20, 2022 at 12:07 PM Michael S. Tsirkin <mst@xxxxxxxxxx> wrote:
>
> On Mon, Jun 20, 2022 at 11:58:33AM +0200, Eugenio Perez Martin wrote:
> > On Mon, Jun 20, 2022 at 10:56 AM Jason Wang <jasowang@xxxxxxxxxx> wrote:
> > >
> > > On Thu, Jun 16, 2022 at 9:27 PM Eli Cohen <elic@xxxxxxxxxx> wrote:
> > > >
> > > > Implement the suspend callback allowing to suspend the virtqueues so
> > > > they stop processing descriptors. This is required to allow the shadow
> > > > virtqueue to kick in.
> > > >
> > > > Signed-off-by: Eli Cohen <elic@xxxxxxxxxx>
> > > > ---
> > > > drivers/vdpa/mlx5/net/mlx5_vnet.c | 68 +++++++++++++++++++++++++++++-
> > > > include/linux/mlx5/mlx5_ifc_vdpa.h | 8 ++++
> > > > 2 files changed, 75 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > index fb0b23e71383..ea4bc8a0cd25 100644
> > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > @@ -895,6 +895,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > if (err)
> > > > goto err_cmd;
> > > >
> > > > + mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
> > > > kfree(in);
> > > > mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
> > > >
> > > > @@ -922,6 +923,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
> > > > mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
> > > > return;
> > > > }
> > > > + mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > umems_destroy(ndev, mvq);
> > > > }
> > > >
> > > > @@ -1121,6 +1123,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
> > > > return err;
> > > > }
> > > >
> > > > +static bool is_valid_state_change(int oldstate, int newstate)
> > > > +{
> > > > + switch (oldstate) {
> > > > + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
> > > > + return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
> > > > + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
> > > > + return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
> > > > + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
> > > > + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
> > > > + default:
> > > > + return false;
> > > > + }
> > > > +}
> > > > +
> > > > static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
> > > > {
> > > > int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
> > > > @@ -1130,6 +1146,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> > > > void *in;
> > > > int err;
> > > >
> > > > + if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
> > > > + return 0;
> > > > +
> > > > + if (!is_valid_state_change(mvq->fw_state, state))
> > > > + return -EINVAL;
> > > > +
> > > > in = kzalloc(inlen, GFP_KERNEL);
> > > > if (!in)
> > > > return -ENOMEM;
> > > > @@ -1991,6 +2013,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > struct mlx5_vdpa_virtqueue *mvq;
> > > > + int err;
> > > >
> > > > if (!mvdev->actual_features)
> > > > return;
> > > > @@ -2004,8 +2027,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > }
> > > >
> > > > mvq = &ndev->vqs[idx];
> > > > - if (!ready)
> > > > + if (!ready) {
> > > > suspend_vq(ndev, mvq);
> > > > + } else {
> > > > + err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> > > > + if (err) {
> > > > + mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
> > > > + ready = false;
> > > > + }
> > > > + }
> > > > +
> > > >
> > > > mvq->ready = ready;
> > > > }
> > > > @@ -2732,6 +2763,39 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
> > > > return err;
> > > > }
> > > >
> > > > +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev, bool suspend)
> > > > +{
> > > > + struct mlx5_control_vq *cvq;
> > > > +
> > > > + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
> > > > + return;
> > > > +
> > > > + cvq = &mvdev->cvq;
> > > > + cvq->ready = !suspend;
> > > > +}
> > >
> > > It looks to me we need to synchronize this with reslock. And this
> > > probably deserve a dedicated fix.
> > >
> > > > +
> > > > +static int mlx5_vdpa_suspend(struct vdpa_device *vdev, bool suspend)
> > > > +{
> > > > + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> > > > + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> > > > + struct mlx5_vdpa_virtqueue *mvq;
> > > > + int i;
> > > > +
> > > > + if (!suspend) {
> > > > + mlx5_vdpa_warn(mvdev, "Resume of virtqueues is not supported\n");
> > > > + return -EOPNOTSUPP;
> > > > + }
> > > > +
> > > > + down_write(&ndev->reslock);
> > > > + for (i = 0; i < ndev->cur_num_vqs; i++) {
> > > > + mvq = &ndev->vqs[i];
> > > > + suspend_vq(ndev, mvq);
> > > > + }
> > > > + mlx5_vdpa_cvq_suspend(mvdev, suspend);
> > >
> > > Do we need to synchronize with the carrier work here? Otherwise we may
> > > get config notification after suspending.
> > >
> > > > + up_write(&ndev->reslock);
> > > > + return 0;
> > > > +}
> > > > +
> > > > static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > .set_vq_address = mlx5_vdpa_set_vq_address,
> > > > .set_vq_num = mlx5_vdpa_set_vq_num,
> > > > @@ -2762,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
> > > > .get_generation = mlx5_vdpa_get_generation,
> > > > .set_map = mlx5_vdpa_set_map,
> > > > .free = mlx5_vdpa_free,
> > > > + .suspend = mlx5_vdpa_suspend,
> > >
> > > I don't see the vDPA bus patch to enable this method. Or anything I missed here?
> > >
> >
> > Should we add
> > Based-on: <20220526124338.36247-1-eperezma@xxxxxxxxxx>
> >
> > To this series?
>
> If it's based on your patch then mentioning this in the log and
> including the S.O.B. is customary. what would this tag add?
> was there relevant discussion?
>

Sorry I think I need to expand it. I was using the meaning of qemu's
submitting patches guide, which is:
It is also okay to base patches on top of other on-going work that is
not yet part of the git master branch.

So these patches are not modifications of my patches, they should be
applied on top of that series.

My series is the one that adds the "vdpa bus method" Jason is
referring to (.suspend). That series is able to suspend the vdpa net
simulator by itself. If we apply this series *on top* of my previous
series, it gives us the vdpa op to suspend the mlx device.

Thanks!

>
> > > Thanks
> > >
> > > > };
> > > >
> > > > static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
> > > > @@ -2827,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
> > > > mvq->index = i;
> > > > mvq->ndev = ndev;
> > > > mvq->fwqp.fw = true;
> > > > + mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
> > > > }
> > > > for (; i < ndev->mvdev.max_vqs; i++) {
> > > > mvq = &ndev->vqs[i];
> > > > diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > index 4414ed5b6ed2..423562f39d3c 100644
> > > > --- a/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
> > > > @@ -150,6 +150,14 @@ enum {
> > > > MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3,
> > > > };
> > > >
> > > > +/* This indicates that the object was not created or has alreadyi
> > > > + * been desroyed. It is very safe to assume that this object will never
> > > > + * have so many states
> > > > + */
> > > > +enum {
> > > > + MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
> > > > +};
> > > > +
> > > > enum {
> > > > MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0,
> > > > MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1,
> > > > --
> > > > 2.35.1
> > > >
> > >
>