Re: [PATCH 4/4] virtiofs: Support blocking posix locks (fcntl(F_SETLKW))

From: Stefan Hajnoczi
Date: Thu Nov 21 2019 - 12:00:34 EST


On Fri, Nov 15, 2019 at 03:57:05PM -0500, Vivek Goyal wrote:
> As of now we don't support blocking variant of posix locks and daemon returns
> -EOPNOTSUPP. Reason being that it can lead to deadlocks. Virtqueue size is
> limited and it is possible we fill virtqueue with all the requests of
> fcntl(F_SETLKW) and wait for reply. And later a subsequent unlock request
> can't make progress because virtqueue is full. And that means F_SETLKW can't
> make progress and we are deadlocked.
>
> Use notification queue to solve this problem. After submitting lock request
> device will send a reply asking requester to wait. Once lock is available,
> requester will get a notification saying locking is available. That way
> we don't keep the request virtqueue busy while we are waiting for lock
> and further unlock requests can make progress.
>
> When we get a reply in response to lock request, we need a way to know if
> we need to wait for notification or not. I have overloaded the
> fuse_out_header->error field. If the value of ->error is 1, that's a signal
> to caller to wait for lock notification.
>
> Signed-off-by: Vivek Goyal <vgoyal@xxxxxxxxxx>
> ---
> fs/fuse/virtio_fs.c | 78 ++++++++++++++++++++++++++++++++++++++-
> include/uapi/linux/fuse.h | 7 ++++
> 2 files changed, 84 insertions(+), 1 deletion(-)
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 21d8d9d7d317..8aa9fc996556 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -35,6 +35,7 @@ struct virtio_fs_vq {
> struct work_struct done_work;
> struct list_head queued_reqs;
> struct list_head end_reqs; /* End these requests */
> + struct list_head wait_reqs; /* requests waiting for notification */
> struct virtio_fs_notify_node *notify_nodes;
> struct list_head notify_reqs; /* List for queuing notify requests */
> struct delayed_work dispatch_work;
> @@ -85,7 +86,6 @@ struct virtio_fs_notify_node {
>
> static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq);
>
> -
> static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
> {
> struct virtio_fs *fs = vq->vdev->priv;
> @@ -513,13 +513,75 @@ static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq)
> return 0;
> }
>
> +static int notify_complete_waiting_req(struct virtio_fs *vfs,
> + struct fuse_notify_lock_out *out_args)
> +{
> + struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST];
> + struct fuse_req *req, *next;
> + bool found = false;
> + struct fuse_conn *fc = fsvq->fud->fc;
> +
> + /* Find waiting request with the unique number and end it */
> + spin_lock(&fsvq->lock);
> + list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) {
> + if (req->in.h.unique == out_args->id) {
> + list_del_init(&req->list);
> + clear_bit(FR_SENT, &req->flags);
> + /* Transfer error code from notify */
> + req->out.h.error = out_args->error;
> + found = true;
> + break;
> + }
> + }
> + spin_unlock(&fsvq->lock);
> +
> + /*
> + * TODO: It is possible that some re-ordering happens in notify
> + * comes before request is complete. Deal with it.
> + */
> + if (found) {
> + fuse_request_end(fc, req);
> + spin_lock(&fsvq->lock);
> + dec_in_flight_req(fsvq);
> + spin_unlock(&fsvq->lock);
> + } else
> + pr_debug("virtio-fs: Did not find waiting request with"
> + " unique=0x%llx\n", out_args->id);
> +
> + return 0;
> +}
> +
> +static int virtio_fs_handle_notify(struct virtio_fs *vfs,
> + struct virtio_fs_notify *notify)
> +{
> + int ret = 0;
> + struct fuse_out_header *oh = &notify->out_hdr;
> + struct fuse_notify_lock_out *lo;
> +
> + /*
> + * For notifications, oh.unique is 0 and oh->error contains code
> + * for which notification as arrived.
> + */
> + switch(oh->error) {
> + case FUSE_NOTIFY_LOCK:
> + lo = (struct fuse_notify_lock_out *) &notify->outarg;
> + notify_complete_waiting_req(vfs, lo);
> + break;
> + default:
> + printk("virtio-fs: Unexpected notification %d\n", oh->error);
> + }
> + return ret;
> +}

Is this specific to virtio or can it be handled in common code?

> +
> static void virtio_fs_notify_done_work(struct work_struct *work)
> {
> struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
> done_work);
> struct virtqueue *vq = fsvq->vq;
> + struct virtio_fs *vfs = vq->vdev->priv;
> LIST_HEAD(reqs);
> struct virtio_fs_notify_node *notify, *next;
> + struct fuse_out_header *oh;
>
> spin_lock(&fsvq->lock);
> do {
> @@ -535,6 +597,10 @@ static void virtio_fs_notify_done_work(struct work_struct *work)
>
> /* Process notify */
> list_for_each_entry_safe(notify, next, &reqs, list) {
> + oh = &notify->notify.out_hdr;
> + WARN_ON(oh->unique);
> + /* Handle notification */
> + virtio_fs_handle_notify(vfs, &notify->notify);
> spin_lock(&fsvq->lock);
> dec_in_flight_req(fsvq);
> list_del_init(&notify->list);
> @@ -656,6 +722,15 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
> * TODO verify that server properly follows FUSE protocol
> * (oh.uniq, oh.len)
> */
> + if (req->out.h.error == 1) {
> + /* Wait for notification to complete request */
> + list_del_init(&req->list);
> + spin_lock(&fsvq->lock);
> + list_add_tail(&req->list, &fsvq->wait_reqs);
> + spin_unlock(&fsvq->lock);
> + continue;
> + }
> +
> args = req->args;
> copy_args_from_argbuf(args, req);
>
> @@ -705,6 +780,7 @@ static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq,
> strncpy(fsvq->name, name, VQ_NAME_LEN);
> spin_lock_init(&fsvq->lock);
> INIT_LIST_HEAD(&fsvq->queued_reqs);
> + INIT_LIST_HEAD(&fsvq->wait_reqs);
> INIT_LIST_HEAD(&fsvq->end_reqs);
> INIT_LIST_HEAD(&fsvq->notify_reqs);
> init_completion(&fsvq->in_flight_zero);
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 373cada89815..45f0c4efec8e 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -481,6 +481,7 @@ enum fuse_notify_code {
> FUSE_NOTIFY_STORE = 4,
> FUSE_NOTIFY_RETRIEVE = 5,
> FUSE_NOTIFY_DELETE = 6,
> + FUSE_NOTIFY_LOCK = 7,
> FUSE_NOTIFY_CODE_MAX,
> };
>
> @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in {
> uint64_t dummy4;
> };
>
> +struct fuse_notify_lock_out {
> + uint64_t id;

Please call this field "unique" or "lock_unique" so it's clear this
identifier is the fuse_in_header->unique value of the lock request.

> + int32_t error;
> + int32_t padding;
> +};
> +
> /* Device ioctls: */
> #define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
>
> --
> 2.20.1
>

Attachment: signature.asc
Description: PGP signature