Re: [patch V2 02/15] pci/switchtec: Replace completion wait queue usage for poll

From: Logan Gunthorpe
Date: Wed Mar 18 2020 - 18:12:45 EST




On 2020-03-18 2:43 p.m., Thomas Gleixner wrote:
> From: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
>
> The poll callback is using the completion wait queue and sticks it into
> poll_wait() to wake up pollers after a command has completed.
>
> This works to some extent, but cannot provide EPOLLEXCLUSIVE support
> because the waker side uses complete_all() which unconditionally wakes up
> all waiters. complete_all() is required because completions internally use
> exclusive wait and complete() only wakes up one waiter by default.
>
> This mixes conceptually different mechanisms and relies on internal
> implementation details of completions, which in turn puts constraints on
> changing the internal implementation of completions.
>
> Replace it with a regular wait queue and store the state in struct
> switchtec_user.
>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
> Acked-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>

Although I've been against open-coding the completion in this driver for
some time, I'm convinced by the EPOLLEXCLUSIVE argument for this change.
I've reviewed and lightly tested the change with hardware:

Reviewed-by: Logan Gunthorpe <logang@xxxxxxxxxxxx>

Thanks,

Logan

> Cc: Kurt Schwemmer <kurt.schwemmer@xxxxxxxxxxxxx>
> Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx>
> Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>
> Cc: linux-pci@xxxxxxxxxxxxxxx
> ---
> V2: Reworded changelog.
> ---
> drivers/pci/switch/switchtec.c | 22 +++++++++++++---------
> 1 file changed, 13 insertions(+), 9 deletions(-)
>
> --- a/drivers/pci/switch/switchtec.c
> +++ b/drivers/pci/switch/switchtec.c
> @@ -52,10 +52,11 @@ struct switchtec_user {
>
> enum mrpc_state state;
>
> - struct completion comp;
> + wait_queue_head_t cmd_comp;
> struct kref kref;
> struct list_head list;
>
> + bool cmd_done;
> u32 cmd;
> u32 status;
> u32 return_code;
> @@ -77,7 +78,7 @@ static struct switchtec_user *stuser_cre
> stuser->stdev = stdev;
> kref_init(&stuser->kref);
> INIT_LIST_HEAD(&stuser->list);
> - init_completion(&stuser->comp);
> + init_waitqueue_head(&stuser->cmd_comp);
> stuser->event_cnt = atomic_read(&stdev->event_cnt);
>
> dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
> @@ -175,7 +176,7 @@ static int mrpc_queue_cmd(struct switcht
> kref_get(&stuser->kref);
> stuser->read_len = sizeof(stuser->data);
> stuser_set_state(stuser, MRPC_QUEUED);
> - reinit_completion(&stuser->comp);
> + stuser->cmd_done = false;
> list_add_tail(&stuser->list, &stdev->mrpc_queue);
>
> mrpc_cmd_submit(stdev);
> @@ -222,7 +223,8 @@ static void mrpc_complete_cmd(struct swi
> memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
> stuser->read_len);
> out:
> - complete_all(&stuser->comp);
> + stuser->cmd_done = true;
> + wake_up_interruptible(&stuser->cmd_comp);
> list_del_init(&stuser->list);
> stuser_put(stuser);
> stdev->mrpc_busy = 0;
> @@ -529,10 +531,11 @@ static ssize_t switchtec_dev_read(struct
> mutex_unlock(&stdev->mrpc_mutex);
>
> if (filp->f_flags & O_NONBLOCK) {
> - if (!try_wait_for_completion(&stuser->comp))
> + if (!stuser->cmd_done)
> return -EAGAIN;
> } else {
> - rc = wait_for_completion_interruptible(&stuser->comp);
> + rc = wait_event_interruptible(stuser->cmd_comp,
> + stuser->cmd_done);
> if (rc < 0)
> return rc;
> }
> @@ -580,7 +583,7 @@ static __poll_t switchtec_dev_poll(struc
> struct switchtec_dev *stdev = stuser->stdev;
> __poll_t ret = 0;
>
> - poll_wait(filp, &stuser->comp.wait, wait);
> + poll_wait(filp, &stuser->cmd_comp, wait);
> poll_wait(filp, &stdev->event_wq, wait);
>
> if (lock_mutex_and_test_alive(stdev))
> @@ -588,7 +591,7 @@ static __poll_t switchtec_dev_poll(struc
>
> mutex_unlock(&stdev->mrpc_mutex);
>
> - if (try_wait_for_completion(&stuser->comp))
> + if (stuser->cmd_done)
> ret |= EPOLLIN | EPOLLRDNORM;
>
> if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
> @@ -1272,7 +1275,8 @@ static void stdev_kill(struct switchtec_
>
> /* Wake up and kill any users waiting on an MRPC request */
> list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
> - complete_all(&stuser->comp);
> + stuser->cmd_done = true;
> + wake_up_interruptible(&stuser->cmd_comp);
> list_del_init(&stuser->list);
> stuser_put(stuser);
> }
>