[PATCH 2/2 V3 net] net: mana: Fix MANA VF unload when host is unresponsive

From: souradeep chakrabarti
Date: Mon Jun 26 2023 - 05:23:01 EST


From: Souradeep Chakrabarti <schakrabarti@xxxxxxxxxxxxxxxxxxx>

This is the second part of the fix.

This patch adds a new attribute, vf_unload_timeout, to mana_context. It is
set when mana_hwc_send_request() times out because the host is unresponsive.
Subsequent calls check this flag and fail immediately instead of waiting for
another timeout.

Fixes: ca9c54d2d6a5ab2430c4eda364c77125d62e5e0f ("net: mana: Add a driver for Microsoft Azure Network Adapter")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@xxxxxxxxxxxxxxxxxxx>
---
V2 -> V3:
* Removed the initialization of vf_unload_timeout
* Split the patch into two.
* Removed an extra space from the commit message.
---
drivers/net/ethernet/microsoft/mana/gdma_main.c | 4 +++-
drivers/net/ethernet/microsoft/mana/hw_channel.c | 12 +++++++++++-
include/net/mana/mana.h | 2 ++
3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 8f3f78b68592..6411f01be0d9 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -946,10 +946,12 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
struct gdma_context *gc = gd->gdma_context;
struct gdma_general_resp resp = {};
struct gdma_general_req req = {};
+ struct mana_context *ac;
int err;

if (gd->pdid == INVALID_PDID)
return -EINVAL;
+ ac = gd->driver_data;

mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req),
sizeof(resp));
@@ -957,7 +959,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
req.hdr.dev_id = gd->dev_id;

err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
- if (err || resp.hdr.status) {
+ if ((err || resp.hdr.status) && !ac->vf_unload_timeout) {
dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
err, resp.hdr.status);
if (!err)
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 9d1507eba5b9..492cb2c6e2cb 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */

+#include "asm-generic/errno.h"
#include <net/mana/gdma.h>
#include <net/mana/hw_channel.h>
+#include <net/mana/mana.h>

static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
{
@@ -786,12 +788,19 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
struct hwc_wq *txq = hwc->txq;
struct gdma_req_hdr *req_msg;
struct hwc_caller_ctx *ctx;
+ struct mana_context *ac;
u32 dest_vrcq = 0;
u32 dest_vrq = 0;
u16 msg_id;
int err;

mana_hwc_get_msg_index(hwc, &msg_id);
+ ac = hwc->gdma_dev->driver_data;
+ if (ac->vf_unload_timeout) {
+ dev_err(hwc->dev, "HWC: vport is already unloaded.\n");
+ err = -ETIMEDOUT;
+ goto out;
+ }

tx_wr = &txq->msg_buf->reqs[msg_id];

@@ -825,9 +834,10 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
goto out;
}

- if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) {
+ if (!wait_for_completion_timeout(&ctx->comp_event, 5 * HZ)) {
dev_err(hwc->dev, "HWC: Request timed out!\n");
err = -ETIMEDOUT;
+ ac->vf_unload_timeout = true;
goto out;
}

diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 9eef19972845..5f5affdca1eb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -358,6 +358,8 @@ struct mana_context {

u16 num_ports;

+ bool vf_unload_timeout;
+
struct mana_eq *eqs;

struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
--
2.34.1