RE: [PATCH net-next 4/4] net: mana: Support hibernation and kexec

From: Dexuan Cui
Date: Mon Nov 01 2021 - 03:03:43 EST


> From: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
> Sent: Saturday, October 30, 2021 8:55 AM
> >
> > @@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd)
> > if (err)
> > return err;
> >
> > - ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > - if (!ac)
> > - return -ENOMEM;
> > + if (!resuming) {
> > + ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > + if (!ac)
> > + return -ENOMEM;
> >
> > - ac->gdma_dev = gd;
> > - ac->num_ports = 1;
> > - gd->driver_data = ac;
> > + ac->gdma_dev = gd;
> > + gd->driver_data = ac;
> > + }
> >
> > err = mana_create_eq(ac);
> > if (err)
> > goto out;
> >
> > err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> > MANA_MINOR_VERSION,
> > - MANA_MICRO_VERSION, &ac->num_ports);
> > + MANA_MICRO_VERSION, &num_ports);
> > if (err)
> > goto out;
> >
> > + if (!resuming) {
> > + ac->num_ports = num_ports;
> > + } else {
> > + if (ac->num_ports != num_ports) {
> > + dev_err(dev, "The number of vPorts changed: %d->%d\n",
> > + ac->num_ports, num_ports);
> > + err = -EPROTO;
> > + goto out;
> > + }
> > + }
> > +
> > + if (ac->num_ports == 0)
> > + dev_err(dev, "Failed to detect any vPort\n");
> > +
> > if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> > ac->num_ports = MAX_PORTS_IN_MANA_DEV;
> >
> > - for (i = 0; i < ac->num_ports; i++) {
> > - err = mana_probe_port(ac, i, &ac->ports[i]);
> > - if (err)
> > - break;
> > + if (!resuming) {
> > + for (i = 0; i < ac->num_ports; i++) {
> > + err = mana_probe_port(ac, i, &ac->ports[i]);
> > + if (err)
> > + break;
> > + }
> > + } else {
> > + for (i = 0; i < ac->num_ports; i++) {
> > + rtnl_lock();
> > + err = mana_attach(ac->ports[i]);
> > + rtnl_unlock();
> > + if (err)
> > + break;
> > + }
> > }
> > out:
> > if (err)
> > - mana_remove(gd);
> > + mana_remove(gd, false);
>
> The "goto out" can happen in both resuming true/false cases,
> should the error handling path deal with the two cases
> differently?

Let me make the below change in v2. Please let me know
if any further change is needed:

--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1850,8 +1850,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming)

if (!resuming) {
ac = kzalloc(sizeof(*ac), GFP_KERNEL);
- if (!ac)
- return -ENOMEM;
+ if (!ac) {
+ err = -ENOMEM;
+ goto out;
+ }

ac->gdma_dev = gd;
gd->driver_data = ac;
@@ -1872,8 +1874,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (ac->num_ports != num_ports) {
dev_err(dev, "The number of vPorts changed: %d->%d\n",
ac->num_ports, num_ports);
- err = -EPROTO;
- goto out;
+ /* It's unsafe to proceed. */
+ return -EPROTO;
}
}

@@ -1886,22 +1888,36 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
- if (err)
- break;
+ if (err) {
+ dev_err(dev, "Failed to probe vPort %u: %d\n",
+ i, err);
+ goto out;
+ }
}
} else {
for (i = 0; i < ac->num_ports; i++) {
rtnl_lock();
err = mana_attach(ac->ports[i]);
rtnl_unlock();
- if (err)
- break;
+
+ if (err) {
+ netdev_err(ac->ports[i],
+ "Failed to resume vPort %u: %d\n",
+ i, err);
+ return err;
+ }
}
}
+
+ return 0;
out:
- if (err)
- mana_remove(gd, false);
+ /* In the resuming path, it's safer to leave the device in the failed
+ * state than try to invoke mana_detach().
+ */
+ if (resuming)
+ return err;

+ mana_remove(gd, false);
return err;
}

@@ -1919,7 +1935,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
if (!ndev) {
if (i == 0)
dev_err(dev, "No net device to remove\n");
- goto out;
+ break;
}

/* All cleanup actions should stay after rtnl_lock(), otherwise

For your easy reviewing, the new code of the function in v2 will be:

int mana_probe(struct gdma_dev *gd, bool resuming)
{
struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data;
struct device *dev = gc->dev;
u16 num_ports = 0;
int err;
int i;

dev_info(dev,
"Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);

err = mana_gd_register_device(gd);
if (err)
return err;

if (!resuming) {
ac = kzalloc(sizeof(*ac), GFP_KERNEL);
if (!ac) {
err = -ENOMEM;
goto out;
}

ac->gdma_dev = gd;
gd->driver_data = ac;
}

err = mana_create_eq(ac);
if (err)
goto out;

err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
MANA_MICRO_VERSION, &num_ports);
if (err)
goto out;

if (!resuming) {
ac->num_ports = num_ports;
} else {
if (ac->num_ports != num_ports) {
dev_err(dev, "The number of vPorts changed: %d->%d\n",
ac->num_ports, num_ports);
/* It's unsafe to proceed. */
return -EPROTO;
}
}

if (ac->num_ports == 0)
dev_err(dev, "Failed to detect any vPort\n");

if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
ac->num_ports = MAX_PORTS_IN_MANA_DEV;

if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
if (err) {
dev_err(dev, "Failed to probe vPort %u: %d\n",
i, err);
goto out;
}
}
} else {
for (i = 0; i < ac->num_ports; i++) {
rtnl_lock();
err = mana_attach(ac->ports[i]);
rtnl_unlock();

if (err) {
netdev_err(ac->ports[i],
"Failed to resume vPort %u: %d\n",
i, err);
return err;
}
}
}

return 0;
out:
/* In the resuming path, it's safer to leave the device in the failed
* state than try to invoke mana_detach().
*/
if (resuming)
return err;

mana_remove(gd, false);
return err;
}