Re: [PATCH v8 11/14] PCI/RCEC: Add RCiEP's linked RCEC to AER/ERR

From: Kelley, Sean V
Date: Fri Oct 09 2020 - 19:57:36 EST


On Fri, 2020-10-09 at 15:07 -0700, Sean V Kelley wrote:
> On 9 Oct 2020, at 14:30, Bjorn Helgaas wrote:
>
> > On Fri, Oct 09, 2020 at 12:57:45PM -0500, Bjorn Helgaas wrote:
> > > On Fri, Oct 02, 2020 at 11:47:32AM -0700, Sean V Kelley wrote:
> > > > From: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
> > > >
> > > > When attempting error recovery for an RCiEP associated with an
> > > > RCEC
> > > > device,
> > > > there needs to be a way to update the Root Error Status, the
> > > > Uncorrectable
> > > > Error Status and the Uncorrectable Error Severity of the
> > > > parent
> > > > RCEC.
> > > > In some non-native cases in which there is no OS visible device
> > > > associated with the RCiEP, there is nothing to act upon as the
> > > > firmware
> > > > is acting before the OS. So add handling for the linked 'rcec'
> > > > in
> > > > AER/ERR
> > > > while taking into account non-native cases.
> > > >
> > > > Co-developed-by: Sean V Kelley <sean.v.kelley@xxxxxxxxx>
> > > > Signed-off-by: Sean V Kelley <sean.v.kelley@xxxxxxxxx>
> > > > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
> > > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
> > > > ---
> > > > drivers/pci/pcie/aer.c | 9 +++++----
> > > > drivers/pci/pcie/err.c | 39 ++++++++++++++++++++++++++++----
> > > > -------
> > > > 2 files changed, 33 insertions(+), 15 deletions(-)
> > > >
> > > > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > > > index 65dff5f3457a..dccdba60b5d9 100644
> > > > --- a/drivers/pci/pcie/aer.c
> > > > +++ b/drivers/pci/pcie/aer.c
> > > > @@ -1358,17 +1358,18 @@ static int aer_probe(struct
> > > > pcie_device
> > > > *dev)
> > > > static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> > > > {
> > > > int aer = dev->aer_cap;
> > > > + int rc = 0;
> > > > u32 reg32;
> > > > - int rc;
> > > > -
> > > >
> > > > /* Disable Root's interrupt in response to error
> > > > messages */
> > > > pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND,
> > > > &reg32);
> > > > reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> > > > pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND,
> > > > reg32);
> > > >
> > > > - rc = pci_bus_error_reset(dev);
> > > > - pci_info(dev, "Root Port link has been reset\n");
> > > > + if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> > > > + rc = pci_bus_error_reset(dev);
> > > > + pci_info(dev, "Root Port link has been
> > > > reset\n");
> > > > + }
> > > >
> > > > /* Clear Root Error Status */
> > > > pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS,
> > > > &reg32);
> > > > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > > > index 38abd7984996..956ad4c86d53 100644
> > > > --- a/drivers/pci/pcie/err.c
> > > > +++ b/drivers/pci/pcie/err.c
> > > > @@ -149,7 +149,8 @@ static int report_resume(struct pci_dev
> > > > *dev,
> > > > void *data)
> > > > /**
> > > > * pci_walk_bridge - walk bridges potentially AER affected
> > > > * @bridge bridge which may be an RCEC with associated
> > > > RCiEPs,
> > > > - * an RCiEP associated with an RCEC, or a Port.
> > > > + * or a Port.
> > > > + * @dev an RCiEP lacking an associated RCEC.
> > > > * @cb callback to be called for each device found
> > > > * @userdata arbitrary pointer to be passed to callback.
> > > > *
> > > > @@ -160,13 +161,20 @@ static int report_resume(struct pci_dev
> > > > *dev,
> > > > void *data)
> > > > * If the device provided has no subordinate bus, call the
> > > > provided
> > > > * callback on the device itself.
> > > > */
> > > > -static void pci_walk_bridge(struct pci_dev *bridge, int
> > > > (*cb)(struct pci_dev *, void *),
> > > > +static void pci_walk_bridge(struct pci_dev *bridge, struct
> > > > pci_dev
> > > > *dev,
> > > > + int (*cb)(struct pci_dev *, void
> > > > *),
> > > > void *userdata)
> > > > {
> > > > - if (bridge->subordinate)
> > > > + /*
> > > > + * In a non-native case where there is no OS-visible
> > > > reporting
> > > > + * device the bridge will be NULL, i.e., no RCEC, no
> > > > PORT.
> > > > + */
> > > > + if (bridge && bridge->subordinate)
> > > > pci_walk_bus(bridge->subordinate, cb,
> > > > userdata);
> > > > - else
> > > > + else if (bridge)
> > > > cb(bridge, userdata);
> > > > + else
> > > > + cb(dev, userdata);
> > > > }
> > > >
> > > > static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> > > > @@ -196,16 +204,25 @@ pci_ers_result_t pcie_do_recovery(struct
> > > > pci_dev *dev,
> > > > type = pci_pcie_type(dev);
> > > > if (type == PCI_EXP_TYPE_ROOT_PORT ||
> > > > type == PCI_EXP_TYPE_DOWNSTREAM ||
> > > > - type == PCI_EXP_TYPE_RC_EC ||
> > > > - type == PCI_EXP_TYPE_RC_END)
> > > > + type == PCI_EXP_TYPE_RC_EC)
> > > > bridge = dev;
> > > > + else if (type == PCI_EXP_TYPE_RC_END)
> > > > + bridge = dev->rcec;
> > > > else
> > > > bridge = pci_upstream_bridge(dev);
> > > >
> > > > pci_dbg(dev, "broadcast error_detected message\n");
> > > > if (state == pci_channel_io_frozen) {
> > > > - pci_walk_bridge(bridge, report_frozen_detected,
> > > > &status);
> > > > + pci_walk_bridge(bridge, dev,
> > > > report_frozen_detected, &status);
> > > > if (type == PCI_EXP_TYPE_RC_END) {
> > > > + /*
> > > > + * The callback only clears the Root
> > > > Error Status
> > > > + * of the RCEC (see aer.c). Only
> > > > perform this for the
> > > > + * native case, i.e., an RCEC is
> > > > present.
> > > > + */
> > > > + if (bridge)
> > > > + reset_subordinate_devices(bridg
> > > > e);
> > >
> > > Help me understand this. There are lots of callbacks in this
> > > picture,
> > > but I guess this "callback only clears Root Error Status" must
> > > refer
> > > to aer_root_reset(), i.e., the reset_subordinate_devices pointer?
> > >
> > > Of course, the caller of pcie_do_recovery() supplied that
> > > pointer.
> > > And we can infer that it must be aer_root_reset(), not
> > > dpc_reset_link(), because RCECs and RCiEPs are not allowed to
> > > implement DPC.
> > >
> > > I wish we didn't have either this assumption about what
> > > reset_subordinate_devices points to, or the assumption about what
> > > aer_root_reset() does. They both seem a little bit tenuous.
> > >
> > > We already made aer_root_reset() smart enough to check for
> > > RCECs.
> > > Can
> > > we put the FLR there, too? Then we wouldn't have this weird
> > > situation
> > > where reset_subordinate_devices() does a reset and clears error
> > > status, EXCEPT for this case where it only clears error status
> > > and we
> > > do the reset here?
> >
> > Just as an example to make this concrete. Doesn't even compile.
>
> Okay, I tried something similar. Comments below:
>
> >
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index d6927e6535e5..e389db7cbba6 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -1372,28 +1372,45 @@ static int aer_probe(struct pcie_device
> > *dev)
> > */
> > static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> > {
> > - int aer = dev->aer_cap;
> > + int type = pci_pcie_type(dev);
> > + struct pci_dev *root;
> > + int aer = 0;
> > int rc = 0;
> > u32 reg32;
> >
> > - /* Disable Root's interrupt in response to error messages */
> > - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> > - reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> > - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> > + if (type == PCI_EXP_TYPE_RC_END)
> > + root = dev->rcec;
> > + else
> > + root = dev;
> > +
> > + if (root)
> > + aer = root->aer_cap;
>
> Except here dev->rcec may be NULL for the non-native case. So I had
> a
> goto label to bypass what follows. I left the FLR in err.c.
>
>
> > - if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
> > + if (aer) {
> > + /* Disable Root's interrupt in response to error
> > messages */
> > + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
> > &reg32);
> > + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
> > + pci_write_config_dword(root, aer +
> > PCI_ERR_ROOT_COMMAND, reg32);
> > + }
> > +
> > + if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END)
> > {
> > + rc = flr_on_rciep(dev);
> > + pci_info(dev, "has been reset (%d)\n", rc);
> > + } else {
> > rc = pci_bus_error_reset(dev);
> > - pci_info(dev, "Root Port link has been reset\n");
> > + pci_info(dev, "Root Port link has been reset (%d)\n",
> > rc);
> > }
> >
> > - /* Clear Root Error Status */
> > - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
> > - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
> > + if (aer) {
> > + /* Clear Root Error Status */
> > + pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS,
> > &reg32);
> > + pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS,
> > reg32);
> >
> > - /* Enable Root Port's interrupt in response to error messages
> > */
> > - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
> > - reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> > - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
> > + /* Enable Root Port's interrupt in response to error
> > messages */
> > + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
> > &reg32);
> > + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> > + pci_write_config_dword(root, aer +
> > PCI_ERR_ROOT_COMMAND, reg32);
> > + }
> >
> > return rc ? PCI_ERS_RESULT_DISCONNECT :
> > PCI_ERS_RESULT_RECOVERED;
> > }
> > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > index 79ae1356141d..08976034a89c 100644
> > --- a/drivers/pci/pcie/err.c
> > +++ b/drivers/pci/pcie/err.c
> > @@ -203,36 +203,19 @@ pci_ers_result_t pcie_do_recovery(struct
> > pci_dev
> > *dev,
> > */
> > if (type == PCI_EXP_TYPE_ROOT_PORT ||
> > type == PCI_EXP_TYPE_DOWNSTREAM ||
> > - type == PCI_EXP_TYPE_RC_EC)
> > + type == PCI_EXP_TYPE_RC_EC ||
> > + type == PCI_EXP_TYPE_RC_END)
> > bridge = dev;
> > - else if (type == PCI_EXP_TYPE_RC_END)
> > - bridge = dev->rcec;
> > else
> > bridge = pci_upstream_bridge(dev);
> >
> > pci_dbg(bridge, "broadcast error_detected message\n");
> > if (state == pci_channel_io_frozen) {
> > pci_walk_bridge(bridge, dev, report_frozen_detected,
> > &status);
>
> We’d need to walk back the dev addition to the pci_walk_bridge() and
> revert to prior to the checks on dev->rcec.
>
> > - if (type == PCI_EXP_TYPE_RC_END) {
> > - /*
> > - * The callback only clears the Root Error
> > Status
> > - * of the RCEC (see aer.c). Only perform this
> > for the
> > - * native case, i.e., an RCEC is present.
> > - */
> > - if (bridge)
> > - reset_subordinates(bridge);
> > -
> > - status = flr_on_rciep(dev);
> > - if (status != PCI_ERS_RESULT_RECOVERED) {
> > - pci_warn(dev, "Function Level Reset
> > failed\n");
> > - goto failed;
> > - }
> > - } else {
> > - status = reset_subordinates(bridge);
> > - if (status != PCI_ERS_RESULT_RECOVERED) {
> > - pci_warn(dev, "subordinate device reset
> > failed\n");
> > - goto failed;
> > - }
> > + status = reset_subordinates(bridge);
> > + if (status != PCI_ERS_RESULT_RECOVERED) {
> > + pci_warn(bridge, "subordinate device reset
> > failed\n");
> > + goto failed;
> > }
> > } else {
> > pci_walk_bridge(bridge, dev, report_normal_detected,
> > &status);
>
> So this is what I experimented with for the most part but with the
> changes that I highlighted - addressing non-native case in the
> callback
> and also in the bridge walk.
>
> I can test it out.


So I tested the following out, including your moving flr to aer.c:

- Renamed flr_on_rciep() to flr_on_rc() for RC devices (RC_END and
RC_EC)

- Moved check on dev->rcec into aer_root_reset() including the FLR.

- Reworked pci_walk_bridge() to drop extra dev argument and check
locally for the bridge->rcec. Maybe should also check on type when
checking on bridge->rcec.

Note I didn't use the check on aer_cap existence because I think you
had added that for simply being able to skip over for the non-native
case and I handle that with the single goto at the beginning which
takes you to the FLR.

So this is rough, compiled, tested with AER injections but that's it...

Please take a look.

Thanks,

Sean




drivers/pci/pcie/aer.c | 57 ++++++++++++++++++++++++++++++--------
drivers/pci/pcie/err.c | 62 ++++++++++++------------------------------
2 files changed, 63 insertions(+), 56 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 3cde646f71c0..35b32309a97b 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1362,6 +1362,23 @@ static int aer_probe(struct pcie_device *dev)
return 0;
}

+/**
+ * flr_on_rc - function levl reset on root complex device
+ * @dev: pointer to root complex device's pci_dev data structure
+ *
+ * Invoked by aer_root_reset() when performing link reset.
+ */
+static pci_ers_result_t flr_on_rc(struct pci_dev *dev)
+{
+ if (!pcie_has_flr(dev))
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ if (pcie_flr(dev))
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
/**
* aer_root_reset - reset link on Root Port
* @dev: pointer to Root Port's pci_dev data structure
@@ -1370,28 +1387,46 @@ static int aer_probe(struct pcie_device *dev)
*/
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
+ int type = pci_pcie_type(dev);
+ struct pci_dev *root = dev;
int aer = dev->aer_cap;
int rc = 0;
u32 reg32;

+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
+ /*
+ * The reset should only clear the Root Error Status
+ * of the RCEC. Only perform this for the
+ * native case, i.e., an RCEC is present.
+ */
+ if (!(dev->rcec))
+ goto non_native;
+ else
+ root = dev->rcec;
+ }
+
/* Disable Root's interrupt in response to error messages */
- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+ pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
&reg32);
reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
-
- if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) {
- rc = pci_bus_error_reset(dev);
- pci_info(dev, "Root Port link has been reset\n");
- }
+ pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
reg32);

/* Clear Root Error Status */
- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
+ pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
+ pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);

/* Enable Root Port's interrupt in response to error messages
*/
- pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+ pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
&reg32);
reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND,
reg32);
+
+non_native:
+ if ((type == PCI_EXP_TYPE_RC_EC) || (type ==
PCI_EXP_TYPE_RC_END)) {
+ rc = flr_on_rc(root);
+ pci_info(dev, "has been reset (%d)\n", rc);
+ } else {
+ rc = pci_bus_error_reset(root);
+ pci_info(dev, "Root Port link has been reset (%d)\n",
rc);
+ }

return rc ? PCI_ERS_RESULT_DISCONNECT :
PCI_ERS_RESULT_RECOVERED;
}
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 956ad4c86d53..81bbf8a4006d 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -150,7 +150,6 @@ static int report_resume(struct pci_dev *dev, void
*data)
* pci_walk_bridge - walk bridges potentially AER affected
* @bridge bridge which may be an RCEC with associated RCiEPs,
* or a Port.
- * @dev an RCiEP lacking an associated RCEC.
* @cb callback to be called for each device found
* @userdata arbitrary pointer to be passed to callback.
*
@@ -161,7 +160,7 @@ static int report_resume(struct pci_dev *dev, void
*data)
* If the device provided has no subordinate bus, call the provided
* callback on the device itself.
*/
-static void pci_walk_bridge(struct pci_dev *bridge, struct pci_dev
*dev,
+static void pci_walk_bridge(struct pci_dev *bridge,
int (*cb)(struct pci_dev *, void *),
void *userdata)
{
@@ -169,23 +168,12 @@ static void pci_walk_bridge(struct pci_dev
*bridge, struct pci_dev *dev,
* In a non-native case where there is no OS-visible reporting
* device the bridge will be NULL, i.e., no RCEC, no PORT.
*/
- if (bridge && bridge->subordinate)
+ if (bridge->subordinate)
pci_walk_bus(bridge->subordinate, cb, userdata);
- else if (bridge)
- cb(bridge, userdata);
+ else if (bridge->rcec)
+ cb(bridge->rcec, userdata);
else
- cb(dev, userdata);
-}
-
-static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
-{
- if (!pcie_has_flr(dev))
- return PCI_ERS_RESULT_DISCONNECT;
-
- if (pcie_flr(dev))
- return PCI_ERS_RESULT_DISCONNECT;
-
- return PCI_ERS_RESULT_RECOVERED;
+ cb(bridge, userdata);
}

pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
@@ -204,45 +192,29 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev
*dev,
type = pci_pcie_type(dev);
if (type == PCI_EXP_TYPE_ROOT_PORT ||
type == PCI_EXP_TYPE_DOWNSTREAM ||
- type == PCI_EXP_TYPE_RC_EC)
+ type == PCI_EXP_TYPE_RC_EC ||
+ type == PCI_EXP_TYPE_RC_END)
bridge = dev;
- else if (type == PCI_EXP_TYPE_RC_END)
- bridge = dev->rcec;
else
bridge = pci_upstream_bridge(dev);

pci_dbg(dev, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
- pci_walk_bridge(bridge, dev, report_frozen_detected,
&status);
- if (type == PCI_EXP_TYPE_RC_END) {
- /*
- * The callback only clears the Root Error
Status
- * of the RCEC (see aer.c). Only perform this
for the
- * native case, i.e., an RCEC is present.
- */
- if (bridge)
- reset_subordinate_devices(bridge);
-
- status = flr_on_rciep(dev);
- if (status != PCI_ERS_RESULT_RECOVERED) {
- pci_warn(dev, "function level reset
failed\n");
- goto failed;
- }
- } else {
- status = reset_subordinate_devices(bridge);
- if (status != PCI_ERS_RESULT_RECOVERED) {
- pci_warn(dev, "subordinate device reset
failed\n");
- goto failed;
- }
+ pci_walk_bridge(bridge, report_frozen_detected,
&status);
+ status = reset_subordinate_devices(bridge);
+ if (status != PCI_ERS_RESULT_RECOVERED) {
+ pci_warn(dev, "subordinate device reset
failed\n");
+ goto failed;
}
+
} else {
- pci_walk_bridge(bridge, dev, report_normal_detected,
&status);
+ pci_walk_bridge(bridge, report_normal_detected,
&status);
}

if (status == PCI_ERS_RESULT_CAN_RECOVER) {
status = PCI_ERS_RESULT_RECOVERED;
pci_dbg(dev, "broadcast mmio_enabled message\n");
- pci_walk_bridge(bridge, dev, report_mmio_enabled,
&status);
+ pci_walk_bridge(bridge, report_mmio_enabled, &status);
}

if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -253,14 +225,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev
*dev,
*/
status = PCI_ERS_RESULT_RECOVERED;
pci_dbg(dev, "broadcast slot_reset message\n");
- pci_walk_bridge(bridge, dev, report_slot_reset,
&status);
+ pci_walk_bridge(bridge, report_slot_reset, &status);
}

if (status != PCI_ERS_RESULT_RECOVERED)
goto failed;

pci_dbg(dev, "broadcast resume message\n");
- pci_walk_bridge(bridge, dev, report_resume, &status);
+ pci_walk_bridge(bridge, report_resume, &status);

if (type == PCI_EXP_TYPE_ROOT_PORT ||
type == PCI_EXP_TYPE_DOWNSTREAM ||
--
2.28.0
>
> Thanks,
>
> Sean