Re: [PATCH v4 2/2] PCI/ERR: Split the fatal and non-fatal error recovery handling

From: Kuppuswamy, Sathyanarayanan
Date: Tue Oct 13 2020 - 11:17:54 EST




On 10/13/20 4:56 AM, Christoph Hellwig wrote:
> You might want to split out pcie_do_fatal_recovery and get rid of the
> state argument:
This is how it was before Keith merged the fatal and non-fatal error recovery
paths. When the choice is between an additional parameter and a new interface,
I choose the former. But I can merge your change in the next version.


diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fa12f7cbc1a095..eec0d3fe9fd967 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -556,7 +556,8 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
/* PCI error reporting and recovery */
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
- pci_channel_state_t state,
+ pci_ers_result_t (*reset_link)(struct pci_dev *pdev));
+pci_ers_result_t pcie_do_fatal_recovery(struct pci_dev *dev,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev));
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 65dff5f3457ac0..4bf7ebb34cf854 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -947,9 +947,9 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (pcie_aer_is_native(dev))
pcie_clear_device_status(dev);
} else if (info->severity == AER_NONFATAL)
- pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
+ pcie_do_recovery(dev, aer_root_reset);
else if (info->severity == AER_FATAL)
- pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
+ pcie_do_fatal_recovery(dev, aer_root_reset);
pci_dev_put(dev);
}
@@ -985,11 +985,9 @@ static void aer_recover_work_func(struct work_struct *work)
}
cper_print_aer(pdev, entry.severity, entry.regs);
if (entry.severity == AER_NONFATAL)
- pcie_do_recovery(pdev, pci_channel_io_normal,
- aer_root_reset);
+ pcie_do_recovery(pdev, aer_root_reset);
else if (entry.severity == AER_FATAL)
- pcie_do_recovery(pdev, pci_channel_io_frozen,
- aer_root_reset);
+ pcie_do_fatal_recovery(pdev, aer_root_reset);
pci_dev_put(pdev);
}
}
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index daa9a4153776ce..74e7d1da3cf054 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -233,7 +233,7 @@ static irqreturn_t dpc_handler(int irq, void *context)
dpc_process_error(pdev);
/* We configure DPC so it only triggers on ERR_FATAL */
- pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);
+ pcie_do_fatal_recovery(pdev, dpc_reset_link);
return IRQ_HANDLED;
}
diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c
index a6b9b479b97ad0..87379bc566f691 100644
--- a/drivers/pci/pcie/edr.c
+++ b/drivers/pci/pcie/edr.c
@@ -183,7 +183,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
* or ERR_NONFATAL, since the link is already down, use the FATAL
* error recovery path for both cases.
*/
- estate = pcie_do_recovery(edev, pci_channel_io_frozen, dpc_reset_link);
+ estate = pcie_do_fatal_recovery(edev, dpc_reset_link);
send_ost:
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index c2ae4d08801a4d..11fcff16b17303 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -141,7 +141,7 @@ static int report_resume(struct pci_dev *dev, void *data)
return 0;
}
-static pci_ers_result_t pcie_do_fatal_recovery(struct pci_dev *dev,
+pci_ers_result_t pcie_do_fatal_recovery(struct pci_dev *dev,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
{
struct pci_dev *udev;
@@ -194,15 +194,11 @@ static pci_ers_result_t pcie_do_fatal_recovery(struct pci_dev *dev,
}
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
- pci_channel_state_t state,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
{
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
struct pci_bus *bus;
- if (state == pci_channel_io_frozen)
- return pcie_do_fatal_recovery(dev, reset_link);
-
/*
* Error recovery runs on all subordinates of the first downstream port.
* If the downstream port detected the error, it is cleared at the end.


--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer