[PATCH v2 3/4] PCI/AER: Fetch information for FTrace

From: Wang, Qingshun
Date: Thu Jan 25 2024 - 01:32:17 EST


Fetch and store the data of 3 more registers: "Link Status", "Device
Control 2", and "Advanced Error Capabilities and Control". This data is
needed so that external observers (e.g. via FTrace) can better
understand Advisory Non-Fatal Errors (ANFE).

Signed-off-by: "Wang, Qingshun" <qingshun.wang@xxxxxxxxxxxxxxx>
---
drivers/acpi/apei/ghes.c | 8 +++++++-
drivers/cxl/core/pci.c | 11 ++++++++++-
drivers/pci/pci.h | 4 ++++
drivers/pci/pcie/aer.c | 26 ++++++++++++++++++++------
include/linux/aer.h | 6 ++++--
5 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6034039d5cff..047cc01be68c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -594,7 +594,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
struct pcie_capability_regs *pcie_caps;
+ u16 device_control_2 = 0;
u16 device_status = 0;
+ u16 link_status = 0;
unsigned int devfn;
int aer_severity;
u8 *aer_info;
@@ -619,7 +621,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)

if (pcie_err->validation_bits & CPER_PCIE_VALID_CAPABILITY) {
pcie_caps = (struct pcie_capability_regs *)pcie_err->capability;
+ device_control_2 = pcie_caps->device_control_2;
device_status = pcie_caps->device_status;
+ link_status = pcie_caps->link_status;
}

aer_recover_queue(pcie_err->device_id.segment,
@@ -627,7 +631,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
devfn, aer_severity,
(struct aer_capability_regs *)
aer_info,
- device_status);
+ device_status,
+ link_status,
+ device_control_2);
}
#endif
}
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 9111a4415a63..3aa57fe8db42 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -903,7 +903,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
struct aer_capability_regs aer_regs;
struct cxl_dport *dport;
struct cxl_port *port;
+ u16 device_control_2;
u16 device_status;
+ u16 link_status;
int severity;

port = cxl_pci_find_port(pdev, &dport);
@@ -918,10 +920,17 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
return;

+ if (pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &device_control_2))
+ return;
+
if (pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &device_status))
return;

- pci_print_aer(pdev, severity, &aer_regs, device_status);
+ if (pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status))
+ return;
+
+ pci_print_aer(pdev, severity, &aer_regs, device_status,
+ link_status, device_control_2);

if (severity == AER_CORRECTABLE)
cxl_handle_rdport_cor_ras(cxlds, dport);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f881a1b42f14..5788a94b4e95 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -412,7 +412,11 @@ struct aer_err_info {
u32 uncor_mask; /* UNCOR Error Mask */
u32 uncor_status; /* UNCOR Error Status */
u32 uncor_severity; /* UNCOR Error Severity */
+
+ u16 link_status;
+ u32 aer_cap_ctrl; /* AER Capabilities and Control */
u16 device_status;
+ u16 device_control_2;
struct aer_header_log_regs tlp; /* TLP Header */
};

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 713cbf625d3f..eec3406f727a 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -825,7 +825,8 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
#endif

void pci_print_aer(struct pci_dev *dev, int aer_severity,
- struct aer_capability_regs *aer, u16 device_status)
+ struct aer_capability_regs *aer, u16 device_status,
+ u16 link_status, u16 device_control_2)
{
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
@@ -850,7 +851,10 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
info.uncor_status = aer->uncor_status;
info.uncor_severity = aer->uncor_severity;
info.uncor_mask = aer->uncor_mask;
+ info.link_status = link_status;
+ info.aer_cap_ctrl = aer->cap_control;
info.device_status = device_status;
+ info.device_control_2 = device_control_2;
info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);

pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
@@ -1205,7 +1209,9 @@ struct aer_recover_entry {
u8 devfn;
u16 domain;
int severity;
+ u16 link_status;
u16 device_status;
+ u16 device_control_2;
struct aer_capability_regs *regs;
};

@@ -1226,7 +1232,8 @@ static void aer_recover_work_func(struct work_struct *work)
PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
continue;
}
- pci_print_aer(pdev, entry.severity, entry.regs, entry.device_status);
+ pci_print_aer(pdev, entry.severity, entry.regs, entry.device_status,
+ entry.link_status, entry.device_control_2);
/*
* Memory for aer_capability_regs(entry.regs) is being allocated from the
* ghes_estatus_pool to protect it from overwriting when multiple sections
@@ -1255,7 +1262,8 @@ static DEFINE_SPINLOCK(aer_recover_ring_lock);
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);

void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
- int severity, struct aer_capability_regs *aer_regs, u16 device_status)
+ int severity, struct aer_capability_regs *aer_regs, u16 device_status,
+ u16 link_status, u16 device_control_2)
{
struct aer_recover_entry entry = {
.bus = bus,
@@ -1263,7 +1271,9 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
.domain = domain,
.severity = severity,
.regs = aer_regs,
+ .link_status = link_status,
.device_status = device_status,
+ .device_control_2 = device_control_2,
};

if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
@@ -1289,7 +1299,6 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
int type = pci_pcie_type(dev);
int aer = dev->aer_cap;
- int temp;

/* Must reset in this function */
info->cor_status = 0;
@@ -1317,8 +1326,14 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
&info->uncor_severity);
pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
&info->uncor_mask);
+ pci_read_config_dword(dev, aer + PCI_ERR_CAP,
+ &info->aer_cap_ctrl);
+ pcie_capability_read_word(dev, PCI_EXP_LNKSTA,
+ &info->link_status);
pcie_capability_read_word(dev, PCI_EXP_DEVSTA,
&info->device_status);
+ pcie_capability_read_word(dev, PCI_EXP_DEVCTL2,
+ &info->device_control_2);
} else {
return 1;
}
@@ -1331,8 +1346,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
return 0;

/* Get First Error Pointer */
- pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
- info->first_error = PCI_ERR_CAP_FEP(temp);
+ info->first_error = PCI_ERR_CAP_FEP(info->aer_cap_ctrl);

if (info->uncor_status & AER_LOG_TLP_MASKS) {
info->tlp_header_valid = 1;
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 38ac802250ac..327ebec1e4b3 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -52,9 +52,11 @@ static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
#endif

void pci_print_aer(struct pci_dev *dev, int aer_severity,
- struct aer_capability_regs *aer, u16 device_status);
+ struct aer_capability_regs *aer, u16 device_status,
+ u16 link_status, u16 device_control_2);
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
- int severity, struct aer_capability_regs *aer_regs, u16 device_status);
+ int severity, struct aer_capability_regs *aer_regs, u16 device_status,
+ u16 link_status, u16 device_control_2);
#endif //_AER_H_

--
2.42.0