RE: [PATCH v3 1/6] cxl/pci: Add RCH downstream port AER and RAS register discovery

From: Dan Williams
Date: Mon Apr 17 2023 - 19:01:14 EST


Terry Bowman wrote:
> Restricted CXL host (RCH) downstream port AER information is not currently
> logged while in the error state. One problem preventing existing PCIe AER
> functions from logging errors is the AER registers are not accessible. The
> CXL driver requires changes to find RCH downstream port AER registers for
> purpose of error logging.
>
> RCH downstream ports are not enumerated during a PCI bus scan and are
> instead discovered using system firmware, ACPI in this case.[1] The
> downstream port is implemented as a Root Complex Register Block (RCRB).
> The RCRB is a 4k memory block containing PCIe registers based on the PCIe
> root port.[2] The RCRB includes AER extended capability registers used for
> reporting errors. Note, the RCH's AER Capability is located in the RCRB
> memory space instead of PCI configuration space, thus its register access
> is different. Existing kernel PCIe AER functions can not be used to manage
> the downstream port AER capabilities because the port was not enumerated
> during PCI scan and the registers are not PCI config accessible.
>
> Discover RCH downstream port AER extended capability registers. This
> requires using MMIO accesses to search for extended AER capability in
> RCRB register space.
>
> [1] CXL 3.0 Spec, 9.11.2 - System Firmware View of CXL 1.1 Hierarchy
> [2] CXL 3.0 Spec, 8.2.1.1 - RCH Downstream Port RCRB
>
> Co-developed-by: Robert Richter <rrichter@xxxxxxx>
> Signed-off-by: Robert Richter <rrichter@xxxxxxx>
> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
> ---
> drivers/cxl/core/regs.c | 93 +++++++++++++++++++++++++++++++++++------
> drivers/cxl/cxl.h | 5 +++
> drivers/cxl/mem.c | 39 +++++++++++------
> 3 files changed, 113 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> index 1476a0299c9b..bde1fffab09e 100644
> --- a/drivers/cxl/core/regs.c
> +++ b/drivers/cxl/core/regs.c
> @@ -332,10 +332,36 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> }
> EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
>
> +static void __iomem *cxl_map_reg(struct device *dev, struct cxl_register_map *map,
> + char *name)
> +{
> +
> + if (!request_mem_region(map->resource, map->max_size, name))
> + return NULL;
> +
> + map->base = ioremap(map->resource, map->max_size);
> + if (!map->base) {
> + release_mem_region(map->resource, map->max_size);
> + return NULL;
> + }
> +
> + return map->base;
> +}
> +
> +static void cxl_unmap_reg(struct device *dev, struct cxl_register_map *map)
> +{
> + iounmap(map->base);
> + release_mem_region(map->resource, map->max_size);
> +}

It is not clear why these new functions are needed instead of
cxl_map_regblock() / cxl_unmap_regblock(), and this refactoring looks
unrelated to the changes described in the patch changelog.

...oh, I think I see why you went this way; a potential counter-proposal
is below.

> +
> resource_size_t cxl_rcrb_to_component(struct device *dev,
> resource_size_t rcrb,
> enum cxl_rcrb which)
> {
> + struct cxl_register_map map = {
> + .resource = rcrb,
> + .max_size = SZ_4K
> + };
> resource_size_t component_reg_phys;
> void __iomem *addr;
> u32 bar0, bar1;
> @@ -343,7 +369,10 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
> u32 id;
>
> if (which == CXL_RCRB_UPSTREAM)
> - rcrb += SZ_4K;
> + map.resource += SZ_4K;
> +
> + if (!cxl_map_reg(dev, &map, "CXL RCRB"))
> + return CXL_RESOURCE_NONE;
>
> /*
> * RCRB's BAR[0..1] point to component block containing CXL
> @@ -351,21 +380,12 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
> * the PCI Base spec here, esp. 64 bit extraction and memory
> * ranges alignment (6.0, 7.5.1.2.1).
> */
> - if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB"))
> - return CXL_RESOURCE_NONE;
> - addr = ioremap(rcrb, SZ_4K);
> - if (!addr) {
> - dev_err(dev, "Failed to map region %pr\n", addr);
> - release_mem_region(rcrb, SZ_4K);
> - return CXL_RESOURCE_NONE;
> - }
> -
> + addr = map.base;
> id = readl(addr + PCI_VENDOR_ID);
> cmd = readw(addr + PCI_COMMAND);
> bar0 = readl(addr + PCI_BASE_ADDRESS_0);
> bar1 = readl(addr + PCI_BASE_ADDRESS_1);
> - iounmap(addr);
> - release_mem_region(rcrb, SZ_4K);
> + cxl_unmap_reg(dev, &map);
>
> /*
> * Sanity check, see CXL 3.0 Figure 9-8 CXL Device that Does Not
> @@ -396,3 +416,52 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
> return component_reg_phys;
> }
> EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_component, CXL);
> +
> +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb)
> +{
> + struct cxl_register_map map = {
> + .resource = rcrb,
> + .max_size = SZ_4K,
> + };
> + u32 cap_hdr;
> + u16 offset = 0;
> +
> + if (!cxl_map_reg(dev, &map, "CXL RCRB"))
> + return 0;
> +
> + cap_hdr = readl(map.base + offset);
> + while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) {
> +
> + offset = PCI_EXT_CAP_NEXT(cap_hdr);
> + if (!offset) {
> + cxl_unmap_reg(dev, &map);
> + return 0;
> + }
> + cap_hdr = readl(map.base + offset);
> + }
> +
> + dev_dbg(dev, "found AER extended capability (0x%x)\n", offset);
> + cxl_unmap_reg(dev, &map);
> +
> + return offset;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_aer, CXL);

> +
> +u16 cxl_component_to_ras(struct device *dev, resource_size_t component_reg_phys)
> +{
> + struct cxl_register_map map = {
> + .resource = component_reg_phys,
> + .max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
> + };
> +
> + if (!cxl_map_reg(dev, &map, "component"))
> + return 0;
> +
> + cxl_probe_component_regs(dev, map.base, &map.component_map);
> + cxl_unmap_reg(dev, &map);
> + if (!map.component_map.ras.valid)
> + return 0;
> +
> + return map.component_map.ras.offset;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_component_to_ras, CXL);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 044a92d9813e..df64c402e6e6 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -270,6 +270,9 @@ enum cxl_rcrb {
> resource_size_t cxl_rcrb_to_component(struct device *dev,
> resource_size_t rcrb,
> enum cxl_rcrb which);
> +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb);
> +u16 cxl_component_to_ras(struct device *dev,
> + resource_size_t component_reg_phys);
>
> #define CXL_RESOURCE_NONE ((resource_size_t) -1)
> #define CXL_TARGET_STRLEN 20
> @@ -601,6 +604,8 @@ struct cxl_dport {
> int port_id;
> resource_size_t component_reg_phys;
> resource_size_t rcrb;
> + u16 aer_cap;
> + u16 ras_cap;
> bool rch;
> struct cxl_port *port;
> };
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 39c4b54f0715..014295ab6bc6 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -45,13 +45,36 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
> return 0;
> }
>
> +static void cxl_setup_rcrb(struct cxl_dev_state *cxlds,
> + struct cxl_dport *parent_dport)
> +{
> + struct cxl_memdev *cxlmd = cxlds->cxlmd;
> +
> + if (!parent_dport->rch)
> + return;
> +
> + /*
> + * The component registers for an RCD might come from the
> + * host-bridge RCRB if they are not already mapped via the
> + * typical register locator mechanism.
> + */
> + if (cxlds->component_reg_phys == CXL_RESOURCE_NONE)
> + cxlds->component_reg_phys = cxl_rcrb_to_component(
> + &cxlmd->dev, parent_dport->rcrb, CXL_RCRB_UPSTREAM);
> +
> + parent_dport->aer_cap = cxl_rcrb_to_aer(parent_dport->dport,
> + parent_dport->rcrb);

Hmm, how about just retrieving this as part of cxl_rcrb_to_component()
(renamed to cxl_probe_rcrb()), and making an rch dport its own distinct
object? Otherwise it feels odd to be retrieving downstream port
properties this late, at upstream port component register detection time.
It also feels awkward to keep adding more RCH dport specific details to
the common 'struct cxl_dport'. So, I'm thinking something like the
following (compiled and cxl_test regression passed):

-- >8 --