Re: [RFC PATCH 0/9] CXL: Read and clear event logs

From: Davidlohr Bueso
Date: Mon Aug 22 2022 - 12:18:16 EST


On Fri, 12 Aug 2022, ira.weiny@xxxxxxxxx wrote:

From: Ira Weiny <ira.weiny@xxxxxxxxx>

Event records inform the OS of various device events. Events are not needed
for any kernel operation but various user level software will want to track
events.

Add event reporting through the trace event mechanism. On driver load read and
clear all device events.

Normally interrupts will trigger new events to be reported as they occur.
Because the interrupt code is still being worked on this series provides a
cxl-test mechanism to create a series of events and trigger the reporting of
those events.

Where is this irq code being worked on? I've asked about this for async mbox
commands, and Jonathan has also posted some code for the PMU implementation.

Could we not just start with an initial MSI/MSI-X support? Then gradually
interested users can be added? So each "feature" would need to do implement
it's "get message number" and to install the isr just do the standard:

irq = pci_irq_vector(pdev, num);
irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s\n", dev_name(dev),
cxl_irq_cap_table[feature].name);
rc = devm_request_irq(dev, irq, isr_fn, IRQF_SHARED, irq_name, info);

The only complexity I see for this is to know the number of vectors to request
apriori, for which we'd have to get the larges value of all CXL features that
can support interrupts. Something like the following? One thing I have not
considered in this is the DOE stuff.

Thanks,
Davidlohr

------
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 88e3a8e54b6a..b334d2f497c1 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -245,6 +245,8 @@ struct cxl_dev_state {
resource_size_t component_reg_phys;
u64 serial;

+ int irq_type; /* MSI-X, MSI */
+
struct xarray doe_mbs;

int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index eec597dbe763..95f4b91f43b1 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -53,15 +53,6 @@
#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8)
#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16)

-/* Register Block Identifier (RBI) */
-enum cxl_regloc_type {
- CXL_REGLOC_RBI_EMPTY = 0,
- CXL_REGLOC_RBI_COMPONENT,
- CXL_REGLOC_RBI_VIRT,
- CXL_REGLOC_RBI_MEMDEV,
- CXL_REGLOC_RBI_TYPES
-};
-
static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
struct cxl_register_map *map)
{
@@ -75,4 +66,44 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
void read_cdat_data(struct cxl_port *port);
+
+#define CXL_IRQ_CAPABILITY_TABLE \
+ C(ISOLATION, "isolation", NULL), \
+ C(PMU, "pmu_overflow", NULL), /* per pmu instance */ \
+ C(MBOX, "mailbox", NULL), /* primary-only */ \
+ C(EVENT, "event", NULL),
+
+#undef C
+#define C(a, b, c) CXL_IRQ_CAPABILITY_##a
+enum { CXL_IRQ_CAPABILITY_TABLE };
+#undef C
+#define C(a, b, c) { b, c }
+/**
+ * struct cxl_irq_cap - CXL feature that is capable of receiving MSI/MSI-X irqs.
+ *
+ * @name: Name of the device generating this interrupt.
+ * @get_max_msgnum: Get the feature's largest interrupt message number. In cases
+ * where there is only one instance it also indicates which
+ * MSI/MSI-X vector is used for the interrupt message generated
+ * in association with the feature. If the feature does not
+ * have the Interrupt Supported bit set, then return -1.
+ */
+struct cxl_irq_cap {
+ const char *name;
+ int (*get_max_msgnum)(struct cxl_dev_state *cxlds);
+};
+
+static const
+struct cxl_irq_cap cxl_irq_cap_table[] = { CXL_IRQ_CAPABILITY_TABLE };
+#undef C
+
+/* Register Block Identifier (RBI) */
+enum cxl_regloc_type {
+ CXL_REGLOC_RBI_EMPTY = 0,
+ CXL_REGLOC_RBI_COMPONENT,
+ CXL_REGLOC_RBI_VIRT,
+ CXL_REGLOC_RBI_MEMDEV,
+ CXL_REGLOC_RBI_TYPES
+};
+
#endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index faeb5d9d7a7a..c0fe78e0559b 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -387,6 +387,52 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
return rc;
}

+static void cxl_pci_free_irq_vectors(void *data)
+{
+ pci_free_irq_vectors(data);
+}
+
+static int cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds)
+{
+ struct device *dev = cxlds->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int rc, i, vectors = -1;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_irq_cap_table); i++) {
+ int irq;
+
+ if (!cxl_irq_cap_table[i].get_max_msgnum)
+ continue;
+
+ irq = cxl_irq_cap_table[i].get_max_msgnum(cxlds);
+ vectors = max_t(int, irq, vectors);
+ }
+
+ if (vectors == -1)
+ return -EINVAL; /* no irq support whatsoever */
+
+ vectors++;
+ rc = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
+ if (rc < 0) {
+ rc = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSI);
+ if (rc < 0)
+ return rc;
+
+ cxlds->irq_type = PCI_IRQ_MSI;
+ } else {
+ cxlds->irq_type = PCI_IRQ_MSIX;
+ }
+
+ if (rc != vectors) {
+ pci_err(pdev, "Not enough interrupts; use polling where supported\n");
+ /* Some got allocated; clean them up */
+ cxl_pci_free_irq_vectors(pdev);
+ return -ENOSPC;
+ }
+
+ return devm_add_action_or_reset(dev, cxl_pci_free_irq_vectors, pdev);
+}
+
static void cxl_pci_destroy_doe(void *mbs)
{
xa_destroy(mbs);
@@ -476,6 +522,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)

cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);

+ if (cxl_pci_alloc_irq_vectors(cxlds))
+ cxlds->irq_type = 0;
+
devm_cxl_pci_create_doe(cxlds);

rc = cxl_pci_setup_mailbox(cxlds);