[PATCH 13/21] EDAC, ghes: Rework memory hierarchy detection

From: Robert Richter
Date: Wed May 29 2019 - 04:48:23 EST


In a later patch we want to add more information about the memory
hierarchy (NUMA topology, DIMM label information). Rework memory
hierarchy detection to make the code extendable for this.

The general approach is roughly as follows:

	mem_info_setup();
	for_each_node(nid) {
		mci = edac_mc_alloc(nid);
		mci_add_dimm_info(mci);
		edac_mc_add_mc(mci);
	};

This patch introduces mem_info_setup() and mci_add_dimm_info().

All data of the memory hierarchy is collected in a driver-local
struct ghes_mem_info (the file-scope variable mem_info).

Note: Per (NUMA) node registration will be implemented in a later
patch.

Signed-off-by: Robert Richter <rrichter@xxxxxxxxxxx>
---
drivers/edac/ghes_edac.c | 166 +++++++++++++++++++++++++++++----------
1 file changed, 126 insertions(+), 40 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index ea4d53043199..50f4ee36b755 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -67,17 +67,38 @@ struct memdev_dmi_entry {
u16 conf_mem_clk_speed;
} __attribute__((__packed__));

-struct ghes_edac_dimm_fill {
- struct mem_ctl_info *mci;
- unsigned count;
+struct ghes_dimm_info {
+ struct dimm_info dimm_info;
+ int idx;
+};
+
+struct ghes_mem_info {
+ int num_dimm;
+ struct ghes_dimm_info *dimms;
};

+struct ghes_mem_info mem_info;
+
+#define for_each_dimm(dimm) \
+ for (dimm = mem_info.dimms; \
+ dimm < mem_info.dimms + mem_info.num_dimm; \
+ dimm++)
+
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
{
- int *num_dimm = arg;
-
if (dh->type == DMI_ENTRY_MEM_DEVICE)
- (*num_dimm)++;
+ mem_info.num_dimm++;
+}
+
+static void ghes_dimm_info_init(void)
+{
+ struct ghes_dimm_info *dimm;
+ int idx = 0;
+
+ for_each_dimm(dimm) {
+ dimm->idx = idx;
+ idx++;
+ }
}

static int get_dimm_smbios_index(u16 handle)
@@ -94,18 +115,17 @@ static int get_dimm_smbios_index(u16 handle)

static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
{
- struct ghes_edac_dimm_fill *dimm_fill = arg;
- struct mem_ctl_info *mci = dimm_fill->mci;
-
if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+ int *idx = arg;
struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
- struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count,
- 0, 0);
+ struct ghes_dimm_info *mi = &mem_info.dimms[*idx];
+ struct dimm_info *dimm = &mi->dimm_info;
u16 rdr_mask = BIT(7) | BIT(13);

+ mi->phys_handle = entry->phys_mem_array_handle;
+
if (entry->size == 0xffff) {
- pr_info("Can't get DIMM%i size\n",
- dimm_fill->count);
+ pr_info("Can't get DIMM%i size\n", mi->idx);
dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
} else if (entry->size == 0x7fff) {
dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
@@ -179,7 +199,7 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

if (dimm->nr_pages) {
edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
- dimm_fill->count, edac_mem_types[dimm->mtype],
+ mi->idx, edac_mem_types[dimm->mtype],
PAGES_TO_MiB(dimm->nr_pages),
(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
@@ -189,8 +209,83 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

dimm->smbios_handle = entry->handle;

- dimm_fill->count++;
+ (*idx)++;
+ }
+}
+
+static int mem_info_setup(void)
+{
+ int idx = 0;
+
+ memset(&mem_info, 0, sizeof(mem_info));
+
+ /* Get the number of DIMMs */
+ dmi_walk(ghes_edac_count_dimms, NULL);
+ if (!mem_info.num_dimm)
+ return -EINVAL;
+
+ mem_info.dimms = kcalloc(mem_info.num_dimm,
+ sizeof(*mem_info.dimms), GFP_KERNEL);
+ if (!mem_info.dimms)
+ return -ENOMEM;
+
+ ghes_dimm_info_init();
+ dmi_walk(ghes_edac_dmidecode, &idx);
+
+ return 0;
+}
+
+static int mem_info_setup_fake(void)
+{
+ struct ghes_dimm_info *ghes_dimm;
+ struct dimm_info *dimm;
+
+ memset(&mem_info, 0, sizeof(mem_info));
+
+ ghes_dimm = kzalloc(sizeof(*mem_info.dimms), GFP_KERNEL);
+ if (!ghes_dimm)
+ return -ENOMEM;
+
+ mem_info.num_dimm = 1;
+ mem_info.dimms = ghes_dimm;
+
+ ghes_dimm_info_init();
+
+ dimm = &ghes_dimm->dimm_info;
+ dimm->nr_pages = 1;
+ dimm->grain = 128;
+ dimm->mtype = MEM_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_SECDED;
+
+ return 0;
+}
+
+static void mci_add_dimm_info(struct mem_ctl_info *mci)
+{
+ struct dimm_info *mci_dimm, *dmi_dimm;
+ struct ghes_dimm_info *dimm;
+ int index = 0;
+
+ for_each_dimm(dimm) {
+ dmi_dimm = &dimm->dimm_info;
+ mci_dimm = edac_get_dimm_by_index(mci, index);
+
+ index++;
+ if (index > mci->tot_dimms)
+ break;
+
+ mci_dimm->nr_pages = dmi_dimm->nr_pages;
+ mci_dimm->mtype = dmi_dimm->mtype;
+ mci_dimm->edac_mode = dmi_dimm->edac_mode;
+ mci_dimm->dtype = dmi_dimm->dtype;
+ mci_dimm->grain = dmi_dimm->grain;
+ mci_dimm->smbios_handle = dmi_dimm->smbios_handle;
}
+
+ if (index != mci->tot_dimms)
+ pr_warn("Unexpected number of DIMMs: %d (exp. %d)\n",
+ index, mci->tot_dimms);
}

void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
@@ -451,10 +546,9 @@ static struct acpi_platform_list plat_list[] = {
int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
bool fake = false;
- int rc, num_dimm = 0;
+ int rc;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[1];
- struct ghes_edac_dimm_fill dimm_fill;
int idx = -1;

if (IS_ENABLED(CONFIG_X86)) {
@@ -472,22 +566,24 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
if (atomic_inc_return(&ghes_init) > 1)
return 0;

- /* Get the number of DIMMs */
- dmi_walk(ghes_edac_count_dimms, &num_dimm);
-
- /* Check if we've got a bogus BIOS */
- if (num_dimm == 0) {
+ rc = mem_info_setup();
+ if (rc == -EINVAL) {
+ /* we've got a bogus BIOS */
fake = true;
- num_dimm = 1;
+ rc = mem_info_setup_fake();
+ }
+ if (rc < 0) {
+ pr_err("Can't allocate memory for DIMM data\n");
+ return rc;
}

layers[0].type = EDAC_MC_LAYER_ALL_MEM;
- layers[0].size = num_dimm;
+ layers[0].size = mem_info.num_dimm;
layers[0].is_virt_csrow = true;

mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
if (!mci) {
- pr_info("Can't allocate memory for EDAC data\n");
+ pr_err("Can't allocate memory for EDAC data\n");
return -ENOMEM;
}

@@ -513,26 +609,14 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
pr_info("If you find incorrect reports, please contact your hardware vendor\n");
pr_info("to correct its BIOS.\n");
- pr_info("This system has %d DIMM sockets.\n", num_dimm);
+ pr_info("This system has %d DIMM sockets.\n", mem_info.num_dimm);
}

- if (!fake) {
- dimm_fill.count = 0;
- dimm_fill.mci = mci;
- dmi_walk(ghes_edac_dmidecode, &dimm_fill);
- } else {
- struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
-
- dimm->nr_pages = 1;
- dimm->grain = 128;
- dimm->mtype = MEM_UNKNOWN;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_SECDED;
- }
+ mci_add_dimm_info(mci);

rc = edac_mc_add_mc(mci);
if (rc < 0) {
- pr_info("Can't register at EDAC core\n");
+ pr_err("Can't register at EDAC core\n");
edac_mc_free(mci);
return -ENODEV;
}
@@ -549,4 +633,6 @@ void ghes_edac_unregister(struct ghes *ghes)
mci = ghes_pvt->mci;
edac_mc_del_mc(mci->pdev);
edac_mc_free(mci);
+
+ kfree(mem_info.dimms);
}
--
2.20.1