Re: [PATCH v4 2/2] EDAC/amd64: Add new register offset support and related changes

From: William Roche
Date: Thu Feb 03 2022 - 08:19:55 EST


This version is clearer according to me, and the dimm value can continue to be used in the debug messages at the end of the f17_addr_mask_to_cs_size function.
Thank you.

Reviewed-by: William Roche <william.roche@xxxxxxxxxx>

W.


On 02/02/2022 15:43, Yazen Ghannam wrote:
Introduce a "family flags" bitmask that can be used to indicate any
special behavior needed on a per-family basis.

Add a flag to indicate a system uses the new register offsets introduced
with Family 19h Model 10h.

Use this flag to account for register offset changes, a new bitfield
indicating DDR5 use on a memory controller, and to set the proper number
of chip select masks.

Rework f17_addr_mask_to_cs_size() to properly handle the change in chip
select masks. And update code comments to reflect the updated Chip
Select, DIMM, and Mask relationships.

[uninitiliazed variable warning]
Reported-by: kernel test robot <lkp@xxxxxxxxx>
Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
---
Link:
https://lore.kernel.org/r/20211228200615.412999-3-yazen.ghannam@xxxxxxx

v3->v4:
* Use a single helper function for new register offsets.
* Fix an uninitialized variable warning.

v2->v3:
* Adjust variable names to explicitly show what they represent.
* Update code comment to give more detail on CS/MASK/DIMM layout.

v1->v2:
* Was patch 4 in v1.
* Change "has_ddr5" flag to "zn_regs_v2".
* Drop flag check helper function.
* Update determine_memory_type() to check bitfield for DDR5.
* Update code comments.

drivers/edac/amd64_edac.c | 80 +++++++++++++++++++++++++++++++--------
drivers/edac/amd64_edac.h | 14 +++++++
2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 49e384207ce0..5806fe657373 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,6 +15,21 @@ static struct msr __percpu *msrs;
static struct amd64_family_type *fam_type;
+static inline u32 get_umc_reg(u32 reg)
+{
+ if (!fam_type->flags.zn_regs_v2)
+ return reg;
+
+ switch (reg) {
+ case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5;
+ case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5;
+ case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5;
+ }
+
+ WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg);
+ return 0;
+}
+
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
@@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
- if (umc->dram_type == MEM_LRDDR4) {
- amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
+ if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
+ amd_smn_read(pvt->mc_node_id,
+ umc_base + get_umc_reg(UMCCH_ADDR_CFG),
+ &tmp);
edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
i, 1 << ((tmp >> 4) & 0x3));
}
@@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
for_each_umc(umc) {
pvt->csels[umc].b_cnt = 4;
- pvt->csels[umc].m_cnt = 2;
+ pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
}
} else {
@@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
}
umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
- umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+ umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
for_each_chip_select_mask(cs, umc, pvt) {
mask = &pvt->csels[umc].csmasks[cs];
@@ -1623,12 +1640,25 @@ static void _determine_memory_type_df(struct amd64_umc *umc)
return;
}
- if (umc->dimm_cfg & BIT(5))
- umc->dram_type = MEM_LRDDR4;
- else if (umc->dimm_cfg & BIT(4))
- umc->dram_type = MEM_RDDR4;
- else
- umc->dram_type = MEM_DDR4;
+ /*
+ * Check if the system supports the "DDR Type" field in UMC Config
+ * and has DDR5 DIMMs in use.
+ */
+ if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
+ if (umc->dimm_cfg & BIT(5))
+ umc->dram_type = MEM_LRDDR5;
+ else if (umc->dimm_cfg & BIT(4))
+ umc->dram_type = MEM_RDDR5;
+ else
+ umc->dram_type = MEM_DDR5;
+ } else {
+ if (umc->dimm_cfg & BIT(5))
+ umc->dram_type = MEM_LRDDR4;
+ else if (umc->dimm_cfg & BIT(4))
+ umc->dram_type = MEM_RDDR4;
+ else
+ umc->dram_type = MEM_DDR4;
+ }
}
static void determine_memory_type_df(struct amd64_pvt *pvt)
@@ -2170,6 +2200,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
{
u32 addr_mask_orig, addr_mask_deinterleaved;
u32 msb, weight, num_zero_bits;
+ int cs_mask_nr = csrow_nr;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -2185,17 +2216,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
return size;
/*
- * There is one mask per DIMM, and two Chip Selects per DIMM.
- * CS0 and CS1 -> DIMM0
- * CS2 and CS3 -> DIMM1
+ * Family 17h introduced systems with one mask per DIMM,
+ * and two Chip Selects per DIMM.
+ *
+ * CS0 and CS1 -> MASK0 / DIMM0
+ * CS2 and CS3 -> MASK1 / DIMM1
+ *
+ * Family 19h Model 10h introduced systems with one mask per Chip Select,
+ * and two Chip Selects per DIMM.
+ *
+ * CS0 -> MASK0 -> DIMM0
+ * CS1 -> MASK1 -> DIMM0
+ * CS2 -> MASK2 -> DIMM1
+ * CS3 -> MASK3 -> DIMM1
+ *
+ * Keep the mask number equal to the Chip Select number for newer systems,
+ * and shift the mask number for older systems.
*/
dimm = csrow_nr >> 1;
+ if (!fam_type->flags.zn_regs_v2)
+ cs_mask_nr >>= 1;
+
/* Asymmetric dual-rank DIMM support. */
if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+ addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
else
- addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+ addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -2951,6 +2998,7 @@ static struct amd64_family_type family_types[] = {
.f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
.max_mcs = 12,
+ .flags.zn_regs_v2 = 1,
.ops = {
.early_channel_count = f17_early_channel_count,
.dbam_to_cs = f17_addr_mask_to_cs_size,
@@ -3389,7 +3437,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
- amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
+ amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 09ad28299c57..6f8147abfa71 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -273,8 +273,11 @@
#define UMCCH_BASE_ADDR_SEC 0x10
#define UMCCH_ADDR_MASK 0x20
#define UMCCH_ADDR_MASK_SEC 0x28
+#define UMCCH_ADDR_MASK_SEC_DDR5 0x30
#define UMCCH_ADDR_CFG 0x30
+#define UMCCH_ADDR_CFG_DDR5 0x40
#define UMCCH_DIMM_CFG 0x80
+#define UMCCH_DIMM_CFG_DDR5 0x90
#define UMCCH_UMC_CFG 0x100
#define UMCCH_SDP_CTRL 0x104
#define UMCCH_ECC_CTRL 0x14C
@@ -483,11 +486,22 @@ struct low_ops {
unsigned cs_mode, int cs_mask_nr);
};
+struct amd64_family_flags {
+ /*
+ * Indicates that the system supports the new register offsets, etc.
+ * first introduced with Family 19h Model 10h.
+ */
+ __u64 zn_regs_v2 : 1,
+
+ __reserved : 63;
+};
+
struct amd64_family_type {
const char *ctl_name;
u16 f0_id, f1_id, f2_id, f6_id;
/* Maximum number of memory controllers per die/node. */
u8 max_mcs;
+ struct amd64_family_flags flags;
struct low_ops ops;
};