Re: [PATCH v4 0/3] AMD Address Translation Library

From: Borislav Petkov
Date: Wed Jan 03 2024 - 06:00:16 EST


On Mon, Dec 18, 2023 at 01:04:03PM -0600, Yazen Ghannam wrote:
> Hi all,
>
> This revision addresses comments from Boris for v3. The most substantial
> change is the removal of the library "stub".
>
> Thanks,
> Yazen
>
> Yazen Ghannam (3):
> RAS: Introduce AMD Address Translation Library
> EDAC/amd64: Use new AMD Address Translation Library
> Documentation: RAS: Add index and address translation section

Ok, a combo diff of my fixes on top, below. Lemme queue it - further
fixes can go on top from now on.

Thx.

---
diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c
index 1de0460f5e03..f6dd87bb2c35 100644
--- a/drivers/ras/amd/atl/access.c
+++ b/drivers/ras/amd/atl/access.c
@@ -18,12 +18,12 @@ static DEFINE_MUTEX(df_indirect_mutex);
/*
* Data Fabric Indirect Access uses FICAA/FICAD.
*
- * Fabric Indirect Configuration Access Address (FICAA): Constructed based
+ * Fabric Indirect Configuration Access Address (FICAA): constructed based
* on the device's Instance Id and the PCI function and register offset of
* the desired register.
*
- * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
- * and FICAD HI registers but so far we only need the LO register.
+ * Fabric Indirect Configuration Access Data (FICAD): there are FICAD
+ * low and high registers but so far only the low register is needed.
*
* Use Instance Id 0xFF to indicate a broadcast read.
*/
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 9cc31c052427..6dc4e06305f7 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -31,7 +31,7 @@ static int addr_over_limit(struct addr_ctx *ctx)

/* Is calculated system address above DRAM limit address? */
if (ctx->ret_addr > dram_limit_addr) {
- atl_debug("Calculated address (0x%016llx) > DRAM limit (0x%016llx)",
+ atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)",
ctx->ret_addr, dram_limit_addr);
return -EINVAL;
}
@@ -179,7 +179,7 @@ static void check_for_legacy_df_access(void)
* are technically independent things.
*
* It's possible to match on the PCI IDs of the Data Fabric devices, but this will be
- * an every expanding list. Instead match on the SMCA and Zen features to cover all
+ * an ever expanding list. Instead, match on the SMCA and Zen features to cover all
* relevant systems.
*/
static const struct x86_cpu_id amd_atl_cpuids[] = {
diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
index 51721094dd06..6f414926e6fe 100644
--- a/drivers/ras/amd/atl/dehash.c
+++ b/drivers/ras/amd/atl/dehash.c
@@ -12,7 +12,14 @@

#include "internal.h"

-static inline bool valid_map_bits(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+/*
+ * Verify the interleave bits are correct in the different interleaving
+ * settings.
+ *
+ * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
+ * respective interleaving is disabled.
+ */
+static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
u8 num_intlv_dies, u8 num_intlv_sockets)
{
if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
@@ -37,11 +44,7 @@ static int df2_dehash_addr(struct addr_ctx *ctx)
{
u8 hashed_bit, intlv_bit, intlv_bit_pos;

- /*
- * Assert that interleave bit is 8 or 9 and that die and socket
- * interleaving are disabled.
- */
- if (!valid_map_bits(ctx, 8, 9, 1, 1))
+ if (!map_bits_valid(ctx, 8, 9, 1, 1))
return -EINVAL;

intlv_bit_pos = ctx->map.intlv_bit_pos;
@@ -64,11 +67,7 @@ static int df3_dehash_addr(struct addr_ctx *ctx)
bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
u8 hashed_bit, intlv_bit, intlv_bit_pos;

- /*
- * Assert that interleave bit is 8 or 9 and that die and socket
- * interleaving are disabled.
- */
- if (!valid_map_bits(ctx, 8, 9, 1, 1))
+ if (!map_bits_valid(ctx, 8, 9, 1, 1))
return -EINVAL;

hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
@@ -172,11 +171,7 @@ static int df4_dehash_addr(struct addr_ctx *ctx)
bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
u8 hashed_bit, intlv_bit;

- /*
- * Assert that interleave bit is 8, die interleaving is disabled,
- * and no more than 2 sockets are interleaved.
- */
- if (!valid_map_bits(ctx, 8, 8, 1, 2))
+ if (!map_bits_valid(ctx, 8, 8, 1, 2))
return -EINVAL;

hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
@@ -252,11 +247,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
u8 hashed_bit, intlv_bit;
u64 rehash_vector;

- /*
- * Assert that interleave bit is 8, die interleaving is disabled,
- * and no more than 2 sockets are interleaved.
- */
- if (!valid_map_bits(ctx, 8, 8, 1, 2))
+ if (!map_bits_valid(ctx, 8, 8, 1, 2))
return -EINVAL;

hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
index fb182dd7cca6..01f1d0fb6799 100644
--- a/drivers/ras/amd/atl/denormalize.c
+++ b/drivers/ras/amd/atl/denormalize.c
@@ -339,7 +339,8 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
}

if (log_fabric_id == MAX_COH_ST_CHANNELS)
- atl_debug("COH_ST remap entry not found for 0x%x", log_fabric_id);
+ atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
+ log_fabric_id);

/* Get the Node ID bits from the physical and apply to the logical. */
return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index a1996811aa34..f17c5f5c9950 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -279,10 +279,10 @@ static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data)
return temp1 | temp2;
}

-#define atl_debug(fmt, arg...) \
+#define atl_debug(ctx, fmt, arg...) \
pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\
- ctx->inputs.socket_id, ctx->inputs.die_id,\
- ctx->inputs.coh_st_inst_id, ctx->inputs.norm_addr, ##arg)
+ (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\
+ (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg)

static inline void atl_debug_on_bad_df_rev(void)
{
@@ -291,7 +291,7 @@ static inline void atl_debug_on_bad_df_rev(void)

static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
{
- atl_debug("Unrecognized interleave mode: %u", ctx->map.intlv_mode);
+ atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
}

#endif /* __AMD_ATL_INTERNAL_H__ */
diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
index 8145b7bb2b40..64e8b1eda1ae 100644
--- a/drivers/ras/amd/atl/map.c
+++ b/drivers/ras/amd/atl/map.c
@@ -140,7 +140,7 @@ static int get_dram_offset(struct addr_ctx *ctx, u64 *norm_offset)

/* Should not be called for map 0. */
if (!ctx->map.num) {
- atl_debug("Trying to find DRAM offset for map 0");
+ atl_debug(ctx, "Trying to find DRAM offset for map 0");
return -EINVAL;
}

@@ -388,7 +388,6 @@ static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset)

for (ctx->map.num = 1; ctx->map.num < df_cfg.num_coh_st_maps; ctx->map.num++) {
ret = get_dram_offset(ctx, norm_offset);
-
if (ret < 0)
return ret;

@@ -398,13 +397,13 @@ static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset)

/* Enabled offsets should never be 0. */
if (*norm_offset == 0) {
- atl_debug("Enabled map %u offset is 0", ctx->map.num);
+ atl_debug(ctx, "Enabled map %u offset is 0", ctx->map.num);
return -EINVAL;
}

/* Offsets should always increase from one map to the next. */
if (*norm_offset <= last_offset) {
- atl_debug("Map %u offset (0x%016llx) <= previous (0x%016llx)",
+ atl_debug(ctx, "Map %u offset (0x%016llx) <= previous (0x%016llx)",
ctx->map.num, *norm_offset, last_offset);
return -EINVAL;
}
@@ -650,18 +649,17 @@ static void dump_address_map(struct dram_addr_map *map)

int get_address_map(struct addr_ctx *ctx)
{
- int ret = 0;
+ int ret;

ret = get_address_map_common(ctx);
if (ret)
- goto out;
+ return ret;

ret = get_global_map_data(ctx);
if (ret)
- goto out;
+ return ret;

dump_address_map(&ctx->map);

-out:
return ret;
}
diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
index 37ad203bb93e..af61f2f1d6de 100644
--- a/drivers/ras/amd/atl/system.c
+++ b/drivers/ras/amd/atl/system.c
@@ -17,7 +17,7 @@ int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id)
u16 socket_id_bits, die_id_bits;

if (socket_id > 0 && df_cfg.socket_id_mask == 0) {
- atl_debug("Invalid socket inputs: socket_id=%u socket_id_mask=0x%x",
+ atl_debug(ctx, "Invalid socket inputs: socket_id=%u socket_id_mask=0x%x",
socket_id, df_cfg.socket_id_mask);
return -EINVAL;
}
@@ -28,7 +28,7 @@ int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id)
socket_id_bits &= df_cfg.socket_id_mask;

if (die_id > 0 && df_cfg.die_id_mask == 0) {
- atl_debug("Invalid die inputs: die_id=%u die_id_mask=0x%x",
+ atl_debug(ctx, "Invalid die inputs: die_id=%u die_id_mask=0x%x",
die_id, df_cfg.die_id_mask);
return -EINVAL;
}
@@ -225,8 +225,6 @@ static void get_num_maps(void)
df_cfg.num_coh_st_maps = 2;
break;
case DF4:
- df_cfg.num_coh_st_maps = 4;
- break;
case DF4p5:
df_cfg.num_coh_st_maps = 4;
break;

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette