[RFC][PATCH 19/34] x86/cpu: Introduce cache alignment multiplier

From: Dave Hansen
Date: Thu Feb 22 2024 - 13:47:31 EST



From: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>

The kernel currently tracks two different but similar-sounding values:

x86_clflush_size
and
x86_cache_alignment

x86_clflush_size is literally the size of the cacheline, which is also
the architectural granularity at which CLFLUSH operates.

But some weirdo CPUs like the Pentium 4 do some operations across
two cachelines. Their CLFLUSH still works at 'x86_clflush_size'
but they need some operations aligned to that double cacheline to
work well, thus 'x86_cache_alignment'.

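Introduce a 'cache_align_mult' field in 'bsp_addr_config' and use it
for these systems. When the field is nonzero, 'x86_cache_alignment' is
set to 'x86_clflush_size' multiplied by it; when it is zero (the
default), 'x86_cache_alignment' is simply 'x86_clflush_size'.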

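Note that the Intel Family 15 code actually did this in two different
sites, one for 32-bit and one for 64-bit. Unify them. The 32-bit site
only doubled the alignment when it was exactly 64 bytes; the unified
code sets the multiplier for every Family 15 CPU, matching what the
64-bit site already did.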

Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
---

b/arch/x86/include/asm/processor.h | 7 +++++++
b/arch/x86/kernel/cpu/centaur.c | 2 +-
b/arch/x86/kernel/cpu/common.c | 3 +++
b/arch/x86/kernel/cpu/intel.c | 10 ++++------
4 files changed, 15 insertions(+), 7 deletions(-)

diff -puN arch/x86/include/asm/processor.h~x86_cache_alignment_mult arch/x86/include/asm/processor.h
--- a/arch/x86/include/asm/processor.h~x86_cache_alignment_mult 2024-02-22 10:08:57.732817356 -0800
+++ b/arch/x86/include/asm/processor.h 2024-02-22 10:08:57.740817669 -0800
@@ -172,6 +172,13 @@ struct x86_addr_config {
* platforms that use memory encryption.
*/
u8 phys_addr_reduction_bits;
+
+ /*
+ * "x86_clflush_size" is the size of an actual cacheline.
+ * Allow systems to specify a multiplier where alignment
+ * will take place at a more coarse granularity.
+ */
+ u8 cache_align_mult;
};

/*
diff -puN arch/x86/kernel/cpu/centaur.c~x86_cache_alignment_mult arch/x86/kernel/cpu/centaur.c
--- a/arch/x86/kernel/cpu/centaur.c~x86_cache_alignment_mult 2024-02-22 10:08:57.732817356 -0800
+++ b/arch/x86/kernel/cpu/centaur.c 2024-02-22 10:08:57.740817669 -0800
@@ -216,7 +216,7 @@ static void init_centaur(struct cpuinfo_
static void bsp_init_centaur(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x6 && c->x86_model >= 0xf)
- c->x86_cache_alignment = x86_clflush_size() * 2;
+ bsp_addr_config.cache_align_mult = 2;
}

#ifdef CONFIG_X86_32
diff -puN arch/x86/kernel/cpu/common.c~x86_cache_alignment_mult arch/x86/kernel/cpu/common.c
--- a/arch/x86/kernel/cpu/common.c~x86_cache_alignment_mult 2024-02-22 10:08:57.736817513 -0800
+++ b/arch/x86/kernel/cpu/common.c 2024-02-22 10:08:57.740817669 -0800
@@ -1124,7 +1124,10 @@ void get_cpu_address_sizes(struct cpuinf
}
}
c->x86_cache_bits = x86_config.phys_bits;
+
c->x86_cache_alignment = x86_clflush_size();
+ if (bsp_addr_config.cache_align_mult)
+ c->x86_cache_alignment *= bsp_addr_config.cache_align_mult;

/* Do this last to avoid affecting ->x86_cache_bits. */
x86_config.phys_bits -= bsp_addr_config.phys_addr_reduction_bits;
diff -puN arch/x86/kernel/cpu/intel.c~x86_cache_alignment_mult arch/x86/kernel/cpu/intel.c
--- a/arch/x86/kernel/cpu/intel.c~x86_cache_alignment_mult 2024-02-22 10:08:57.736817513 -0800
+++ b/arch/x86/kernel/cpu/intel.c 2024-02-22 10:08:57.740817669 -0800
@@ -236,10 +236,6 @@ static void early_init_intel(struct cpui

#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
-#else
- /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
- if (c->x86 == 15 && c->x86_cache_alignment == 64)
- c->x86_cache_alignment = 128;
#endif

/*
@@ -418,6 +414,10 @@ static void bsp_init_intel(struct cpuinf
WARN_ON_ONCE(keyid_bits);
bsp_addr_config.phys_addr_reduction_bits = 4;
}
+
+ /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
+ if (c->x86 == 15)
+ bsp_addr_config.cache_align_mult = 2;
}

#ifdef CONFIG_X86_32
@@ -659,8 +659,6 @@ static void init_intel(struct cpuinfo_x8
set_cpu_bug(c, X86_BUG_MONITOR);

#ifdef CONFIG_X86_64
- if (c->x86 == 15)
- c->x86_cache_alignment = x86_clflush_size() * 2;
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
#else
_