Re: [PATCH v4 11/11] x86/startup_64: Drop global variables keeping track of LA57 state
From: Brian Gerst
Date: Wed Feb 14 2024 - 15:26:16 EST
On Wed, Feb 14, 2024 at 10:45 AM Ard Biesheuvel <ardb@xxxxxxxxxx> wrote:
>
> On Wed, 14 Feb 2024 at 16:24, Brian Gerst <brgerst@xxxxxxxxx> wrote:
> >
> > On Tue, Feb 13, 2024 at 7:42 AM Ard Biesheuvel <ardb+git@googlecom> wrote:
> > >
> > > From: Ard Biesheuvel <ardb@xxxxxxxxxx>
> > >
> > > On x86_64, the core kernel is entered in long mode, which implies that
> > > paging is enabled. This means that the CR4.LA57 control bit is
> > > guaranteed to be in sync with the number of paging levels used by the
> > > kernel, and there is no need to store this in a variable.
> > >
> > > There is also no need to use variables for storing the calculations of
> > > pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
> > >
> > > This removes the need for two different sources of truth for determining
> > > whether 5-level paging is in use: CR4.LA57 always reflects the actual
> > > state, and never changes from the point of view of the 64-bit core
> > > kernel. The only potential concern is the cost of CR4 accesses, which
> > > can be mitigated using alternatives patching based on feature detection.
> > >
> > > Note that even the decompressor does not manipulate any page tables
> > > before updating CR4.LA57, so it can also avoid the associated global
> > > variables entirely. However, as it does not implement alternatives
> > > patching, the associated ELF sections need to be discarded.
> > >
> > > Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
> > > ---
> > > arch/x86/boot/compressed/misc.h | 4 --
> > > arch/x86/boot/compressed/pgtable_64.c | 12 ----
> > > arch/x86/boot/compressed/vmlinux.lds.S | 1 +
> > > arch/x86/include/asm/pgtable_64_types.h | 58 ++++++++------------
> > > arch/x86/kernel/cpu/common.c | 2 -
> > > arch/x86/kernel/head64.c | 33 +----------
> > > arch/x86/mm/kasan_init_64.c | 3 -
> > > arch/x86/mm/mem_encrypt_identity.c | 9 ---
> > > 8 files changed, 27 insertions(+), 95 deletions(-)
> > >
> > > diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
> > > index bc2f0f17fb90..2b15ddd0e177 100644
> > > --- a/arch/x86/boot/compressed/misc.h
> > > +++ b/arch/x86/boot/compressed/misc.h
> > > @@ -16,9 +16,6 @@
> > >
> > > #define __NO_FORTIFY
> > >
> > > -/* cpu_feature_enabled() cannot be used this early */
> > > -#define USE_EARLY_PGTABLE_L5
> > > -
> > > /*
> > > * Boot stub deals with identity mappings, physical and virtual addresses are
> > > * the same, so override these defines.
> > > @@ -178,7 +175,6 @@ static inline int count_immovable_mem_regions(void) { return 0; }
> > > #endif
> > >
> > > /* ident_map_64.c */
> > > -extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
> > > extern void kernel_add_identity_map(unsigned long start, unsigned long end);
> > >
> > > /* Used by PAGE_KERN* macros: */
> > > diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> > > index 51f957b24ba7..ae72f53f5e77 100644
> > > --- a/arch/x86/boot/compressed/pgtable_64.c
> > > +++ b/arch/x86/boot/compressed/pgtable_64.c
> > > @@ -9,13 +9,6 @@
> > > #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
> > > #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
> > >
> > > -#ifdef CONFIG_X86_5LEVEL
> > > -/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
> > > -unsigned int __section(".data") __pgtable_l5_enabled;
> > > -unsigned int __section(".data") pgdir_shift = 39;
> > > -unsigned int __section(".data") ptrs_per_p4d = 1;
> > > -#endif
> > > -
> > > /* Buffer to preserve trampoline memory */
> > > static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
> > >
> > > @@ -125,11 +118,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
> > > native_cpuid_eax(0) >= 7 &&
> > > (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
> > > l5_required = true;
> > > -
> > > - /* Initialize variables for 5-level paging */
> > > - __pgtable_l5_enabled = 1;
> > > - pgdir_shift = 48;
> > > - ptrs_per_p4d = 512;
> > > }
> > >
> > > /*
> > > diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
> > > index 083ec6d7722a..06358bb067fe 100644
> > > --- a/arch/x86/boot/compressed/vmlinux.lds.S
> > > +++ b/arch/x86/boot/compressed/vmlinux.lds.S
> > > @@ -81,6 +81,7 @@ SECTIONS
> > > *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
> > > *(.hash) *(.gnu.hash)
> > > *(.note.*)
> > > + *(.altinstructions .altinstr_replacement)
> > > }
> > >
> > > .got.plt (INFO) : {
> > > diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> > > index 38b54b992f32..6a57bfdff52b 100644
> > > --- a/arch/x86/include/asm/pgtable_64_types.h
> > > +++ b/arch/x86/include/asm/pgtable_64_types.h
> > > @@ -6,7 +6,10 @@
> > >
> > > #ifndef __ASSEMBLY__
> > > #include <linux/types.h>
> > > +#include <asm/alternative.h>
> > > +#include <asm/cpufeatures.h>
> > > #include <asm/kaslr.h>
> > > +#include <asm/processor-flags.h>
> > >
> > > /*
> > > * These are used to make use of C type-checking..
> > > @@ -21,63 +24,50 @@ typedef unsigned long pgprotval_t;
> > > typedef struct { pteval_t pte; } pte_t;
> > > typedef struct { pmdval_t pmd; } pmd_t;
> > >
> > > -#ifdef CONFIG_X86_5LEVEL
> > > -extern unsigned int __pgtable_l5_enabled;
> > > -
> > > -#ifdef USE_EARLY_PGTABLE_L5
> > > -/*
> > > - * cpu_feature_enabled() is not available in early boot code.
> > > - * Use variable instead.
> > > - */
> > > -static inline bool pgtable_l5_enabled(void)
> > > +static __always_inline __pure bool pgtable_l5_enabled(void)
> > > {
> > > - return __pgtable_l5_enabled;
> > > -}
> > > -#else
> > > -#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
> > > -#endif /* USE_EARLY_PGTABLE_L5 */
> > > + unsigned long r;
> > > + bool ret;
> > >
> > > -#else
> > > -#define pgtable_l5_enabled() 0
> > > -#endif /* CONFIG_X86_5LEVEL */
> > > + if (!IS_ENABLED(CONFIG_X86_5LEVEL))
> > > + return false;
> > > +
> > > + asm(ALTERNATIVE_TERNARY(
> > > + "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
> > > + %P[feat], "stc", "clc")
> > > + : [reg] "=&r" (r), CC_OUT(c) (ret)
> > > + : [feat] "i" (X86_FEATURE_LA57),
> > > + [la57] "i" (X86_CR4_LA57_BIT)
> > > + : "cc");
> >
> > This should be more like _static_cpu_has(), where the runtime test is
> > out of line in a discardable section, and the inline part is just a
> > JMP or NOP.
> >
>
> Why exactly? It matters very little in terms of space, a cross-section
> jump is 5 bytes, and movq+btl is 7 bytes.
>
> If you are referring to the use of the C flag: this way, it is left up
> to the compiler to decide whether a branch or a conditional move is
> more suitable, rather than forcing the use of a branch in one of the
> two cases.
You're probably right in that many uses of pgtable_l5_enabled() are
choosing between two constants, and static_cpu_has() does not handle
that case very efficiently. Something like static_cpu_choose(feature,
yes_val, no_val) would be a possible idea to explore.
Brian Gerst