[PATCH 1/3] x86-64: Use per-cpu stack canary if supported by compiler

From: Brian Gerst
Date: Sat Nov 13 2021 - 07:41:46 EST


If the compiler supports it, use a standard per-cpu variable for the
stack protector instead of the old fixed location. Keep the fixed
location code for compatibility with older compilers.

Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
---
arch/x86/Kconfig | 5 +++++
arch/x86/Makefile | 21 ++++++++++++++-------
arch/x86/entry/entry_64.S | 6 +++++-
arch/x86/include/asm/processor.h | 14 +++++++++++---
arch/x86/include/asm/stackprotector.h | 17 ++++++-----------
arch/x86/kernel/asm-offsets_64.c | 2 +-
arch/x86/kernel/cpu/common.c | 14 ++++++++------
arch/x86/kernel/head_64.S | 16 ++++++++++++----
arch/x86/kernel/vmlinux.lds.S | 4 ++--
arch/x86/xen/xen-head.S | 10 ++++------
10 files changed, 68 insertions(+), 41 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b1d4b481fcdd..8268910e09cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -397,6 +397,11 @@ config CC_HAS_SANE_STACKPROTECTOR
the compiler produces broken code or if it does not let us control
the segment on 32-bit kernels.

+config STACKPROTECTOR_FIXED
+ bool
+ depends on X86_64 && STACKPROTECTOR
+ default y if !$(cc-option,-mstack-protector-guard-reg=gs)
+
menu "Processor type and features"

config SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index aab70413ae7a..94aee76a3457 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -79,13 +79,7 @@ ifeq ($(CONFIG_X86_32),y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding

- ifeq ($(CONFIG_STACKPROTECTOR),y)
- ifeq ($(CONFIG_SMP),y)
- KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
- else
- KBUILD_CFLAGS += -mstack-protector-guard=global
- endif
- endif
+ percpu_seg := fs
else
BITS := 64
UTS_MACHINE := x86_64
@@ -126,6 +120,19 @@ else

KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel
+
+ percpu_seg := gs
+endif
+
+ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifneq ($(CONFIG_STACKPROTECTOR_FIXED),y)
+ ifeq ($(CONFIG_SMP),y)
+ KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) \
+ -mstack-protector-guard-symbol=__stack_chk_guard
+ else
+ KBUILD_CFLAGS += -mstack-protector-guard=global
+ endif
+ endif
endif

ifdef CONFIG_X86_X32
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e38a4cf795d9..2006fde249c2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -217,6 +217,10 @@ syscall_return_via_sysret:
sysretq
SYM_CODE_END(entry_SYSCALL_64)

+#ifdef CONFIG_STACKPROTECTOR_FIXED
+#define __stack_chk_guard fixed_percpu_data + stack_canary_offset
+#endif
+
/*
* %rdi: prev task
* %rsi: next task
@@ -240,7 +244,7 @@ SYM_FUNC_START(__switch_to_asm)

#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+ movq %rbx, PER_CPU_VAR(__stack_chk_guard)
#endif

#ifdef CONFIG_RETPOLINE
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 191878a65c61..44376a15b9d2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -427,6 +427,8 @@ struct irq_stack {
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);

#ifdef CONFIG_X86_64
+
+#ifdef CONFIG_STACKPROTECTOR_FIXED
struct fixed_percpu_data {
/*
* GCC hardcodes the stack canary as %gs:40. Since the
@@ -442,10 +444,19 @@ struct fixed_percpu_data {

DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
DECLARE_INIT_PER_CPU(fixed_percpu_data);
+#endif /* STACKPROTECTOR_FIXED */

static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
+#ifdef CONFIG_STACKPROTECTOR_FIXED
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#else
+#ifdef CONFIG_SMP
+ return per_cpu_offset(cpu);
+#else
+ return 0;
+#endif
+#endif
}

DECLARE_PER_CPU(void *, hardirq_stack_ptr);
@@ -455,9 +466,6 @@ extern asmlinkage void ignore_sysret(void);
/* Save actual FS/GS selectors and bases to current->thread */
void current_save_fsgs(void);
#else /* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-#endif
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* !X86_64 */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 24a8d6c4fb18..1c7f0035c8fb 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -37,6 +37,12 @@
#include <linux/random.h>
#include <linux/sched.h>

+#ifdef CONFIG_STACKPROTECTOR_FIXED
+#define __stack_chk_guard fixed_percpu_data.stack_canary
+#else
+DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
+#endif
+
/*
* Initialize the stackprotector canary value.
*
@@ -53,9 +59,6 @@ static __always_inline void boot_init_stack_canary(void)
u64 canary;
u64 tsc;

-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
-#endif
/*
* We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init,
@@ -68,20 +71,12 @@ static __always_inline void boot_init_stack_canary(void)
canary &= CANARY_MASK;

current->stack_canary = canary;
-#ifdef CONFIG_X86_64
- this_cpu_write(fixed_percpu_data.stack_canary, canary);
-#else
this_cpu_write(__stack_chk_guard, canary);
-#endif
}

static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
-#ifdef CONFIG_X86_64
- per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
-#else
per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
-#endif
}

#else /* STACKPROTECTOR */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index b14533af7676..e9f4870a5321 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,7 +56,7 @@ int main(void)

BLANK();

-#ifdef CONFIG_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR_FIXED
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
BLANK();
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0083464de5e3..f3dfb5b59530 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1767,10 +1767,6 @@ static __init int setup_clearcpuid(char *arg)
__setup("clearcpuid=", setup_clearcpuid);

#ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
- fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-
/*
* The following percpu variables are hot. Align current_task to
* cacheline size such that they fall in the same cacheline.
@@ -1840,12 +1836,18 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);

+#endif /* CONFIG_X86_64 */
+
#ifdef CONFIG_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+ fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
+#else
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
-
-#endif /* CONFIG_X86_64 */
+#endif

/*
* Clear all 6 debug registers:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..6e396ffb1610 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -198,10 +198,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movl %eax,%fs
movl %eax,%gs

- /* Set up %gs.
- *
- * The base of %gs always points to fixed_percpu_data. If the
- * stack protector canary is enabled, it is located at %gs:40.
+ /*
+ * Set up GS base.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
@@ -337,7 +335,17 @@ SYM_CODE_END(vc_boot_ghcb)
__REFDATA
.balign 8
SYM_DATA(initial_code, .quad x86_64_start_kernel)
+
+#ifdef CONFIG_STACKPROTECTOR_FIXED
SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
+#else
+#ifdef CONFIG_SMP
+SYM_DATA(initial_gs, .quad __per_cpu_load)
+#else
+SYM_DATA(initial_gs, .quad 0)
+#endif
+#endif
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f..c475d21d2126 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -481,10 +481,10 @@ SECTIONS
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(fixed_percpu_data);
INIT_PER_CPU(irq_stack_backing_store);

-#ifdef CONFIG_SMP
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+INIT_PER_CPU(fixed_percpu_data);
. = ASSERT((fixed_percpu_data == 0),
"fixed_percpu_data is not at start of per-cpu area");
#endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 9e27b86a0c31..54995efd74f7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -54,16 +54,14 @@ SYM_CODE_START(startup_xen)
mov %_ASM_SI, xen_start_info
mov initial_stack(%rip), %rsp

- /* Set up %gs.
- *
- * The base of %gs always points to fixed_percpu_data. If the
- * stack protector canary is enabled, it is located at %gs:40.
+ /*
+ * Set up GS base.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
- cdq
+ movl initial_gs(%rip),%eax
+ movl initial_gs+4(%rip),%edx
wrmsr

call xen_start_kernel
--
2.31.1