[patch 15/60] x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0

From: Thomas Gleixner
Date: Mon Dec 04 2017 - 12:06:05 EST


From: Andy Lutomirski <luto@xxxxxxxxxx>

On 64-bit kernels, we used to assume that TSS.sp0 was the current
top of stack. With the addition of an entry trampoline, this will
no longer be the case. Store the current top of stack in TSS.sp1,
which is otherwise unused but shares the same cacheline.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Reviewed-by: Borislav Petkov <bp@xxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/f56634c746a2926eb7bae61e7b80ed51a1940769.1511497875.git.luto@xxxxxxxxxx

---
arch/x86/include/asm/processor.h | 18 +++++++++++++-----
arch/x86/include/asm/thread_info.h | 2 +-
arch/x86/kernel/asm-offsets_64.c | 1 +
arch/x86/kernel/process.c | 10 ++++++++++
arch/x86/kernel/process_64.c | 1 +
5 files changed, 26 insertions(+), 6 deletions(-)

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -310,7 +310,13 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
+
+ /*
+ * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+ * Linux does not use ring 1, so sp1 is not otherwise needed.
+ */
u64 sp1;
+
u64 sp2;
u64 reserved2;
u64 ist[7];
@@ -369,6 +375,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_

#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1
#endif

/*
@@ -540,12 +548,12 @@ static inline void native_swapgs(void)

static inline unsigned long current_top_of_stack(void)
{
-#ifdef CONFIG_X86_64
- return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
- /* sp0 on x86_32 is special in and around vm86 mode. */
+ /*
+ * We can't read directly from tss.sp0: sp0 on x86_32 is special in
+ * and around vm86 mode and sp0 on x86_64 is special because of the
+ * entry trampoline.
+ */
return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
}

static inline bool on_thread_stack(void)
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_fram
#else /* !__ASSEMBLY__ */

#ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss + TSS_sp1)
#endif

#endif
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -66,6 +66,7 @@ int main(void)

OFFSET(TSS_ist, tss_struct, x86_tss.ist);
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+ OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
BLANK();

#ifdef CONFIG_CC_STACKPROTECTOR
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -56,6 +56,16 @@
* Poison it.
*/
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+ /*
+ * .sp1 is cpu_current_top_of_stack. The init task never
+ * runs user code, but cpu_current_top_of_stack should still
+ * be well defined before the first context switch.
+ */
+ .sp1 = TOP_OF_INIT_STACK,
+#endif
+
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -461,6 +461,7 @@ void compat_start_thread(struct pt_regs
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
+ this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

/* Reload sp0. */
update_sp0(next_p);