Re: [PATCH v3 00/13] Virtually mapped stacks with guard pages (x86, core)

From: Linus Torvalds
Date: Fri Jun 24 2016 - 17:07:01 EST


On Fri, Jun 24, 2016 at 1:51 PM, Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:
>
> Found another bug:

Yup. Those two were the ones that kept it from working for me. Thanks.

Anyway, here's the final combined patch that works for me on x86-64.
No more thread-info on the stack.

Linus

This is an attempt at moving the thread_info into the task_struct
---
arch/mn10300/include/asm/thread_info.h | 2 +-
arch/mn10300/kernel/kgdb.c | 3 ++-
arch/tile/include/asm/thread_info.h | 2 +-
arch/tile/kernel/process.c | 3 ++-
arch/x86/Kconfig | 1 +
arch/x86/entry/common.c | 21 ++++++----------
arch/x86/entry/entry_64.S | 9 ++++---
arch/x86/include/asm/cpu.h | 1 -
arch/x86/include/asm/kprobes.h | 12 +++++-----
arch/x86/include/asm/smp.h | 6 -----
arch/x86/include/asm/switch_to.h | 6 ++---
arch/x86/include/asm/thread_info.h | 37 +++-------------------------
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/dumpstack.c | 2 +-
arch/x86/kernel/irq_32.c | 2 --
arch/x86/kernel/irq_64.c | 3 +--
arch/x86/kernel/process.c | 6 ++---
arch/x86/um/ptrace_32.c | 8 +++----
include/linux/init_task.h | 11 ++++++++-
include/linux/sched.h | 18 +++++++++++---
init/Kconfig | 3 +++
init/init_task.c | 7 ++++--
kernel/fork.c | 44 +++++++++++++++++-----------------
23 files changed, 95 insertions(+), 114 deletions(-)

diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 4861a78c7160..f5f90bbf019d 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void)
}

#ifndef CONFIG_KGDB
-void arch_release_thread_info(struct thread_info *ti);
+void arch_release_thread_stack(unsigned long *stack);
#endif
#define get_thread_info(ti) get_task_struct((ti)->task)
#define put_thread_info(ti) put_task_struct((ti)->task)
diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c
index 99770823451a..2d7986c386fe 100644
--- a/arch/mn10300/kernel/kgdb.c
+++ b/arch/mn10300/kernel/kgdb.c
@@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs)
* single-step state is cleared. At this point the breakpoints should have
* been removed by __switch_to().
*/
-void arch_release_thread_info(struct thread_info *ti)
+void arch_release_thread_stack(unsigned long *stack)
{
+ struct thread_info *ti = (void *)stack;
if (kgdb_sstep_thread == ti) {
kgdb_sstep_thread = NULL;

diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 4b7cef9e94e0..c1467ac59ce6 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -78,7 +78,7 @@ struct thread_info {

#ifndef __ASSEMBLY__

-void arch_release_thread_info(struct thread_info *info);
+void arch_release_thread_stack(unsigned long *stack);

/* How to get the thread information struct from C. */
register unsigned long stack_pointer __asm__("sp");
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 6b705ccc9cc1..a465d8372edd 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -73,8 +73,9 @@ void arch_cpu_idle(void)
/*
* Release a thread_info structure
*/
-void arch_release_thread_info(struct thread_info *info)
+void arch_release_thread_stack(unsigned long *stack)
{
+ struct thread_info *info = (void *)stack;
struct single_step_state *step_state = info->step_state;

if (step_state) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d9a94da0c29f..f33bc80577c5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,6 +154,7 @@ config X86
select SPARSE_IRQ
select SRCU
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_DEV_DMA_OPS if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e538c44..d5feac5f252d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -31,13 +31,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
-{
- unsigned long top_of_stack =
- (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
- return (struct thread_info *)(top_of_stack - THREAD_SIZE);
-}
-
#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
__visible void enter_from_user_mode(void)
@@ -78,7 +71,7 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
*/
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
u32 work;

@@ -156,7 +149,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
unsigned long phase1_result)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
long ret = 0;
u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

@@ -239,7 +232,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Disable IRQs and retry */
local_irq_disable();

- cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+ cached_flags = READ_ONCE(current_thread_info()->flags);

if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
break;
@@ -250,7 +243,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
u32 cached_flags;

if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -309,7 +302,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
*/
__visible inline void syscall_return_slowpath(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
u32 cached_flags = READ_ONCE(ti->flags);

CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -332,7 +325,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
#ifdef CONFIG_X86_64
__visible void do_syscall_64(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
unsigned long nr = regs->orig_ax;

enter_from_user_mode();
@@ -365,7 +358,7 @@ __visible void do_syscall_64(struct pt_regs *regs)
*/
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
+ struct thread_info *ti = current_thread_info();
unsigned int nr = (unsigned int)regs->orig_ax;

#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9ee0da1807ed..f49742de2c65 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
* If we need to do entry work or if we guess we'll need to do
* exit work, go straight to the slow path.
*/
- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ GET_THREAD_INFO(%r11)
+ testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TI_flags(%r11)
jnz entry_SYSCALL64_slow_path

entry_SYSCALL_64_fastpath:
@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath:
*/
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ GET_THREAD_INFO(%r11)
+ testl $_TIF_ALLWORK_MASK, TI_flags(%r11)
jnz 1f

LOCKDEP_SYS_EXIT
@@ -368,9 +370,10 @@ END(ptregs_\func)
* A newly forked process directly context switches into this address.
*
* rdi: prev task we switched from
+ * rsi: task we're switching to
*/
ENTRY(ret_from_fork)
- LOCK ; btr $TIF_FORK, TI_flags(%r8)
+ LOCK ; btr $TIF_FORK, TI_flags(%rsi) /* rsi: this newly forked task */

call schedule_tail /* rdi: 'prev' task parameter */

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 678637ad7476..59d34c521d96 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -17,7 +17,6 @@ static inline void prefill_possible_map(void) {}

#define cpu_physical_id(cpu) boot_cpu_physical_apicid
#define safe_smp_processor_id() 0
-#define stack_smp_processor_id() 0

#endif /* CONFIG_SMP */

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4421b5da409d..1d2997e74b08 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -38,12 +38,12 @@ typedef u8 kprobe_opcode_t;
#define RELATIVECALL_OPCODE 0xe8
#define RELATIVE_ADDR_SIZE 4
#define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR) \
- (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
- THREAD_SIZE - (unsigned long)(ADDR))) \
- ? (MAX_STACK_SIZE) \
- : (((unsigned long)current_thread_info()) + \
- THREAD_SIZE - (unsigned long)(ADDR)))
+
+#define current_stack_top() ((unsigned long)task_stack_page(current)+THREAD_SIZE)
+#define current_stack_size(ADDR) (current_stack_top() - (unsigned long)(ADDR))
+
+#define MIN_STACK_SIZE(ADDR) \
+ (MAX_STACK_SIZE < current_stack_size(ADDR) ? MAX_STACK_SIZE : current_stack_size(ADDR))

#define flush_insn_slot(p) do { } while (0)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 66b057306f40..0576b6157f3a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -172,12 +172,6 @@ extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
#define raw_smp_processor_id() (this_cpu_read(cpu_number))

-#define stack_smp_processor_id() \
-({ \
- struct thread_info *ti; \
- __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
- ti->cpu; \
-})
#define safe_smp_processor_id() smp_processor_id()

#endif
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1b03a1..ae0aa0612c67 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -110,18 +110,16 @@ do { \
"call __switch_to\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
__switch_canary \
- "movq %P[thread_info](%%rsi),%%r8\n\t" \
"movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "testl %[_tif_fork],%P[ti_flags](%%rsi)\n\t" \
"jnz ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
__switch_canary_oparam \
: [next] "S" (next), [prev] "D" (prev), \
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
- [ti_flags] "i" (offsetof(struct thread_info, flags)), \
+ [ti_flags] "i" (offsetof(struct task_struct, thread_info.flags)), \
[_tif_fork] "i" (_TIF_FORK), \
- [thread_info] "i" (offsetof(struct task_struct, stack)), \
[current_task] "m" (current_task) \
__switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 30c133ac05cd..d38ebb08f4c1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -53,24 +53,21 @@ struct task_struct;
#include <linux/atomic.h>

struct thread_info {
- struct task_struct *task; /* main task structure */
__u32 flags; /* low level flags */
__u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
- mm_segment_t addr_limit;
unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */
+ mm_segment_t addr_limit;
};

#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
.flags = 0, \
.cpu = 0, \
.addr_limit = KERNEL_DS, \
}

-#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)

#else /* !__ASSEMBLY__ */
@@ -166,7 +163,7 @@ struct thread_info {

static inline struct thread_info *current_thread_info(void)
{
- return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
+ return (struct thread_info *)current;
}

static inline unsigned long current_stack_pointer(void)
@@ -188,35 +185,7 @@ static inline unsigned long current_stack_pointer(void)

/* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \
- _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
- _ASM_SUB $(THREAD_SIZE),reg ;
-
-/*
- * ASM operand which evaluates to a 'thread_info' address of
- * the current task, if it is known that "reg" is exactly "off"
- * bytes below the top of the stack currently.
- *
- * ( The kernel stack's size is known at build time, it is usually
- * 2 or 4 pages, and the bottom of the kernel stack contains
- * the thread_info structure. So to access the thread_info very
- * quickly from assembly code we can calculate down from the
- * top of the kernel stack to the bottom, using constant,
- * build-time calculations only. )
- *
- * For example, to fetch the current thread_info->flags value into %eax
- * on x86-64 defconfig kernels, in syscall entry code where RSP is
- * currently at exactly SIZEOF_PTREGS bytes away from the top of the
- * stack:
- *
- * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
- *
- * will translate to:
- *
- * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
- *
- * which is below the current RSP by almost 16K.
- */
-#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
+ _ASM_MOV PER_CPU_VAR(current_task),reg

#endif

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0fe6953f421c..d22a7b9c4f0e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1452,7 +1452,7 @@ void cpu_init(void)
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
- int cpu = stack_smp_processor_id();
+ int cpu = raw_smp_processor_id();
int i;

wait_for_master_cpu(cpu);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index d6209f3a69cb..ef8017ca5ba9 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -80,7 +80,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
static inline int valid_stack_ptr(struct task_struct *task,
void *p, unsigned int size, void *end)
{
- void *t = task_thread_info(task);
+ void *t = task_stack_page(task);
if (end) {
if (p < end && p >= (end-THREAD_SIZE))
return 1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 38da8f29a9c8..c627bf8d98ad 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -130,11 +130,9 @@ void irq_ctx_init(int cpu)

void do_softirq_own_stack(void)
{
- struct thread_info *curstk;
struct irq_stack *irqstk;
u32 *isp, *prev_esp;

- curstk = current_stack();
irqstk = __this_cpu_read(softirq_stack);

/* build the stack frame on the softirq stack */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 206d0b90a3ab..38f9f5678dc8 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -41,8 +41,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
if (user_mode(regs))
return;

- if (regs->sp >= curbase + sizeof(struct thread_info) +
- sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+ if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 96becbbb52e0..8f60f810a9e7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -536,9 +536,7 @@ unsigned long get_wchan(struct task_struct *p)
* PADDING
* ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
* stack
- * ----------- bottom = start + sizeof(thread_info)
- * thread_info
- * ----------- start
+ * ----------- bottom = start
*
* The tasks stack pointer points at the location where the
* framepointer is stored. The data on the stack is:
@@ -549,7 +547,7 @@ unsigned long get_wchan(struct task_struct *p)
*/
top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
top -= 2 * sizeof(unsigned long);
- bottom = start + sizeof(struct thread_info);
+ bottom = start;

sp = READ_ONCE(p->thread.sp);
if (sp < bottom || sp > top)
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index ebd4dd6ef73b..14e8f6a628c2 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -191,7 +191,7 @@ int peek_user(struct task_struct *child, long addr, long data)

static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
- int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int err, n, cpu = task_thread_info(child)->cpu;
struct user_i387_struct fpregs;

err = save_i387_registers(userspace_pid[cpu],
@@ -208,7 +208,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c

static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
- int n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int n, cpu = task_thread_info(child)->cpu;
struct user_i387_struct fpregs;

n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@@ -221,7 +221,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c

static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
{
- int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int err, n, cpu = task_thread_info(child)->cpu;
struct user_fxsr_struct fpregs;

err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
@@ -237,7 +237,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *

static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
{
- int n, cpu = ((struct thread_info *) child->stack)->cpu;
+ int n, cpu = task_thread_info(child)->cpu;
struct user_fxsr_struct fpregs;

n = copy_from_user(&fpregs, buf, sizeof(fpregs));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f2cb8d45513d..9c04d44eeb3c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,6 +15,8 @@
#include <net/net_namespace.h>
#include <linux/sched/rt.h>

+#include <asm/thread_info.h>
+
#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
@@ -183,14 +185,21 @@ extern struct task_group root_task_group;
# define INIT_KASAN(tsk)
#endif

+#ifdef CONFIG_THREAD_INFO_IN_TASK
+# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk),
+#else
+# define INIT_TASK_TI(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
*/
#define INIT_TASK(tsk) \
{ \
+ INIT_TASK_TI(tsk) \
.state = 0, \
- .stack = &init_thread_info, \
+ .stack = init_stack, \
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
.prio = MAX_PRIO-20, \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada26345..743f0307bf4a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1456,8 +1456,11 @@ struct tlbflush_unmap_batch {
};

struct task_struct {
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ struct thread_info thread_info;
+#endif
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
- void *stack;
+ unsigned long *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
unsigned int ptrace;
@@ -2539,7 +2542,9 @@ extern void set_curr_task(int cpu, struct task_struct *p);
void yield(void);

union thread_union {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
struct thread_info thread_info;
+#endif
unsigned long stack[THREAD_SIZE/sizeof(long)];
};

@@ -2967,10 +2972,17 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
cgroup_threadgroup_change_end(tsk);
}

-#ifndef __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+
+#define task_thread_info(task) (&(task)->thread_info)
+#define task_stack_page(task) ((void *)(task)->stack)
+#define setup_thread_stack(new,old) do { } while(0)
+#define end_of_stack(task) ((task)->stack)
+
+#elif !defined(__HAVE_THREAD_FUNCTIONS)

#define task_thread_info(task) ((struct thread_info *)(task)->stack)
-#define task_stack_page(task) ((task)->stack)
+#define task_stack_page(task) ((void *)(task)->stack)

static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
diff --git a/init/Kconfig b/init/Kconfig
index f755a602d4a1..0c83af6d3753 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -26,6 +26,9 @@ config IRQ_WORK
config BUILDTIME_EXTABLE_SORT
bool

+config THREAD_INFO_IN_TASK
+ bool
+
menu "General setup"

config BROKEN
diff --git a/init/init_task.c b/init/init_task.c
index ba0a7f362d9e..11f83be1fa79 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task);
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
*/
-union thread_union init_thread_union __init_task_data =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data = {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+ INIT_THREAD_INFO(init_task)
+#endif
+};
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355aa97f..9f290226d9d5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -148,7 +148,7 @@ static inline void free_task_struct(struct task_struct *tsk)
}
#endif

-void __weak arch_release_thread_info(struct thread_info *ti)
+void __weak arch_release_thread_stack(unsigned long *stack)
{
}

@@ -159,7 +159,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
* kmemcache based allocator.
*/
# if THREAD_SIZE >= PAGE_SIZE
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
int node)
{
struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
@@ -172,31 +172,31 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
return page ? page_address(page) : NULL;
}

-static inline void free_thread_info(struct thread_info *ti)
+static inline void free_thread_stack(unsigned long *stack)
{
- struct page *page = virt_to_page(ti);
+ struct page *page = virt_to_page(stack);

memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
-(1 << THREAD_SIZE_ORDER));
__free_kmem_pages(page, THREAD_SIZE_ORDER);
}
# else
-static struct kmem_cache *thread_info_cache;
+static struct kmem_cache *thread_stack_cache;

-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
int node)
{
- return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+ return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
}

-static void free_thread_info(struct thread_info *ti)
+static void free_thread_stack(unsigned long *stack)
{
- kmem_cache_free(thread_info_cache, ti);
+ kmem_cache_free(thread_stack_cache, stack);
}

void thread_info_cache_init(void)
{
- thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+ thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
THREAD_SIZE, 0, NULL);
- BUG_ON(thread_info_cache == NULL);
+ BUG_ON(thread_stack_cache == NULL);
}
@@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;

-static void account_kernel_stack(struct thread_info *ti, int account)
+static void account_kernel_stack(unsigned long *stack, int account)
{
- struct zone *zone = page_zone(virt_to_page(ti));
+ struct zone *zone = page_zone(virt_to_page(stack));

mod_zone_page_state(zone, NR_KERNEL_STACK, account);
}
@@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account)
void free_task(struct task_struct *tsk)
{
account_kernel_stack(tsk->stack, -1);
- arch_release_thread_info(tsk->stack);
- free_thread_info(tsk->stack);
+ arch_release_thread_stack(tsk->stack);
+ free_thread_stack(tsk->stack);
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
@@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
- struct thread_info *ti;
+ unsigned long *stack;
int err;

if (node == NUMA_NO_NODE)
@@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (!tsk)
return NULL;

- ti = alloc_thread_info_node(tsk, node);
- if (!ti)
+ stack = alloc_thread_stack_node(tsk, node);
+ if (!stack)
goto free_tsk;

err = arch_dup_task_struct(tsk, orig);
if (err)
- goto free_ti;
+ goto free_stack;

- tsk->stack = ti;
+ tsk->stack = stack;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
@@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;

- account_kernel_stack(ti, 1);
+ account_kernel_stack(stack, 1);

kcov_task_init(tsk);

return tsk;

-free_ti:
- free_thread_info(ti);
+free_stack:
+ free_thread_stack(stack);
free_tsk:
free_task_struct(tsk);
return NULL;