[PATCH 3/1] x86/fpu: Remove init_task FPU state dependencies, add debugging warning

From: Ingo Molnar
Date: Fri Mar 29 2024 - 05:45:36 EST



* Ingo Molnar <mingo@xxxxxxxxxx> wrote:

> > - exit_thread() - trivial
> >
> > - arch_dup_task_struct() clears thread.fpu, but I guess this is unnecessary
> >
> > - fpu_clone() initializes dst->thread.fpu, no longer needed
> >
> > - finally, fpu__init_system_early_generic() which initializes the
> > init_task.thread.fpu pointer.
> >
> > But this doesn't look difficult? Although I don't understand the magic in
> > arch/x86/kernel/vmlinux.lds.S ...
>
> Yeah.
>
> Something like the patch below, on top of my previous patch.
>
> The main/only fishy bit left is:
>
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -172,6 +172,10 @@ SECTIONS
> /* init_task */
> INIT_TASK_DATA(THREAD_SIZE)
>
> + __x86_init_fpu_begin = .;
> + . = __x86_init_fpu_begin + 128*PAGE_SIZE;
> + __x86_init_fpu_end = .;
>
> ... as we don't know the FPU context switch size in advance, and modern
> vector CPUs can be huge. Plus CPU vendors like to push more crap into
> the FPU context area (XSAVE area), because the rather opaque
> enumeration and variable-size format makes OS support easier for them.
> :-/
>
> And in principle init_task should never use its task-fpu context switch
> area for real, but I couldn't convince myself (yet) whether that's true
> under all circumstances - and getting this wrong would be a memory
> corruptor ...
>
> kernel_fpu_begin_mask() has this:
>
> this_cpu_write(in_kernel_fpu, true);
>
> if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&
> !test_thread_flag(TIF_NEED_FPU_LOAD)) {
> set_thread_flag(TIF_NEED_FPU_LOAD);
> save_fpregs_to_fpstate(x86_task_fpu(current));
> }
> __cpu_invalidate_fpregs_state();
>
> and the save_fpregs_to_fpstate() inside the branch should never execute
> for init_task.

So there were a few places, the patch below should cure all warnings
that I was able to trigger.

- x86_64 boots to user-space in KVM/qemu
- i386 math-emu boots to init (not sure if it works though)

Note that we weren't actually using src_fpu for anything useful in
fpu_clone() anymore.

So this now looks like something that looks useful and clean, but the
changes are quite a bit more scary than I wanted them to be ...

Thanks,

Ingo


======================>
From: Ingo Molnar <mingo@xxxxxxxxxx>
Date: Fri, 29 Mar 2024 10:33:27 +0100
Subject: [PATCH] x86/fpu: Remove init_task FPU state dependencies, add debugging warning

Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/include/asm/processor.h | 6 +++++-
arch/x86/kernel/fpu/core.c | 12 +++++++++---
arch/x86/kernel/fpu/init.c | 5 ++---
arch/x86/kernel/fpu/xstate.c | 3 ---
arch/x86/kernel/vmlinux.lds.S | 4 ----
5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index fb78720774ea..d90c31e87b96 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -491,7 +491,11 @@ struct thread_struct {
#endif
};

-#define x86_task_fpu(task) ((struct fpu *)((void *)task + sizeof(*task)))
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)task + sizeof(*task)))
+#endif

/*
* X86 doesn't need any embedded-FPU-struct quirks:
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0ccabcd3bf62..fdc3b227800d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -51,6 +51,15 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+ WARN_ON_ONCE(task == &init_task);
+
+ return (void *)task + sizeof(*task);
+}
+#endif
+
/*
* Can we use the FPU in kernel mode with the
* whole "kernel_fpu_begin/end()" sequence?
@@ -591,10 +600,8 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
* This is safe because task_struct size is a multiple of cacheline size.
*/
struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
- struct fpu *src_fpu = x86_task_fpu(current);

BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
- BUG_ON(!src_fpu);

/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
@@ -657,7 +664,6 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
if (update_fpu_shstk(dst, ssp))
return 1;

- trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);

return 0;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 11aa31410df2..53580e59e5db 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
- fpstate_init_soft(&x86_task_fpu(current)->fpstate->regs.soft);
+ ;
else
#endif
asm volatile ("fninit");
@@ -164,7 +164,7 @@ static void __init fpu__init_task_struct_size(void)
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
- task_size -= sizeof(x86_task_fpu(current)->__fpstate.regs);
+ task_size -= sizeof(union fpregs_state);

/*
* Add back the dynamically-calculated register state
@@ -209,7 +209,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_kernel_cfg.default_size = size;
fpu_user_cfg.max_size = size;
fpu_user_cfg.default_size = size;
- fpstate_reset(x86_task_fpu(current));
}

/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index dade16d92484..75196e4d40bd 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -844,9 +844,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (err)
goto out_disable;

- /* Reset the state for the current task */
- fpstate_reset(x86_task_fpu(current));
-
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d21331187b20..56451fd2099e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -172,10 +172,6 @@ SECTIONS
/* init_task */
INIT_TASK_DATA(THREAD_SIZE)

- __x86_init_fpu_begin = .;
- . = __x86_init_fpu_begin + 128*PAGE_SIZE;
- __x86_init_fpu_end = .;
-
#ifdef CONFIG_X86_32
/* 32 bit has nosave before _edata */
NOSAVE_DATA