Re: [GIT PULL] x86/mm changes for v6.8

From: Linus Torvalds
Date: Mon Jan 08 2024 - 22:58:11 EST


On Mon, 8 Jan 2024 at 18:06, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> This does not even compile for me.
>
> arch/x86/include/asm/uaccess_64.h: In function ‘__untagged_addr’:
> arch/x86/include/asm/uaccess_64.h:25:28: error: implicit declaration
> of function ‘__my_cpu_var’; did you mean ‘put_cpu_var’?
> [-Werror=implicit-function-declaration]

Side note: the whole __my_cpu_var() reminds me of the attached patch
that I have in my testing tree, and have been carrying along for a
number of months now.

I definitely think it's the right thing to do, so here it is again,
even if it is only tangentially related to the build failure wrt this
broken pull request.

Linus
From 14f81cfd3aa3b53be9ad05801cdc7d7de91f807a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Date: Mon, 16 Oct 2023 16:04:11 -0700
Subject: [PATCH] x86: clean up fpu switching to not load 'current' in the
middle of task switching

It happens to work, but it's very very wrong, because out 'current'
macro is magic that is supposedly loading a stable value.

It just happens to be not quite stable enough and the compilers re-load
the value enough for this code to work. But it's wrong.

It also generates worse code.

So fix it.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
arch/x86/include/asm/fpu/sched.h | 10 ++++++----
arch/x86/kernel/process_32.c | 7 +++----
arch/x86/kernel/process_64.c | 7 +++----
3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index ca6e5e5f16b2..c485f1944c5f 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -37,10 +37,12 @@ extern void fpu_flush_thread(void);
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
-static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
- !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct fpu *old_fpu = &old->thread.fpu;
+
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
@@ -60,10 +62,10 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
* Delay loading of the complete FPU state until the return to userland.
* PKRU is handled separately.
*/
-static inline void switch_fpu_finish(void)
+static inline void switch_fpu_finish(struct task_struct *new)
{
if (cpu_feature_enabled(X86_FEATURE_FPU))
- set_thread_flag(TIF_NEED_FPU_LOAD);
+ set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
}

#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 708c87b88cc1..0917c7f25720 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -156,13 +156,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
- struct fpu *prev_fpu = &prev->fpu;
int cpu = smp_processor_id();

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

- if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_p, cpu);

/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -209,7 +208,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

raw_cpu_write(pcpu_hot.current_task, next_p);

- switch_fpu_finish();
+ switch_fpu_finish(next_p);

/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in(next_p);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 33b268747bb7..1553e19904e0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -562,14 +562,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
- struct fpu *prev_fpu = &prev->fpu;
int cpu = smp_processor_id();

WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(pcpu_hot.hardirq_stack_inuse));

- if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_p, cpu);

/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -623,7 +622,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(pcpu_hot.current_task, next_p);
raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));

- switch_fpu_finish();
+ switch_fpu_finish(next_p);

/* Reload sp0. */
update_task_stack(next_p);
--
2.43.0.5.g38fb137bdb