Re: [PATCH v2 -tip] x86/percpu: Use C for arch_raw_cpu_ptr()

From: Uros Bizjak
Date: Tue Oct 17 2023 - 15:12:04 EST


On Tue, Oct 17, 2023 at 9:00 PM Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> On Tue, 17 Oct 2023 at 00:23, Nadav Amit <namit@xxxxxxxxxx> wrote:
> >
> > Yes, the FPU issue is the one that caused me to crash before.
>
> Uros, can you verify whether that patch of mine resolves the issue you saw?
>
> That patch is _technically_ an actual bug-fix, although right now our
> existing 'current' caching that depends on just CSE'ing the inline asm
> (and is apparently limited to only doing so within single basic
> blocks) doesn't actually trigger the bug in our __switch_to() logic in
> practice.

Unfortunately, it doesn't fix the oops :(

I'm testing your patch together with the attached patch, on top of the
current tip tree (which already has all the necessary percpu stuff), and
I get exactly the same oops:

[ 4.969657] cfg80211: Loading compiled-in X.509 certificates for regulatory database
[ 4.980712] modprobe (53) used greatest stack depth: 13480 bytes left
[ 4.981048] BUG: kernel NULL pointer dereference, address: 0000000000000000
[ 4.981830] #PF: supervisor write access in kernel mode
[ 4.981830] #PF: error_code(0x0002) - not-present page
[ 4.981830] PGD 0 P4D 0
[ 4.981830] Oops: 0002 [#1] PREEMPT SMP PTI
[ 4.981830] CPU: 1 PID: 54 Comm: kworker/u4:1 Not tainted 6.6.0-rc6-00406-g84ab57184ff4-dirty #2
[ 4.981830] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc37 04/01/2014
[ 4.981830] RIP: 0010:begin_new_exec+0x8f2/0xa30
[ 4.981830] Code: 31 f6 e8 c1 49 f9 ff e9 3c fa ff ff 31 f6 4c 89 ef e8 b2 4a f9 ff e9 19 fa ff ff 31 f6 4c 89 ef e8 23 4a f9 ff e9 ea fa ff ff <f0> 41 ff 0c 24 0f 85 55 fb ff ff 4c 89 e7 e8 4b 02 df ff e9 48 fb
[ 4.981830] RSP: 0000:ffffa505401f3d68 EFLAGS: 00010246
[ 4.981830] RAX: 0000000000000000 RBX: ffff89ed809e9f00 RCX: 0000000000000000
[ 4.981830] RDX: 0000000000000000 RSI: ffff89ed80e6c000 RDI: ffff89ed809ea718
[ 4.981830] RBP: ffff89ed8039ee00 R08: 00000000fffffffe R09: 00000000ffffffff
[ 4.981830] R10: 000001ffffffffff R11: 0000000000000001 R12: 0000000000000000
[ 4.981830] R13: 0000000000000000 R14: ffff89ed809ea718 R15: ffff89ed80e6c000
[ 4.981830] FS: 0000000000000000(0000) GS:ffff89ee24900000(0000) knlGS:0000000000000000
[ 4.981830] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4.981830] CR2: 0000000000000000 CR3: 00000001003a0000 CR4: 00000000000406f0
[ 4.981830] Call Trace:
[ 4.981830] <TASK>
[ 4.981830] ? __die+0x1e/0x60
[ 4.981830] ? page_fault_oops+0x17b/0x470
[ 4.981830] ? search_module_extables+0x14/0x50
[ 4.981830] ? exc_page_fault+0x66/0x140
[ 4.981830] ? asm_exc_page_fault+0x26/0x30
[ 4.981830] ? begin_new_exec+0x8f2/0xa30
[ 4.981830] ? begin_new_exec+0x3ce/0xa30
[ 4.981830] ? load_elf_phdrs+0x67/0xb0
[ 4.981830] load_elf_binary+0x2bb/0x1770
[ 4.981830] ? __kernel_read+0x136/0x2d0
[ 4.981830] bprm_execve+0x277/0x630
[ 4.981830] kernel_execve+0x145/0x1a0
[ 4.981830] call_usermodehelper_exec_async+0xcb/0x180
[ 4.981830] ? __pfx_call_usermodehelper_exec_async+0x10/0x10
[ 4.981830] ret_from_fork+0x2f/0x50
[ 4.981830] ? __pfx_call_usermodehelper_exec_async+0x10/0x10
[ 4.981830] ret_from_fork_asm+0x1b/0x30
[ 4.981830] </TASK>
[ 4.981830] Modules linked in:
[ 4.981830] CR2: 0000000000000000
[ 5.052612] ---[ end trace 0000000000000000 ]---
[ 5.053833] RIP: 0010:begin_new_exec+0x8f2/0xa30
[ 5.055065] Code: 31 f6 e8 c1 49 f9 ff e9 3c fa ff ff 31 f6 4c 89 ef e8 b2 4a f9 ff e9 19 fa ff ff 31 f6 4c 89 ef e8 23 4a f9 ff e9 ea fa ff ff <f0> 41 ff 0c 24 0f 85 55 fb ff ff 4c 89 e7 e8 4b 02 df ff e9 48 fb
[ 5.059476] RSP: 0000:ffffa505401f3d68 EFLAGS: 00010246
[ 5.060780] RAX: 0000000000000000 RBX: ffff89ed809e9f00 RCX: 0000000000000000
[ 5.062483] RDX: 0000000000000000 RSI: ffff89ed80e6c000 RDI: ffff89ed809ea718
[ 5.064190] RBP: ffff89ed8039ee00 R08: 00000000fffffffe R09: 00000000ffffffff
[ 5.065908] R10: 000001ffffffffff R11: 0000000000000001 R12: 0000000000000000
[ 5.067625] R13: 0000000000000000 R14: ffff89ed809ea718 R15: ffff89ed80e6c000
[ 5.069343] FS: 0000000000000000(0000) GS:ffff89ee24900000(0000) knlGS:0000000000000000
[ 5.071313] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5.072732] CR2: 0000000000000000 CR3: 00000001003a0000 CR4: 00000000000406f0
[ 5.074439] Kernel panic - not syncing: Fatal exception
[ 5.075028] Kernel Offset: 0xcc00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)



Uros.
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index a1168e7b69e5..21e8bd4ea44e 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -36,10 +36,23 @@ static_assert(sizeof(struct pcpu_hot) == 64);

DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);

+/*
+ * const_pcpu_hot: const-qualified alias of pcpu_hot (see cpu/common.c).
+ */
+DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
+ const_pcpu_hot);
+
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+static __always_inline struct task_struct *get_current(void)
+{
+ return const_pcpu_hot.current_task;
+}
+#else
static __always_inline struct task_struct *get_current(void)
{
return this_cpu_read_stable(pcpu_hot.current_task);
}
+#endif

#define current get_current()

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b14fc8c1c953..f284c08aaeca 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2050,6 +2050,10 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
};
EXPORT_PER_CPU_SYMBOL(pcpu_hot);

+DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
+ const_pcpu_hot) __attribute__((alias("pcpu_hot")));
+EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
+
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d7779a18b24f..bf9815eaf4aa 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -212,7 +212,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define ___ADDRESSABLE(sym, __attrs) \
static void * __used __attrs \
- __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym;
+ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym;
#define __ADDRESSABLE(sym) \
___ADDRESSABLE(sym, __section(".discard.addressable"))