[PATCH V2] riscv: kexec: Fixup synchronization problem between init_mm and active_mm

From: guoren
Date: Mon Jul 10 2023 - 01:40:54 EST


From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>

machine_kexec_prepare() uses set_memory_x() to change the direct mapping
attributes of the control code page from RW to RWX. But set_memory_x() only
updates init_mm's page tables, not those of current->active_mm, so when
kexec jumps into the control code buffer, an instruction page fault occurs.
There is no minor page fault handling for it, so the kernel panics.

The bug was found on an Sv39 MMU machine where the direct mapping uses
1GB PUD mappings, which are pgd entries. Here is the bug output:

kexec_core: Starting new kernel
Will call new kernel at 00300000 from hart id 0
FDT image at 747c7000
Bye...
Unable to handle kernel paging request at virtual address ffffffda23b0d000
Oops [#1]
Modules linked in:
CPU: 0 PID: 53 Comm: uinit Not tainted 6.4.0-rc6 #15
Hardware name: Sophgo Mango (DT)
epc : 0xffffffda23b0d000
ra : machine_kexec+0xa6/0xb0
epc : ffffffda23b0d000 ra : ffffffff80008272 sp : ffffffc80c173d10
gp : ffffffff8150e1e0 tp : ffffffd9073d2c40 t0 : 0000000000000000
t1 : 0000000000000042 t2 : 6567616d69205444 s0 : ffffffc80c173d50
s1 : ffffffd9076c4800 a0 : ffffffd9076c4800 a1 : 0000000000300000
a2 : 00000000747c7000 a3 : 0000000000000000 a4 : ffffffd800000000
a5 : 0000000000000000 a6 : ffffffd903619c40 a7 : ffffffffffffffff
s2 : ffffffda23b0d000 s3 : 0000000000300000 s4 : 00000000747c7000
s5 : 0000000000000000 s6 : 0000000000000000 s7 : 0000000000000000
s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000000000000000
s11: 0000003f940001a0 t3 : ffffffff815351af t4 : ffffffff815351af
t5 : ffffffff815351b0 t6 : ffffffc80c173b50
status: 0000000200000100 badaddr: ffffffda23b0d000 cause: 000000000000000c

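The solution is to remap the control code page outside the linear mapping
with vm_map_ram() in machine_kexec_prepare(), and to have machine_kexec()
jump to that mapping instead of the linear-mapping address.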

Fixes: 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping")
Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
Signed-off-by: Guo Ren <guoren@xxxxxxxxxx>
Cc: Alexandre Ghiti <alex@xxxxxxxx>
---
Changelog:
V2:
- Use vm_map_ram instead of modifying set_memory_x
- Correct Fixes tag
---
arch/riscv/include/asm/kexec.h | 1 +
arch/riscv/kernel/machine_kexec.c | 14 ++++++++++----
2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index 2b56769cb530..17456e91476e 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -41,6 +41,7 @@ crash_setup_regs(struct pt_regs *newregs,
struct kimage_arch {
void *fdt; /* For CONFIG_KEXEC_FILE */
unsigned long fdt_addr;
+ void *control_code_buffer;
};

extern const unsigned char riscv_kexec_relocate[];
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index 2d139b724bc8..eeb209775107 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -86,7 +86,14 @@ machine_kexec_prepare(struct kimage *image)

/* Copy the assembler code for relocation to the control page */
if (image->type != KEXEC_TYPE_CRASH) {
- control_code_buffer = page_address(image->control_code_page);
+ control_code_buffer = vm_map_ram(&image->control_code_page,
+ KEXEC_CONTROL_PAGE_SIZE/PAGE_SIZE,
+ NUMA_NO_NODE);
+ if (control_code_buffer == NULL) {
+ pr_err("Failed to vm_map control page\n");
+ return -ENOMEM;
+ }
+
control_code_buffer_sz = page_size(image->control_code_page);

if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) {
@@ -97,8 +104,7 @@ machine_kexec_prepare(struct kimage *image)
memcpy(control_code_buffer, riscv_kexec_relocate,
riscv_kexec_relocate_size);

- /* Mark the control page executable */
- set_memory_x((unsigned long) control_code_buffer, 1);
+ internal->control_code_buffer = control_code_buffer;
}

return 0;
@@ -211,7 +217,7 @@ machine_kexec(struct kimage *image)
unsigned long this_cpu_id = __smp_processor_id();
unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
unsigned long fdt_addr = internal->fdt_addr;
- void *control_code_buffer = page_address(image->control_code_page);
+ void *control_code_buffer = internal->control_code_buffer;
riscv_kexec_method kexec_method = NULL;

#ifdef CONFIG_SMP
--
2.36.1