Re: [RESEND PATCH v3 1/2] RISC-V: mm: Restrict address space for sv39,sv48,sv57

From: Alexandre Ghiti
Date: Thu Jul 06 2023 - 05:11:57 EST


Hi Charlie,


On 05/07/2023 20:59, Charlie Jenkins wrote:
Make sv48 the default address space for mmap as some applications
currently depend on this assumption. The RISC-V specification enforces
that bits outside of the virtual address range are not used, so
restricting the size of the default address space as such should be
temporary.


What do you mean in the last sentence above?


A hint address passed to mmap will cause the largest address
space that fits entirely into the hint to be used. If the hint is less
than or equal to 1<<38, an sv39 address will be used. An exception is
that if the hint address is 0, then a sv48 address will be used.After
an address space is completely full, the next smallest address space
will be used.

Signed-off-by: Charlie Jenkins <charlie@xxxxxxxxxxxx>
---
arch/riscv/include/asm/elf.h | 2 +-
arch/riscv/include/asm/pgtable.h | 13 +++++++++++-
arch/riscv/include/asm/processor.h | 34 ++++++++++++++++++++++++------
3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index 30e7d2455960..1b57f13a1afd 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
* the loader. We need to make sure that it is out of the way of the program
* that it will "exec", and that there is sufficient room for the brk.
*/
-#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
+#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
#ifdef CONFIG_64BIT
#ifdef CONFIG_COMPAT
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 75970ee2bda2..752e210c7547 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -57,18 +57,29 @@
#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif
+
/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
#ifdef CONFIG_64BIT
+#define VA_BITS_SV39 39
+#define VA_BITS_SV48 48
+#define VA_BITS_SV57 57
+
+#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
+#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
+#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
+
#define VA_BITS (pgtable_l5_enabled ? \
- 57 : (pgtable_l4_enabled ? 48 : 39))
+ VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
#else
#define VA_BITS 32
#endif
+#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)


Maybe rename DEFAULT_VA_BITS into MMAP_VA_BITS? Or something similar?


+
#define VMEMMAP_SHIFT \
(VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 94a0590c6971..468a1f4b9da4 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -12,20 +12,40 @@
#include <asm/ptrace.h>
-/*
- * This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
-
-#define STACK_TOP TASK_SIZE
#ifdef CONFIG_64BIT
+#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1))
#define STACK_TOP_MAX TASK_SIZE_64
+
+#define arch_get_mmap_end(addr, len, flags) \
+ ((addr) >= VA_USER_SV57 ? STACK_TOP_MAX : \
+ ((((addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) ? \
+ VA_USER_SV48 : \
+ VA_USER_SV39)
+
+#define arch_get_mmap_base(addr, base) \
+ (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ? \


So IIUC, a user must pass a hint larger than the max address of the mode the user wants right? Shouldn't the user rather pass an address that is larger than the previous mode? I mean if the user wants a 56-bit address, he should just pass an address above 1<<47 no?


+ VA_USER_SV57 - (DEFAULT_MAP_WINDOW - base) : \
+ ((((addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) ? \
+ VA_USER_SV48 - (DEFAULT_MAP_WINDOW - base) : \
+ (addr == 0) ? \
+ base : \
+ VA_USER_SV39 - (DEFAULT_MAP_WINDOW - base))
+


Can you turn that into a function or use if/else statement? It's very hard to understand what happens there.

And riscv selects ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT which means the base is at the top of the address space (minus the stack IIRC). But if rlimit_stack is set to infinity (see mmap_base() https://elixir.bootlin.com/linux/latest/source/mm/util.c#L412), mmap_base is equal to TASK_UNMAPPED_BASE. Does that work in that case? It seems like this: VA_USER_SV39 - (DEFAULT_MAP_WINDOW - base)) would be negative right?

You should also add a rlimit test.


#else
+#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP_MAX TASK_SIZE
#endif
#define STACK_ALIGN 16
+
+#define STACK_TOP DEFAULT_MAP_WINDOW
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
+
#ifndef __ASSEMBLY__
struct task_struct;