[PATCH v12 14/14] powerpc/64s/radix: Enable huge vmalloc mappings

From: Nicholas Piggin
Date: Tue Feb 02 2021 - 06:09:59 EST


This reduces TLB misses by nearly 30x on a `git diff` workload on a
2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
to vfs hashes being allocated with 2MB pages.

Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Acked-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 2 ++
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/module.c | 21 +++++++++++++++----
3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a10b545c2070..d62df53e5200 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3225,6 +3225,8 @@

nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.

+ nohugevmalloc [PPC] Disable kernel huge vmalloc mappings.
+
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..781da6829ab7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -181,6 +181,7 @@ config PPC
select GENERIC_GETTIMEOFDAY
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index a211b0253cdb..07026335d24d 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -87,13 +87,26 @@ int module_finalize(const Elf_Ehdr *hdr,
return 0;
}

-#ifdef MODULES_VADDR
void *module_alloc(unsigned long size)
{
+ unsigned long start = VMALLOC_START;
+ unsigned long end = VMALLOC_END;
+
+#ifdef MODULES_VADDR
BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+ start = MODULES_VADDR;
+ end = MODULES_END;
+#endif
+
+ /*
+ * Don't do huge page allocations for modules yet until more testing
+ * is done. STRICT_MODULE_RWX may require extra work to support this
+ * too.
+ */

- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
+ PAGE_KERNEL_EXEC,
+ VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE,
__builtin_return_address(0));
}
-#endif
--
2.23.0