[PATCH RFC/RFT v2 4/4] riscv: Stop emitting preventive sfence.vma for new userspace mappings with Svvptc

From: Alexandre Ghiti
Date: Wed Jan 31 2024 - 11:03:50 EST


Preventive sfence.vma instructions were emitted because new mappings must
be made visible to the page table walker, but Svvptc guarantees that xRET
acts as a fence, so there is no need to emit an sfence.vma on uarchs that
implement this extension.
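
To illustrate, here is a minimal user-space sketch of the control flow
that the ALTERNATIVE() patching below achieves; cpu_has_svvptc and
sfence_vma_page() are hypothetical stand-ins for the boot-time ISA
extension probe and local_flush_tlb_page(), and on real hardware the
branch is patched away at boot rather than tested on every fault:

	#include <stdbool.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	/* Hypothetical: set once at boot from the ISA string, standing
	 * in for the RISCV_ISA_EXT_SVVPTC alternative. */
	static bool cpu_has_svvptc;

	/* Hypothetical stand-in for local_flush_tlb_page()/sfence.vma. */
	static void sfence_vma_page(unsigned long addr)
	{
		printf("sfence.vma %#lx\n", addr);
	}

	/* Mirrors update_mmu_cache_range(): skip the preventive flushes
	 * when Svvptc guarantees the new mapping becomes visible. */
	static void update_mmu_cache_sketch(unsigned long address,
					    unsigned int nr)
	{
		if (cpu_has_svvptc)
			return;	/* xRET acts as a fence: nothing to do */

		while (nr--)
			sfence_vma_page(address + nr * PAGE_SIZE);
	}

	int main(void)
	{
		update_mmu_cache_sketch(0x10000, 2); /* two sfence.vma */
		cpu_has_svvptc = true;
		update_mmu_cache_sketch(0x10000, 2); /* none emitted */
		return 0;
	}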

This drastically reduces the number of sfence.vma instructions emitted:

* Ubuntu boot to login:
Before: ~630k sfence.vma
After: ~200k sfence.vma

* ltp - mmapstress01:
Before: ~45k sfence.vma
After: ~6.3k sfence.vma

* lmbench - lat_pagefault:
Before: ~665k sfence.vma
After: 832 sfence.vma (!)

* lmbench - lat_mmap:
Before: ~546k sfence.vma
After: 718 sfence.vma (!)

Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx>
---
 arch/riscv/include/asm/pgtable.h | 16 +++++++++++++++-
 arch/riscv/mm/pgtable.c          | 13 +++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 0c94260b5d0c..50986e4c4601 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -473,6 +473,9 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
 		struct vm_area_struct *vma, unsigned long address,
 		pte_t *ptep, unsigned int nr)
 {
+	asm_volatile_goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1)
+			  : : : : svvptc);
+
 	/*
 	 * The kernel assumes that TLBs don't cache invalid entries, but
 	 * in RISC-V, SFENCE.VMA specifies an ordering constraint, not a
@@ -482,12 +485,23 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
 	 */
 	while (nr--)
 		local_flush_tlb_page(address + nr * PAGE_SIZE);
+
+svvptc:
+	/*
+	 * Svvptc guarantees that xRET acts as a fence, so when the uarch
+	 * does not cache invalid entries, we don't have to do anything.
+	 */
+	;
 }
 #define update_mmu_cache(vma, addr, ptep) \
 	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
 
 #define __HAVE_ARCH_UPDATE_MMU_TLB
-#define update_mmu_tlb update_mmu_cache
+static inline void update_mmu_tlb(struct vm_area_struct *vma,
+				  unsigned long address, pte_t *ptep)
+{
+	flush_tlb_range(vma, address, address + PAGE_SIZE);
+}
 
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmdp)
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index ef887efcb679..99ed389e4c8a 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -9,6 +9,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
 			  pte_t entry, int dirty)
 {
+	asm_volatile_goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1)
+			  : : : : svvptc);
+
 	if (!pte_same(ptep_get(ptep), entry))
 		__set_pte_at(ptep, entry);
 	/*
@@ -16,6 +19,16 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	 * the case that the PTE changed and the spurious fault case.
 	 */
 	return true;
+
+svvptc:
+	if (!pte_same(ptep_get(ptep), entry)) {
+		__set_pte_at(ptep, entry);
+		/* Only uarchs without Svadu are impacted here */
+		flush_tlb_page(vma, address);
+		return true;
+	}
+
+	return false;
 }
 
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
--
2.39.2