[PATCH] RISC-V: add uniprocessor flush_tlb_range() support

From: Yunhui Cui
Date: Thu Jan 25 2024 - 01:21:24 EST


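On uniprocessor (UP) systems, flush_tlb_range() and the other ranged
TLB flush helpers currently fall back to local_flush_tlb_all(). Add
real range-based implementations so that UP kernels flush only the
affected entries, which improves TLB performance. To avoid the mutual
inclusion of tlbflush.h and hugetlb.h, the UP variants are implemented
in tlbflush.c rather than as inline functions in the header.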

Signed-off-by: Yunhui Cui <cuiyunhui@xxxxxxxxxxxxx>
---
arch/riscv/include/asm/tlbflush.h | 61 ++++++----
arch/riscv/mm/Makefile | 2 +-
arch/riscv/mm/tlbflush.c | 195 ++++++++++++++++++------------
3 files changed, 156 insertions(+), 102 deletions(-)

diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 928f096dca21..426f043fb450 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -10,12 +10,21 @@
#include <linux/mm_types.h>
#include <asm/smp.h>
#include <asm/errata_list.h>
+#include <asm/tlbbatch.h>

#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1)
#define FLUSH_TLB_NO_ASID ((unsigned long)-1)

#ifdef CONFIG_MMU
extern unsigned long asid_mask;
+DECLARE_STATIC_KEY_FALSE(use_asid_allocator);
+
+struct flush_tlb_range_data {
+ unsigned long asid;
+ unsigned long start;
+ unsigned long size;
+ unsigned long stride;
+};

static inline void local_flush_tlb_all(void)
{
@@ -27,12 +36,40 @@ static inline void local_flush_tlb_page(unsigned long addr)
{
ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
}
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+ if (asid != FLUSH_TLB_NO_ASID)
+ __asm__ __volatile__ ("sfence.vma x0, %0"
+ :
+ : "r" (asid)
+ : "memory");
+ else
+ local_flush_tlb_all();
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+ unsigned long asid)
+{
+ if (asid != FLUSH_TLB_NO_ASID)
+ __asm__ __volatile__ ("sfence.vma %0, %1"
+ :
+ : "r" (addr), "r" (asid)
+ : "memory");
+ else
+ local_flush_tlb_page(addr);
+}
+
+static inline unsigned long get_mm_asid(struct mm_struct *mm)
+{
+ return static_branch_unlikely(&use_asid_allocator) ?
+ atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+}
#else /* CONFIG_MMU */
#define local_flush_tlb_all() do { } while (0)
#define local_flush_tlb_page(addr) do { } while (0)
#endif /* CONFIG_MMU */

-#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
void flush_tlb_all(void);
void flush_tlb_mm(struct mm_struct *mm);
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -55,26 +92,4 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
void arch_flush_tlb_batched_pending(struct mm_struct *mm);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

-#else /* CONFIG_SMP && CONFIG_MMU */
-
-#define flush_tlb_all() local_flush_tlb_all()
-#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- local_flush_tlb_all();
-}
-
-/* Flush a range of kernel pages */
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- local_flush_tlb_all();
-}
-
-#define flush_tlb_mm(mm) flush_tlb_all()
-#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
-#endif /* !CONFIG_SMP || !CONFIG_MMU */
-
#endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 2c869f8026a8..7c6c4c858a6b 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -19,7 +19,7 @@ obj-y += context.o
obj-y += pmem.o

ifeq ($(CONFIG_MMU),y)
-obj-$(CONFIG_SMP) += tlbflush.o
+obj-y += tlbflush.o
endif
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 8d12b26f5ac3..4765603fa08a 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -6,28 +6,36 @@
#include <linux/hugetlb.h>
#include <asm/sbi.h>
#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>

-static inline void local_flush_tlb_all_asid(unsigned long asid)
+static unsigned long get_stride_size(struct vm_area_struct *vma)
{
- if (asid != FLUSH_TLB_NO_ASID)
- __asm__ __volatile__ ("sfence.vma x0, %0"
- :
- : "r" (asid)
- : "memory");
- else
- local_flush_tlb_all();
-}
+ unsigned long stride_size;

-static inline void local_flush_tlb_page_asid(unsigned long addr,
- unsigned long asid)
-{
- if (asid != FLUSH_TLB_NO_ASID)
- __asm__ __volatile__ ("sfence.vma %0, %1"
- :
- : "r" (addr), "r" (asid)
- : "memory");
- else
- local_flush_tlb_page(addr);
+ if (!is_vm_hugetlb_page(vma))
+ return PAGE_SIZE;
+
+ stride_size = huge_page_size(hstate_vma(vma));
+
+ /*
+ * As stated in the privileged specification, every PTE in a
+ * NAPOT region must be invalidated, so reset the stride in that
+ * case.
+ */
+ if (has_svnapot()) {
+ if (stride_size >= PGDIR_SIZE)
+ stride_size = PGDIR_SIZE;
+ else if (stride_size >= P4D_SIZE)
+ stride_size = P4D_SIZE;
+ else if (stride_size >= PUD_SIZE)
+ stride_size = PUD_SIZE;
+ else if (stride_size >= PMD_SIZE)
+ stride_size = PMD_SIZE;
+ else
+ stride_size = PAGE_SIZE;
+ }
+
+ return stride_size;
}

/*
@@ -66,31 +74,12 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
local_flush_tlb_range_threshold_asid(start, size, stride, asid);
}

-void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
- local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID);
-}
-
+#ifdef CONFIG_SMP
static void __ipi_flush_tlb_all(void *info)
{
local_flush_tlb_all();
}

-void flush_tlb_all(void)
-{
- if (riscv_use_ipi_for_rfence())
- on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
- else
- sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
-}
-
-struct flush_tlb_range_data {
- unsigned long asid;
- unsigned long start;
- unsigned long size;
- unsigned long stride;
-};
-
static void __ipi_flush_tlb_range_asid(void *info)
{
struct flush_tlb_range_data *d = info;
@@ -138,10 +127,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
put_cpu();
}

-static inline unsigned long get_mm_asid(struct mm_struct *mm)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
- return static_branch_unlikely(&use_asid_allocator) ?
- atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+ __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+ addr, PAGE_SIZE, PAGE_SIZE);
+}
+
+void flush_tlb_all(void)
+{
+ if (riscv_use_ipi_for_rfence())
+ on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
+ else
+ sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
}

void flush_tlb_mm(struct mm_struct *mm)
@@ -158,41 +155,12 @@ void flush_tlb_mm_range(struct mm_struct *mm,
start, end - start, page_size);
}

-void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
- __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
- addr, PAGE_SIZE, PAGE_SIZE);
-}
-
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
unsigned long stride_size;

- if (!is_vm_hugetlb_page(vma)) {
- stride_size = PAGE_SIZE;
- } else {
- stride_size = huge_page_size(hstate_vma(vma));
-
- /*
- * As stated in the privileged specification, every PTE in a
- * NAPOT region must be invalidated, so reset the stride in that
- * case.
- */
- if (has_svnapot()) {
- if (stride_size >= PGDIR_SIZE)
- stride_size = PGDIR_SIZE;
- else if (stride_size >= P4D_SIZE)
- stride_size = P4D_SIZE;
- else if (stride_size >= PUD_SIZE)
- stride_size = PUD_SIZE;
- else if (stride_size >= PMD_SIZE)
- stride_size = PMD_SIZE;
- else
- stride_size = PAGE_SIZE;
- }
- }
-
+ stride_size = get_stride_size(vma);
__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
start, end - start, stride_size);
}
@@ -203,6 +171,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
start, end - start, PAGE_SIZE);
}

+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
+ FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
@@ -212,6 +186,77 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
#endif

+#else
+static void __flush_tlb_range_up(struct mm_struct *mm, unsigned long start,
+ unsigned long size, unsigned long stride)
+{
+ unsigned long asid = FLUSH_TLB_NO_ASID;
+
+ if (mm)
+ asid = get_mm_asid(mm);
+
+ local_flush_tlb_range_asid(start, size, stride, asid);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ local_flush_tlb_page(addr);
+}
+
+void flush_tlb_all(void)
+{
+ local_flush_tlb_all();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ __flush_tlb_range_up(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
+void flush_tlb_mm_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned int page_size)
+{
+ __flush_tlb_range_up(mm, start, end - start, page_size);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ unsigned long stride_size;
+
+ stride_size = get_stride_size(vma);
+ __flush_tlb_range_up(vma->vm_mm, start, end - start, stride_size);
+}
+
+/* Flush a range of kernel pages */
+void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ __flush_tlb_range_up(NULL, start, end - start, PAGE_SIZE);
+}
+
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ __flush_tlb_range_up(NULL, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ __flush_tlb_range_up(vma->vm_mm, start, end - start, PMD_SIZE);
+}
+#endif
+
+#endif
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ local_flush_tlb_range_asid(start, end - start, PAGE_SIZE,
+ FLUSH_TLB_NO_ASID);
+}
+
bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
return true;
@@ -228,9 +273,3 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
-
-void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
-{
- __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
- FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
-}
--
2.20.1