[RFC PATCH v2 08/11] riscv: cmo: Add vendor custom icache sync

From: guoren
Date: Sun Jun 06 2021 - 05:05:52 EST


From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>

It's a draft version to show you how T-HEAD C9xx work with the
icache sync (We use hardware broadcast mechanism, and our icache
is VIPT):
- icache.i(v/p)a will broadcast all harts' icache invalidtion
- sync.is will broadcast all harts' pipeline flush and ensure all
broadcasts finished.

This patch could improve the performance of OpenJDK on JIT and
reduce flush_icache_all in linux.

Epecially:
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
flush_icache_pte(pteval);

set_pte(ptep, pteval);
}

Different from sbi_dma_sync, it can't be hidden in SBI and we must
set up a framework to hold all vendors' implementations in
linux/arch/riscv.

Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
Signed-off-by: Liu Shaohua <liush@xxxxxxxxxxxxxxxxx>
Cc: Anup Patel <anup.patel@xxxxxxx>
Cc: Atish Patra <atish.patra@xxxxxxx>
Cc: Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx>
Cc: Chen-Yu Tsai <wens@xxxxxxxx>
Cc: Drew Fustini <drew@xxxxxxxxxxxxxxx>
Cc: Maxime Ripard <maxime@xxxxxxxxxx>
Cc: Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx>
Cc: Wei Fu <wefu@xxxxxxxxxx>
Cc: Wei Wu <lazyparser@xxxxxxxxx>
---
arch/riscv/include/asm/cacheflush.h | 48 ++++++++++++++++++++++++++++++++++-
arch/riscv/kernel/vdso/flush_icache.S | 33 +++++++++++++++++++-----
arch/riscv/mm/cacheflush.c | 3 ++-
3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 23ff703..2e2dba1 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -22,11 +22,57 @@ static inline void flush_dcache_page(struct page *page)
}
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1

+#define ICACHE_IPA_X5 ".long 0x0382800b"
+#define ICACHE_IVA_X5 ".long 0x0302800b"
+#define SYNC_IS ".long 0x01b0000b"
+
+static inline void flush_icache_range(unsigned long start, unsigned long end)
+{
+ register unsigned long tmp asm("x5") = start & (~(L1_CACHE_BYTES-1));
+
+ for (; tmp < ALIGN(end, L1_CACHE_BYTES); tmp += L1_CACHE_BYTES) {
+ __asm__ __volatile__ (
+ ICACHE_IVA_X5
+ :
+ : "r" (tmp)
+ : "memory");
+ }
+
+ __asm__ __volatile__(SYNC_IS);
+
+ return;
+}
+
+static inline void flush_icache_range_phy(unsigned long start, unsigned long end)
+{
+ register unsigned long tmp asm("x5") = start & (~(L1_CACHE_BYTES-1));
+
+ for (; tmp < ALIGN(end, L1_CACHE_BYTES); tmp += L1_CACHE_BYTES) {
+ __asm__ __volatile__ (
+ ICACHE_IPA_X5
+ :
+ : "r" (tmp)
+ : "memory");
+ }
+
+ __asm__ __volatile__(SYNC_IS);
+
+ return;
+}
+
+static inline void __flush_icache_page(struct page *page) {
+ unsigned long start = PFN_PHYS(page_to_pfn(page));
+
+ flush_icache_range_phy(start, start + PAGE_SIZE);
+
+ return;
+}
+
/*
* RISC-V doesn't have an instruction to flush parts of the instruction cache,
* so instead we just flush the whole thing.
*/
-#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_range(start, end) flush_icache_range(start, end)
#define flush_icache_user_page(vma, pg, addr, len) \
flush_icache_mm(vma->vm_mm, 0)

diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S
index 82f97d6..efb2d2e 100644
--- a/arch/riscv/kernel/vdso/flush_icache.S
+++ b/arch/riscv/kernel/vdso/flush_icache.S
@@ -5,18 +5,39 @@

#include <linux/linkage.h>
#include <asm/unistd.h>
+#include <asm/cache.h>
+
+/*
+ * icache.ipa rs1
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000001 11000 rs1 000 00000 0001011
+ *
+ * icache.iva rs1
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000001 10000 rs1 000 00000 0001011
+ *
+ * sync.is
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000000 11011 00000 000 00000 0001011
+ */
+#define ICACHE_IPA_X5 .long 0x0382800b
+#define ICACHE_IVA_X5 .long 0x0302800b
+#define SYNC_IS .long 0x01b0000b

.text
/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
ENTRY(__vdso_flush_icache)
.cfi_startproc
-#ifdef CONFIG_SMP
- li a7, __NR_riscv_flush_icache
- ecall
-#else
- fence.i
+ srli t0, a0, L1_CACHE_SHIFT
+ slli t0, t0, L1_CACHE_SHIFT
+ addi a1, a1, (L1_CACHE_BYTES - 1)
+ srli a1, a1, L1_CACHE_SHIFT
+ slli a1, a1, L1_CACHE_SHIFT
+1: ICACHE_IVA_X5
+ addi t0, t0, L1_CACHE_BYTES
+ bne t0, a1, 1b
+ SYNC_IS
li a0, 0
-#endif
ret
.cfi_endproc
ENDPROC(__vdso_flush_icache)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 0941186..0fb8344 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -3,6 +3,7 @@
* Copyright (C) 2017 SiFive
*/

+#include <linux/pfn.h>
#include <asm/cacheflush.h>

#ifdef CONFIG_SMP
@@ -84,6 +85,6 @@ void flush_icache_pte(pte_t pte)
struct page *page = pte_page(pte);

if (!test_and_set_bit(PG_dcache_clean, &page->flags))
- flush_icache_all();
+ __flush_icache_page(page);
}
#endif /* CONFIG_MMU */
--
2.7.4