Re: [PATCH] LoongArch: Introduce hardware page table walker

From: Huacai Chen
Date: Wed May 17 2023 - 02:36:40 EST


Hi, Xuerui,

On Tue, May 16, 2023 at 9:13 PM WANG Xuerui <kernel@xxxxxxxxxx> wrote:
>
> On 2023/5/16 20:46, Huacai Chen wrote:
> > Loongson-3A6000 and newer processors have hardware page table walker
> > (PTW) support. PTW can handle all fastpaths of TLBI/TLBL/TLBS/TLBM
> > exceptions in hardware; software only needs to handle the slowpaths
> > (page faults).
> >
> > BTW, PTW doesn't set _PAGE_MODIFIED on page table entries, so we
> > change pmd_dirty() and pte_dirty() to also check _PAGE_DIRTY for the
> > "dirty" attribute.
>
> Too bad we can't get a sneak peek at the new manuals... reviewing some
> of the code anyway.
>
> >
> > Signed-off-by: Liang Gao <gaoliang@xxxxxxxxxxx>
> > Signed-off-by: Jun Yi <yijun@xxxxxxxxxxx>
> > Signed-off-by: Huacai Chen <chenhuacai@xxxxxxxxxxx>
> > ---
> > arch/loongarch/include/asm/cpu-features.h | 2 +-
> > arch/loongarch/include/asm/cpu.h | 2 ++
> > arch/loongarch/include/asm/loongarch.h | 4 ++++
> > arch/loongarch/include/asm/pgtable.h | 4 ++--
> > arch/loongarch/include/asm/tlb.h | 3 +++
> > arch/loongarch/include/uapi/asm/hwcap.h | 1 +
> > arch/loongarch/kernel/cpu-probe.c | 4 ++++
> > arch/loongarch/kernel/proc.c | 1 +
> > arch/loongarch/mm/tlb.c | 24 ++++++++++++++++++-----
> > arch/loongarch/mm/tlbex.S | 21 ++++++++++++++++++++
> > 10 files changed, 58 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
> > index f6177f133477..2eafe6a6aca8 100644
> > --- a/arch/loongarch/include/asm/cpu-features.h
> > +++ b/arch/loongarch/include/asm/cpu-features.h
> > @@ -64,6 +64,6 @@
> > #define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE)
> > #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID)
> > #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR)
> > -
> > +#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW)
> >
> > #endif /* __ASM_CPU_FEATURES_H */
> > diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
> > index 88773d849e33..48b9f7168bcc 100644
> > --- a/arch/loongarch/include/asm/cpu.h
> > +++ b/arch/loongarch/include/asm/cpu.h
> > @@ -98,6 +98,7 @@ enum cpu_type_enum {
> > #define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */
> > #define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */
> > #define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */
> > +#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */
> >
> > #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
> > #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
> > @@ -125,5 +126,6 @@ enum cpu_type_enum {
> > #define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE)
> > #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID)
> > #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR)
> > +#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW)
> >
> > #endif /* _ASM_CPU_H */
> > diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> > index b3323ab5b78d..93b22a7af654 100644
> > --- a/arch/loongarch/include/asm/loongarch.h
> > +++ b/arch/loongarch/include/asm/loongarch.h
> > @@ -138,6 +138,7 @@ static inline u32 read_cpucfg(u32 reg)
> > #define CPUCFG2_MIPSBT BIT(20)
> > #define CPUCFG2_LSPW BIT(21)
> > #define CPUCFG2_LAM BIT(22)
> > +#define CPUCFG2_PTW BIT(24)
> >
> > #define LOONGARCH_CPUCFG3 0x3
> > #define CPUCFG3_CCDMA BIT(0)
> > @@ -453,6 +454,9 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
> > #define CSR_PWCTL0_PTBASE (_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT)
> >
> > #define LOONGARCH_CSR_PWCTL1 0x1d /* PWCtl1 */
> > +#define CSR_PWCTL1_PTW_SHIFT 24
> > +#define CSR_PWCTL1_PTW_WIDTH 1
> > +#define CSR_PWCTL1_PTW (_ULCAST_(0x1) << CSR_PWCTL1_PTW_SHIFT)
> > #define CSR_PWCTL1_DIR3WIDTH_SHIFT 18
> > #define CSR_PWCTL1_DIR3WIDTH_WIDTH 5
> > #define CSR_PWCTL1_DIR3WIDTH (_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT)
> > diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> > index d28fb9dbec59..5f93d6eef657 100644
> > --- a/arch/loongarch/include/asm/pgtable.h
> > +++ b/arch/loongarch/include/asm/pgtable.h
> > @@ -362,7 +362,7 @@ extern pgd_t invalid_pg_dir[];
> > */
> > static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
> > static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
> > -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; }
> > +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & (_PAGE_DIRTY | _PAGE_MODIFIED); }
>
> Will this affect existing models? I can't seem to remember the details.
No problem here, because the DIRTY bit is set by hardware only, and
the TLB handlers always spread DIRTY into MODIFIED. Moreover, ARM64
does the same.
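
To make this concrete, here is a rough user-space sketch of the
invariant we rely on (the bit positions and helpers are made up for
illustration; they are not the kernel's definitions):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical bit layout, for illustration only. */
#define _PAGE_DIRTY    (1UL << 1)   /* set by hardware (PTW) on write */
#define _PAGE_MODIFIED (1UL << 9)   /* set by software (TLB handlers) */

typedef unsigned long pte_t;

/* With PTW, hardware can mark a page dirty without software ever
 * seeing a TLB modify exception, so "dirty" must mean "either bit". */
static bool pte_dirty(pte_t pte)
{
        return (pte & (_PAGE_DIRTY | _PAGE_MODIFIED)) != 0;
}

/* On pre-PTW cores the software modify handler runs first and spreads
 * DIRTY into MODIFIED, which is why the MODIFIED-only check sufficed. */
static pte_t software_modify_path(pte_t pte)
{
        return pte | _PAGE_MODIFIED | _PAGE_DIRTY;
}

int main(void)
{
        pte_t hw = _PAGE_DIRTY;               /* dirtied via the PTW fast path */
        pte_t sw = software_modify_path(0);   /* dirtied via the TLBM handler  */

        printf("hw fast path dirty: %d\n", pte_dirty(hw));   /* prints 1 */
        printf("sw slow path dirty: %d\n", pte_dirty(sw));   /* prints 1 */
        return 0;
}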

>
> >
> > static inline pte_t pte_mkold(pte_t pte)
> > {
> > @@ -506,7 +506,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
> >
> > static inline int pmd_dirty(pmd_t pmd)
> > {
> > - return !!(pmd_val(pmd) & _PAGE_MODIFIED);
> > + return !!(pmd_val(pmd) & (_PAGE_DIRTY | _PAGE_MODIFIED));
> > }
> >
> > static inline pmd_t pmd_mkclean(pmd_t pmd)
> > diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
> > index f5e4deb97402..0dc9ee2b05d2 100644
> > --- a/arch/loongarch/include/asm/tlb.h
> > +++ b/arch/loongarch/include/asm/tlb.h
> > @@ -163,6 +163,9 @@ extern void handle_tlb_store(void);
> > extern void handle_tlb_modify(void);
> > extern void handle_tlb_refill(void);
> > extern void handle_tlb_protect(void);
> > +extern void handle_tlb_load_ptw(void);
> > +extern void handle_tlb_store_ptw(void);
> > +extern void handle_tlb_modify_ptw(void);
> >
> > extern void dump_tlb_all(void);
> > extern void dump_tlb_regs(void);
> > diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h
> > index 8840b72fa8e8..6955a7cb2c65 100644
> > --- a/arch/loongarch/include/uapi/asm/hwcap.h
> > +++ b/arch/loongarch/include/uapi/asm/hwcap.h
> > @@ -16,5 +16,6 @@
> > #define HWCAP_LOONGARCH_LBT_X86 (1 << 10)
> > #define HWCAP_LOONGARCH_LBT_ARM (1 << 11)
> > #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12)
> > +#define HWCAP_LOONGARCH_PTW (1 << 13)
>
> This feature should be irrelevant to userspace, right? We should
> probably not add it here.
It is not used now, but we don't know about the future; adding it is
similar to what we did for LVZ.
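
If the bit does stay in the uapi header, userspace would probe it the
usual way via the auxiliary vector; a minimal sketch, assuming the
value from the hunk above (normally it would come from <asm/hwcap.h>):

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_LOONGARCH_PTW
#define HWCAP_LOONGARCH_PTW (1 << 13)   /* value from the patch above */
#endif

int main(void)
{
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("ptw: %s\n", (hwcap & HWCAP_LOONGARCH_PTW) ? "yes" : "no");
        return 0;
}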

>
> >
> > #endif /* _UAPI_ASM_HWCAP_H */
> > diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
> > index f42acc6c8df6..e925579c7a71 100644
> > --- a/arch/loongarch/kernel/cpu-probe.c
> > +++ b/arch/loongarch/kernel/cpu-probe.c
> > @@ -136,6 +136,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
> > c->options |= LOONGARCH_CPU_CRYPTO;
> > elf_hwcap |= HWCAP_LOONGARCH_CRYPTO;
> > }
> > + if (config & CPUCFG2_PTW) {
> > + c->options |= LOONGARCH_CPU_PTW;
> > + elf_hwcap |= HWCAP_LOONGARCH_PTW;
> > + }
> > if (config & CPUCFG2_LVZP) {
> > c->options |= LOONGARCH_CPU_LVZ;
> > elf_hwcap |= HWCAP_LOONGARCH_LVZ;
> > diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
> > index 0d82907b5404..782a34e7336e 100644
> > --- a/arch/loongarch/kernel/proc.c
> > +++ b/arch/loongarch/kernel/proc.c
> > @@ -79,6 +79,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
> > if (cpu_has_crc32) seq_printf(m, " crc32");
> > if (cpu_has_complex) seq_printf(m, " complex");
> > if (cpu_has_crypto) seq_printf(m, " crypto");
> > + if (cpu_has_ptw) seq_printf(m, " ptw");
> > if (cpu_has_lvz) seq_printf(m, " lvz");
> > if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86");
> > if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm");
> > diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
> > index 8bad6b0cff59..3e1d1d3c7058 100644
> > --- a/arch/loongarch/mm/tlb.c
> > +++ b/arch/loongarch/mm/tlb.c
> > @@ -167,6 +167,9 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep
> > int idx;
> > unsigned long flags;
> >
> > + if (cpu_has_ptw)
> > + return;
> > +
> > /*
> > * Handle debugger faulting in for debugee.
> > */
> > @@ -220,7 +223,11 @@ static void setup_ptwalker(void)
> > pte_w = PAGE_SHIFT - 3;
> >
> > pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25;
> > - pwctl1 = pgd_i | pgd_w << 6;
> > +
> > + if (!cpu_has_ptw)
> > + pwctl1 = pgd_i | pgd_w << 6;
> > + else
> > + pwctl1 = pgd_i | pgd_w << 6 | CSR_PWCTL1_PTW;
>
> This is better in avoiding the duplication:
>
> if (cpu_has_ptw)
> pwctl1 |= CSR_PWCTL1_PTW;
OK, I can change it, but I don't see the advantage.
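
For reference, with that suggestion applied the hunk would read
something like this, so pgd_i | pgd_w << 6 appears only once:

        pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25;
        pwctl1 = pgd_i | pgd_w << 6;

        if (cpu_has_ptw)
                pwctl1 |= CSR_PWCTL1_PTW;

        csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0);
        csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1);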

>
> >
> > csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0);
> > csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1);
> > @@ -264,10 +271,17 @@ void setup_tlb_handler(int cpu)
> > if (cpu == 0) {
> > memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
> > local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
> > - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
> > - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
> > - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
> > - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
> > + if (!cpu_has_ptw) {
>
> Better not to have the condition inverted, to reduce cognitive burden
> (otherwise the reader has to go "no [hardware] page table walker" ->
> "take the software approach").
I want the "old behavior" to appear first.
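
For comparison, the arrangement Xuerui is suggesting would look like
this (same handlers as in the patch above, only the branch order
changes):

        if (cpu_has_ptw) {
                set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
                set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
                set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
                set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
        } else {
                set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
                set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
                set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
                set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
        }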

>
> > + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
> > + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
> > + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
> > + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
> > + } else {
> > + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> > + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> > + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
> > + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
> > + }
> > set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
> > set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
> > set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
> > diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
> > index 240ced55586e..01df8a0b87f6 100644
> > --- a/arch/loongarch/mm/tlbex.S
> > +++ b/arch/loongarch/mm/tlbex.S
> > @@ -190,6 +190,13 @@ nopage_tlb_load:
> > jr t0
> > SYM_FUNC_END(handle_tlb_load)
> >
> > +SYM_FUNC_START(handle_tlb_load_ptw)
> > + csrwr t0, LOONGARCH_CSR_KS0
> > + csrwr t1, LOONGARCH_CSR_KS1
> > + la_abs t0, tlb_do_page_fault_0
> > + jirl zero, t0, 0
>
> jr t0
OK, thanks.
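
For the record, jr t0 is the standard alias for jirl zero, t0, 0, so
this is purely a readability change; with it applied the handler
becomes:

SYM_FUNC_START(handle_tlb_load_ptw)
        csrwr   t0, LOONGARCH_CSR_KS0
        csrwr   t1, LOONGARCH_CSR_KS1
        la_abs  t0, tlb_do_page_fault_0
        jr      t0
SYM_FUNC_END(handle_tlb_load_ptw)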

>
> > +SYM_FUNC_END(handle_tlb_load_ptw)
> > +
> > SYM_FUNC_START(handle_tlb_store)
> > csrwr t0, EXCEPTION_KS0
> > csrwr t1, EXCEPTION_KS1
> > @@ -339,6 +346,13 @@ nopage_tlb_store:
> > jr t0
> > SYM_FUNC_END(handle_tlb_store)
> >
> > +SYM_FUNC_START(handle_tlb_store_ptw)
> > + csrwr t0, LOONGARCH_CSR_KS0
> > + csrwr t1, LOONGARCH_CSR_KS1
> > + la_abs t0, tlb_do_page_fault_1
> > + jirl zero, t0, 0
>
> Same here.
OK, thanks.

>
> > +SYM_FUNC_END(handle_tlb_store_ptw)
> > +
> > SYM_FUNC_START(handle_tlb_modify)
> > csrwr t0, EXCEPTION_KS0
> > csrwr t1, EXCEPTION_KS1
> > @@ -486,6 +500,13 @@ nopage_tlb_modify:
> > jr t0
> > SYM_FUNC_END(handle_tlb_modify)
> >
> > +SYM_FUNC_START(handle_tlb_modify_ptw)
> > + csrwr t0, LOONGARCH_CSR_KS0
> > + csrwr t1, LOONGARCH_CSR_KS1
> > + la_abs t0, tlb_do_page_fault_1
> > + jirl zero, t0, 0
>
> And here.
OK, thanks.

Huacai
>
> > +SYM_FUNC_END(handle_tlb_modify_ptw)
> > +
> > SYM_FUNC_START(handle_tlb_refill)
> > csrwr t0, LOONGARCH_CSR_TLBRSAVE
> > csrrd t0, LOONGARCH_CSR_PGD
>
> --
> WANG "xen0n" Xuerui
>
> Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
>
>