[PATCH 2/2] LoongArch: Relay BCE exceptions to userland as SIGSEGVs with si_code=SEGV_BNDERR

From: WANG Xuerui
Date: Sun Apr 16 2023 - 13:34:27 EST


From: WANG Xuerui <git@xxxxxxxxxx>

SEGV_BNDERR was initially introduced to support Intel MPX, but fell into
disuse after MPX support was removed. The LoongArch bounds-checking
instructions behave very differently from MPX, but
overall the interface is still kind of suitable for conveying the
information to userland when bounds-checking assertions trigger, so we
wouldn't have to invent more UAPI. Specifically, when the BCE triggers,
a SEGV_BNDERR is sent to userland, with si_addr set to the out-of-bounds
address or value (in asrt{gt,le}'s case), and one of si_lower or
si_upper set to the configured bound depending on the faulting
instruction. The other bound is set to either 0 or ULONG_MAX to resemble
a range with both lower and upper bounds.

Note that it is possible to have si_addr == si_lower in case of a failing
asrtgt or {ld,st}gt, because those instructions test for a strict
greater-than relationship. This should not pose a problem for userland,
though, because the faulting PC is available for the application to
trace back to the exact instruction and figure out the expected
relationship.

Somewhat contrary to the ISA manual's wording, CSR.BADV does not seem to
contain the out-of-bounds value when BCE occurs, so we have to resort to
pattern-matching the instruction word to pull out the appropriate
operand ourselves from the context.

Example exception context generated by a faulting `asrtgt.d t0, t1`
(assert t0 > t1 or BCE) with t0=100 and t1=200:

> tp 00007ffff2f2f180 sp 00007ffffbf9fb80 a0 0000000000000002
> a1 00007ffffbf9fce8 a2 00007ffffbf9fd00 a3 00007ffff2ed4558
> a4 0000000000000000 a5 00007ffff2f044c8 a6 00007ffffbf9fce0
> a7 fffffffffffff000 t0 0000000000000064 t1 00000000000000c8
> t2 00007ffffbfa2d5e t3 00007ffff2f12aa0 t4 00007ffff2ed6158
> t5 00007ffff2ed6158 t6 000000000000002e t7 0000000003d8f538
> t8 0000000000000005 u0 0000000000000000 s9 0000000000000000
> s0 00007ffffbf9fce8 s1 0000000000000002 s2 0000000000000000
> s3 00007ffff2f2c038 s4 0000555555820610 s5 00007ffff2ed5000
> s6 0000555555827e38 s7 00007ffffbf9fd00 s8 0000555555827e38
> era: 00005555558206a4
> ra: 00007ffff2d854fc
> crmd: 000000b0 (-WE DACM=CC DACF=CC +PG -DA -IE PLV0)
> prmd: 00000007 (-PWE +PIE PPLV3)
> euen: 00000000 (-BTE -ASXE -SXE -FPE)
> ecfg: 0007181c (VS=7 LIE=2-4,11-12)
> estat: 000a0000 [BCE] (EsubCode=0 ECode=10 IS=)
> prid: 0014c010 (Loongson-64bit)

Signed-off-by: WANG Xuerui <git@xxxxxxxxxx>
---
arch/loongarch/include/asm/kdebug.h | 1 +
arch/loongarch/kernel/genex.S | 1 +
arch/loongarch/kernel/traps.c | 107 ++++++++++++++++++++++++++++
3 files changed, 109 insertions(+)

diff --git a/arch/loongarch/include/asm/kdebug.h b/arch/loongarch/include/asm/kdebug.h
index d721b4b82fae..793a8fdfeb90 100644
--- a/arch/loongarch/include/asm/kdebug.h
+++ b/arch/loongarch/include/asm/kdebug.h
@@ -18,6 +18,7 @@ enum die_val {
DIE_SSTEPBP,
DIE_UPROBE,
DIE_UPROBE_XOL,
+ DIE_BOUNDS_CHECK,
};

#endif /* _ASM_LOONGARCH_KDEBUG_H */
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 44ff1ff64260..78f066384657 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -82,6 +82,7 @@ SYM_FUNC_END(except_vec_cex)

BUILD_HANDLER ade ade badv
BUILD_HANDLER ale ale badv
+ BUILD_HANDLER bce bce none
BUILD_HANDLER bp bp none
BUILD_HANDLER fpe fpe fcsr
BUILD_HANDLER fpu fpu none
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index a060481198af..c6fc421a5983 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -36,6 +36,7 @@
#include <asm/break.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
+#include <asm/inst.h>
#include <asm/loongarch.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
@@ -51,6 +52,7 @@

extern asmlinkage void handle_ade(void);
extern asmlinkage void handle_ale(void);
+extern asmlinkage void handle_bce(void);
extern asmlinkage void handle_sys(void);
extern asmlinkage void handle_bp(void);
extern asmlinkage void handle_ri(void);
@@ -589,6 +591,110 @@ static void bug_handler(struct pt_regs *regs)
}
}

+/*
+ * Bounds check exception (BCE) handler: notifies the die chain, oopses if
+ * the fault came from kernel code, and otherwise signals the current task
+ * with SIGSEGV (SEGV_BNDERR when the faulting instruction can be read).
+ */
+asmlinkage void noinstr do_bce(struct pt_regs *regs)
+{
+	irqentry_state_t state = irqentry_enter(regs);
+	bool user = user_mode(regs);
+	unsigned long era = exception_era(regs);
+	union loongarch_instruction insn;
+	u64 badv = 0, lower = 0, upper = ULONG_MAX;
+
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_enable();
+
+	current->thread.trap_nr = read_csr_excode();
+
+	/* Let die-chain notifiers claim the exception before we act on it. */
+	if (notify_die(DIE_BOUNDS_CHECK, "Bounds check error", regs, 0,
+		       current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
+		goto out;
+
+	/* No register dump here: userland could flood the log with BCEs. */
+	die_if_kernel("Bounds check error in kernel code", regs);
+
+	/*
+	 * Pull out the address that failed bounds checking, and the lower /
+	 * upper bound, by minimally looking at the faulting instruction word
+	 * and reading from the correct register.
+	 *
+	 * Somewhat counter-intuitively (but it kind of makes sense), BCEs
+	 * during checked memory accesses *do not* update CSR.BADV. So badv
+	 * still comes from regs[rj] in these cases.
+	 */
+	if (__get_inst(&insn.word, (u32 *)era, user))
+		goto bad_era;
+	switch (insn.reg3_format.opcode) {
+	case asrtle_op:
+		if (insn.reg3_format.rd != 0)
+			break;	/* not asrtle */
+		badv = regs->regs[insn.reg3_format.rj];
+		upper = regs->regs[insn.reg3_format.rk];
+		break;
+
+	case asrtgt_op:
+		if (insn.reg3_format.rd != 0)
+			break;	/* not asrtgt */
+		badv = regs->regs[insn.reg3_format.rj];
+		lower = regs->regs[insn.reg3_format.rk];
+		break;
+
+	case fldles_op:
+	case fldled_op:
+	case fstles_op:
+	case fstled_op:
+	case ldleb_op:
+	case ldleh_op:
+	case ldlew_op:
+	case ldled_op:
+	case stleb_op:
+	case stleh_op:
+	case stlew_op:
+	case stled_op:
+		badv = regs->regs[insn.reg3_format.rj];
+		upper = regs->regs[insn.reg3_format.rk];
+		break;
+
+	case fldgts_op:
+	case fldgtd_op:
+	case fstgts_op:
+	case fstgtd_op:
+	case ldgtb_op:
+	case ldgth_op:
+	case ldgtw_op:
+	case ldgtd_op:
+	case stgtb_op:
+	case stgth_op:
+	case stgtw_op:
+	case stgtd_op:
+		badv = regs->regs[insn.reg3_format.rj];
+		lower = regs->regs[insn.reg3_format.rk];
+		break;
+	}
+
+	force_sig_bnderr((void __user *)badv, (void __user *)lower,
+			 (void __user *)upper);
+
+out:
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_disable();
+
+	irqentry_exit(regs, state);
+	return;
+
+bad_era:
+	/*
+	 * Cannot pull out the instruction word, hence cannot provide more
+	 * info than a regular SIGSEGV in this case.
+	 */
+	force_sig(SIGSEGV);
+	goto out;
+}
+
asmlinkage void noinstr do_bp(struct pt_regs *regs)
{
bool user = user_mode(regs);
@@ -1032,6 +1138,7 @@ void __init trap_init(void)

set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
+ set_handler(EXCCODE_OOB * VECSIZE, handle_bce, VECSIZE);
set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
--
2.40.0