Re: [PATCH v2] arm64: Support Clang UBSAN trap codes for better reporting

From: Fangrui Song
Date: Fri Feb 03 2023 - 14:15:07 EST


On Fri, Feb 3, 2023 at 9:39 AM Kees Cook <keescook@xxxxxxxxxxxx> wrote:
>
> When building with CONFIG_UBSAN_TRAP=y on arm64, Clang encodes the UBSAN
> check (handler) type in the esr. Extract this and actually report these
> traps as coming from the specific UBSAN check that tripped.
>
> Before:
>
> Internal error: BRK handler: 00000000f20003e8 [#1] PREEMPT SMP
>
> After:
>
> Internal error: UBSAN: shift out of bounds: 00000000f2005514 [#1] PREEMPT SMP
>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> Cc: John Stultz <jstultz@xxxxxxxxxx>
> Cc: Yongqin Liu <yongqin.liu@xxxxxxxxxx>
> Cc: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
> Cc: Ard Biesheuvel <ardb@xxxxxxxxxx>
> Cc: Yury Norov <yury.norov@xxxxxxxxx>
> Cc: Andrey Konovalov <andreyknvl@xxxxxxxxx>
> Cc: Marco Elver <elver@xxxxxxxxxx>
> Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> Cc: llvm@xxxxxxxxxxxxxxx
> Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
> ---
> v2: improve commit log, limit report strings to actual configs, document mappings
> v1: https://lore.kernel.org/lkml/20230202223653.never.473-kees@xxxxxxxxxx/

Thanks. I'll add the Linux kernel use to
https://maskray.me/blog/2023-01-29-all-about-undefined-behavior-sanitizer
when this lands:)

> ---
> arch/arm64/include/asm/brk-imm.h | 2 +
> arch/arm64/kernel/traps.c | 21 ++++++++++
> include/linux/ubsan.h | 9 +++++
> lib/Makefile | 2 -
> lib/ubsan.c | 67 ++++++++++++++++++++++++++++++++
> lib/ubsan.h | 32 +++++++++++++++
> 6 files changed, 131 insertions(+), 2 deletions(-)
> create mode 100644 include/linux/ubsan.h
>
> diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
> index 6e000113e508..3f0f0d03268b 100644
> --- a/arch/arm64/include/asm/brk-imm.h
> +++ b/arch/arm64/include/asm/brk-imm.h
> @@ -28,6 +28,8 @@
> #define BUG_BRK_IMM 0x800
> #define KASAN_BRK_IMM 0x900
> #define KASAN_BRK_MASK 0x0ff
> +#define UBSAN_BRK_IMM 0x5500
> +#define UBSAN_BRK_MASK 0x00ff

Q: How is 0x5500 derived?

> #define CFI_BRK_IMM_TARGET GENMASK(4, 0)
> #define CFI_BRK_IMM_TYPE GENMASK(9, 5)
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 4c0caa589e12..87f42eb1c950 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -26,6 +26,7 @@
> #include <linux/syscalls.h>
> #include <linux/mm_types.h>
> #include <linux/kasan.h>
> +#include <linux/ubsan.h>
> #include <linux/cfi.h>
>
> #include <asm/atomic.h>
> @@ -1074,6 +1075,19 @@ static struct break_hook kasan_break_hook = {
> };
> #endif
>
> +#ifdef CONFIG_UBSAN_TRAP
> +static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
> +{
> + die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
> + return DBG_HOOK_HANDLED;
> +}
> +
> +static struct break_hook ubsan_break_hook = {
> + .fn = ubsan_handler,
> + .imm = UBSAN_BRK_IMM,
> + .mask = UBSAN_BRK_MASK,
> +};
> +#endif
>
> #define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
>
> @@ -1091,6 +1105,10 @@ int __init early_brk64(unsigned long addr, unsigned long esr,
> #ifdef CONFIG_KASAN_SW_TAGS
> if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
> return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
> +#endif
> +#ifdef CONFIG_UBSAN_TRAP
> + if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
> + return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
> #endif
> return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
> }
> @@ -1104,6 +1122,9 @@ void __init trap_init(void)
> register_kernel_break_hook(&fault_break_hook);
> #ifdef CONFIG_KASAN_SW_TAGS
> register_kernel_break_hook(&kasan_break_hook);
> +#endif
> +#ifdef CONFIG_UBSAN_TRAP
> + register_kernel_break_hook(&ubsan_break_hook);
> #endif

IIUC, the break hook is a list so CONFIG_KASAN_SW_TAGS
(kernel-hwaddress) can be used with CONFIG_UBSAN_TRAP.

> debug_traps_init();
> }
> diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h
> new file mode 100644
> index 000000000000..bff7445498de
> --- /dev/null
> +++ b/include/linux/ubsan.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_UBSAN_H
> +#define _LINUX_UBSAN_H
> +
> +#ifdef CONFIG_UBSAN_TRAP
> +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type);
> +#endif
> +
> +#endif
> diff --git a/lib/Makefile b/lib/Makefile
> index 4d9461bfea42..81b988bf9448 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -340,9 +340,7 @@ quiet_cmd_build_OID_registry = GEN $@
> clean-files += oid_registry_data.c
>
> obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
> -ifneq ($(CONFIG_UBSAN_TRAP),y)
> obj-$(CONFIG_UBSAN) += ubsan.o
> -endif
>
> UBSAN_SANITIZE_ubsan.o := n
> KASAN_SANITIZE_ubsan.o := n
> diff --git a/lib/ubsan.c b/lib/ubsan.c
> index 60c7099857a0..f05ae85fc268 100644
> --- a/lib/ubsan.c
> +++ b/lib/ubsan.c
> @@ -18,6 +18,71 @@
>
> #include "ubsan.h"
>
> +#ifdef CONFIG_UBSAN_TRAP
> +/*
> + * Only include matches for UBSAN checks that are actually compiled in.
> + * The mappings of struct SanitizerKind (the -fsanitize=xxx args) to
> + * enum SanitizerHandler (the traps) in Clang is in clang/lib/CodeGen/.
> + */
> +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
> +{
> + switch (check_type) {
> +#ifdef CONFIG_UBSAN_BOUNDS
> + /*
> + * SanitizerKind::ArrayBounds and SanitizerKind::LocalBounds
> + * emit SanitizerHandler::OutOfBounds.
> + */
> + case ubsan_out_of_bounds:
> + return "UBSAN: array index out of bounds";
> +#endif
> +#ifdef CONFIG_UBSAN_SHIFT
> + /*
> + * SanitizerKind::ShiftBase and SanitizerKind::ShiftExponent
> + * emit SanitizerHandler::ShiftOutOfBounds.
> + */
> + case ubsan_shift_out_of_bounds:
> + return "UBSAN: shift out of bounds";
> +#endif
> +#ifdef CONFIG_UBSAN_DIV_ZERO
> + /*
> + * SanitizerKind::IntegerDivideByZero emits
> + * SanitizerHandler::DivremOverflow.
> + */
> + case ubsan_divrem_overflow:
> + return "UBSAN: divide/remainder overflow";
> +#endif
> +#ifdef CONFIG_UBSAN_UNREACHABLE
> + /*
> + * SanitizerKind::Unreachable emits
> + * SanitizerHandler::BuiltinUnreachable.
> + */
> + case ubsan_builtin_unreachable:
> + return "UBSAN: unreachable code";
> +#endif
> +#if defined(CONFIG_UBSAN_BOOL) || defined(CONFIG_UBSAN_ENUM)
> + /*
> + * SanitizerKind::Bool and SanitizerKind::Enum emit
> + * SanitizerHandler::LoadInvalidValue.
> + */
> + case ubsan_load_invalid_value:
> + return "UBSAN: loading invalid value";
> +#endif
> +#ifdef CONFIG_UBSAN_ALIGNMENT
> + /*
> + * SanitizerKind::Alignment emits SanitizerHandler::TypeMismatch
> + * or SanitizerHandler::AlignmentAssumption.
> + */
> + case ubsan_alignment_assumption:
> + return "UBSAN: alignment assumption";
> + case ubsan_type_mismatch:
> + return "UBSAN: type mismatch";
> +#endif
> + default:
> + return "UBSAN: unrecognized failure code";
> + }
> +}

I wonder whether keeping the dash-prefixed name is better since that
matches compiler-rt/lib/ubsan.
People can search for "add-overflow" and get cross references from
compiler-rt/lib/ubsan, instead of needing to knowing that "addition
overflow" is another name for "add-overflow".

> +
> +#else
> static const char * const type_check_kinds[] = {
> "load of",
> "store to",
> @@ -384,3 +449,5 @@ void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
> ubsan_epilogue();
> }
> EXPORT_SYMBOL(__ubsan_handle_alignment_assumption);
> +
> +#endif /* !CONFIG_UBSAN_TRAP */
> diff --git a/lib/ubsan.h b/lib/ubsan.h
> index 9a0b71c5ff9f..cc5cb94895a6 100644
> --- a/lib/ubsan.h
> +++ b/lib/ubsan.h
> @@ -2,6 +2,38 @@
> #ifndef _LIB_UBSAN_H
> #define _LIB_UBSAN_H
>
> +/*
> + * ABI defined by Clang's UBSAN enum SanitizerHandler:
> + * https://github.com/llvm/llvm-project/blob/release/16.x/clang/lib/CodeGen/CodeGenFunction.h#L113
> + */
> +enum ubsan_checks {
> + ubsan_add_overflow,
> + ubsan_builtin_unreachable,
> + ubsan_cfi_check_fail,
> + ubsan_divrem_overflow,
> + ubsan_dynamic_type_cache_miss,
> + ubsan_float_cast_overflow,
> + ubsan_function_type_mismatch,
> + ubsan_implicit_conversion,
> + ubsan_invalid_builtin,
> + ubsan_invalid_objc_cast,
> + ubsan_load_invalid_value,
> + ubsan_missing_return,
> + ubsan_mul_overflow,
> + ubsan_negate_overflow,
> + ubsan_nullability_arg,
> + ubsan_nullability_return,
> + ubsan_nonnull_arg,
> + ubsan_nonnull_return,
> + ubsan_out_of_bounds,
> + ubsan_pointer_overflow,
> + ubsan_shift_out_of_bounds,
> + ubsan_sub_overflow,
> + ubsan_type_mismatch,
> + ubsan_alignment_assumption,
> + ubsan_vla_bound_not_positive,
> +};
> +
> enum {
> type_kind_int = 0,
> type_kind_float = 1,
> --
> 2.34.1
>
>

Reviewed-by: Fangrui Song <maskray@xxxxxxxxxx>