[RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init

From: Junaid Shahid
Date: Wed Feb 23 2022 - 00:28:37 EST


From: Ofir Weisse <oweisse@xxxxxxxxxx>

There are several areas in memory which we consider non sensitive.
These areas should be mapped in every ASI domain. We map these areas
in asi_global_init(). We modified some of the linking scripts to
ensure these areas are starting and ending on page boundaries.

The areas:
- _stext --> _etext
- __init_begin --> __init_end
- __start_rodata --> __end_rodata
- __start_once --> __end_once
- __start___ex_table --> __stop___ex_table
- __start_asi_nonsensitive --> __end_asi_nonsensitive
- __start_asi_nonsensitive_readmostly -->
__end_asi_nonsensitive_readmostly
- __vvar_page --> + PAGE_SIZE
- APIC_BASE --> + PAGE_SIZE
- phys_base --> + PAGE_SIZE
- __start___tracepoints_ptrs --> __stop___tracepoints_ptrs
- __start___tracepoint_str --> __stop___tracepoint_str
- __per_cpu_asi_start --> __per_cpu_asi_end (percpu)
- irq_stack_backing_store --> + sizeof(irq_stack_backing_store)
(percpu)

The pgd's of the following addresses are cloned, modeled after KPTI:
- CPU_ENTRY_AREA_BASE
- ESPFIX_BASE_ADDR

Signed-off-by: Ofir Weisse <oweisse@xxxxxxxxxx>


---
arch/x86/kernel/head_64.S | 12 +++++
arch/x86/kernel/vmlinux.lds.S | 2 +-
arch/x86/mm/asi.c | 82 +++++++++++++++++++++++++++++++
include/asm-generic/vmlinux.lds.h | 13 +++--
4 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..3d3874661895 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -574,9 +574,21 @@ SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))

.align 16
/* This must match the first entry in level2_kernel_pgt */
+
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+/* TODO: Find a way to mark .section for phys_base */
+/* Ideally, we want to map phys_base in .data..asi_non_sensitive. That doesn't
+ * seem to work properly. For now, we just make sure phys_base is in its own
+ * page. */
+ .align PAGE_SIZE
+#endif
SYM_DATA(phys_base, .quad 0x0)
EXPORT_SYMBOL(phys_base)

+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+ .align PAGE_SIZE
+#endif
+
#include "../../x86/xen/xen-head.S"

__PAGE_ALIGNED_BSS
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f..2b3668291785 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -148,8 +148,8 @@ SECTIONS
} :text =0xcccc

/* End of text section, which should occupy whole number of pages */
- _etext = .;
. = ALIGN(PAGE_SIZE);
+ _etext = .;

X86_ALIGN_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 04628949e89d..7f2aa1823736 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -9,6 +9,7 @@

#include <asm/asi.h>
#include <asm/pgalloc.h>
+#include <asm/processor.h> /* struct irq_stack */
#include <asm/mmu_context.h>

#include "mm_internal.h"
@@ -17,6 +18,24 @@
#undef pr_fmt
#define pr_fmt(fmt) "ASI: " fmt

+#include <linux/extable.h>
+#include <asm-generic/sections.h>
+
+extern struct exception_table_entry __start___ex_table[];
+extern struct exception_table_entry __stop___ex_table[];
+
+extern const char __start_asi_nonsensitive[], __end_asi_nonsensitive[];
+extern const char __start_asi_nonsensitive_readmostly[],
+ __end_asi_nonsensitive_readmostly[];
+extern const char __per_cpu_asi_start[], __per_cpu_asi_end[];
+extern const char *__start___tracepoint_str[];
+extern const char *__stop___tracepoint_str[];
+extern const char *__start___tracepoints_ptrs[];
+extern const char *__stop___tracepoints_ptrs[];
+extern const char __vvar_page[];
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store);
+
static struct asi_class asi_class[ASI_MAX_NUM] __asi_not_sensitive;
static DEFINE_SPINLOCK(asi_class_lock __asi_not_sensitive);

@@ -412,6 +431,7 @@ void asi_unload_module(struct module* module)
static int __init asi_global_init(void)
{
uint i, n;
+ int err = 0;

if (!boot_cpu_has(X86_FEATURE_ASI))
return 0;
@@ -436,6 +456,68 @@ static int __init asi_global_init(void)

pcpu_map_asi_reserved_chunk();

+
+ /*
+ * TODO: We need to ensure that all the sections mapped below are
+ * actually page-aligned by the linker. For now, we temporarily just
+ * align the start/end addresses here, but that is incorrect as the
+ * rest of the page could potentially contain sensitive data.
+ */
+#define MAP_SECTION(start, end) \
+ pr_err("%s:%d mapping 0x%lx --> 0x%lx", \
+ __FUNCTION__, __LINE__, start, end); \
+ err = asi_map(ASI_GLOBAL_NONSENSITIVE, \
+ (void*)((unsigned long)(start) & PAGE_MASK),\
+ PAGE_ALIGN((unsigned long)(end)) - \
+ ((unsigned long)(start) & PAGE_MASK)); \
+ BUG_ON(err);
+
+#define MAP_SECTION_PERCPU(start, size) \
+ pr_err("%s:%d mapping PERCPU 0x%lx --> 0x%lx", \
+ __FUNCTION__, __LINE__, start, (unsigned long)start+size); \
+ err = asi_map_percpu(ASI_GLOBAL_NONSENSITIVE, \
+ (void*)((unsigned long)(start) & PAGE_MASK), \
+ PAGE_ALIGN((unsigned long)(size))); \
+ BUG_ON(err);
+
+ MAP_SECTION(_stext, _etext);
+ MAP_SECTION(__init_begin, __init_end);
+ MAP_SECTION(__start_rodata, __end_rodata);
+ MAP_SECTION(__start_once, __end_once);
+ MAP_SECTION(__start___ex_table, __stop___ex_table);
+ MAP_SECTION(__start_asi_nonsensitive, __end_asi_nonsensitive);
+ MAP_SECTION(__start_asi_nonsensitive_readmostly,
+ __end_asi_nonsensitive_readmostly);
+ MAP_SECTION(__vvar_page, __vvar_page + PAGE_SIZE);
+ MAP_SECTION(APIC_BASE, APIC_BASE + PAGE_SIZE);
+ MAP_SECTION(&phys_base, &phys_base + PAGE_SIZE);
+
+ /* TODO: add a build flag to enable/disable mapping only when
+ * instrumentation is used */
+ MAP_SECTION(__start___tracepoints_ptrs, __stop___tracepoints_ptrs);
+ MAP_SECTION(__start___tracepoint_str, __stop___tracepoint_str);
+
+ MAP_SECTION_PERCPU((void*)__per_cpu_asi_start,
+ __per_cpu_asi_end - __per_cpu_asi_start);
+
+ MAP_SECTION_PERCPU(&irq_stack_backing_store,
+ sizeof(irq_stack_backing_store));
+
+ /* We have to map the stack canary into ASI. This is far from ideal, as
+ * attackers can use L1TF to steal the canary value, and then perhaps
+ * mount some other attack including a buffer overflow. This is a price
+ * we must pay to use ASI.
+ */
+ MAP_SECTION_PERCPU(&fixed_percpu_data, PAGE_SIZE);
+
+#define CLONE_INIT_PGD(addr) \
+ asi_clone_pgd(asi_global_nonsensitive_pgd, init_mm.pgd, addr);
+
+ CLONE_INIT_PGD(CPU_ENTRY_AREA_BASE);
+#ifdef CONFIG_X86_ESPFIX64
+ CLONE_INIT_PGD(ESPFIX_BASE_ADDR);
+#endif
+
return 0;
}
subsys_initcall(asi_global_init)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 0a931aedc285..7152ce3613f5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -235,8 +235,10 @@
#define TRACE_PRINTKS() __start___trace_bprintk_fmt = .; \
KEEP(*(__trace_printk_fmt)) /* Trace_printk fmt' pointer */ \
__stop___trace_bprintk_fmt = .;
-#define TRACEPOINT_STR() __start___tracepoint_str = .; \
+#define TRACEPOINT_STR() . = ALIGN(PAGE_SIZE); \
+ __start___tracepoint_str = .; \
KEEP(*(__tracepoint_str)) /* Trace_printk fmt' pointer */ \
+ . = ALIGN(PAGE_SIZE); \
__stop___tracepoint_str = .;
#else
#define TRACE_PRINTKS()
@@ -348,8 +350,10 @@
MEM_KEEP(init.data*) \
MEM_KEEP(exit.data*) \
*(.data.unlikely) \
+ . = ALIGN(PAGE_SIZE); \
__start_once = .; \
*(.data.once) \
+ . = ALIGN(PAGE_SIZE); \
__end_once = .; \
STRUCT_ALIGN(); \
*(__tracepoints) \
@@ -453,9 +457,10 @@
*(.rodata) *(.rodata.*) \
SCHED_DATA \
RO_AFTER_INIT_DATA /* Read only after init */ \
- . = ALIGN(8); \
+ . = ALIGN(PAGE_SIZE); \
__start___tracepoints_ptrs = .; \
KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \
+ . = ALIGN(PAGE_SIZE); \
__stop___tracepoints_ptrs = .; \
*(__tracepoints_strings)/* Tracepoints: strings */ \
} \
@@ -671,11 +676,13 @@
*/
#define EXCEPTION_TABLE(align) \
. = ALIGN(align); \
+ . = ALIGN(PAGE_SIZE); \
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \
__start___ex_table = .; \
KEEP(*(__ex_table)) \
+ . = ALIGN(PAGE_SIZE); \
__stop___ex_table = .; \
- }
+ } \

/*
* .BTF
--
2.35.1.473.g83b2b277ed-goog