[PATCH] x86/fault: Decode page fault OOPSes better

From: Andy Lutomirski
Date: Fri Aug 31 2018 - 23:42:17 EST


One of Linus' favorite hobbies seems to be looking at OOPSes and
decoding the error code in his head. This is not one of my favorite
hobbies :)

Teach the page fault OOPS hander to decode the error code. If it's
a !USER fault from user mode, print an explicit note to that effect
and print out the addresses of various tables that might cause such
an error.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---

This is not the best-tested thing in the world, although it appears to
work. I think it would have made developing PTI much more pleasant.

arch/x86/mm/fault.c | 76 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b9123c497e0a..9381ce880c9d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -25,6 +25,7 @@
#include <asm/vsyscall.h> /* emulate_vsyscall */
#include <asm/vm86.h> /* struct vm86 */
#include <asm/mmu_context.h> /* vma_pkey() */
+#include <asm/desc.h> /* store_idt(), ... */

#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -640,10 +641,43 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
return 0;
}

+static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
+{
+ u32 offset = (index >> 3) * sizeof(struct desc_struct);
+ unsigned long addr;
+ struct ldttss_desc desc;
+
+ if (index == 0) {
+ pr_alert("%s: NULL\n", name);
+ return;
+ }
+
+ if (offset + sizeof(struct ldttss_desc) >= gdt->size) {
+ pr_alert("%s: 0x%hx -- out of bounds\n", name, index);
+ return;
+ }
+
+ if (probe_kernel_read(&desc, (void *)(gdt->address + offset),
+ sizeof(struct ldttss_desc))) {
+ pr_alert("%s: 0x%hx -- GDT entry is not readable\n",
+ name, index);
+ return;
+ }
+
+ addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24);
+#ifdef CONFIG_X86_64
+ addr |= ((u64)desc.base3 << 32);
+#endif
+ pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n",
+ name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
+}
+
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
+ char errcode[64];
+
if (!oops_may_print())
return;

@@ -671,6 +705,48 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
(void *)address);

+ errcode[0] = 0;
+#define ERRSTR(x) if (error_code & X86_PF_##x) strcat(errcode, " " #x)
+ ERRSTR(PROT);
+ ERRSTR(WRITE);
+ ERRSTR(USER);
+ ERRSTR(RSVD);
+ ERRSTR(INSTR);
+ ERRSTR(PK);
+#undef ERRSTR
+ pr_alert("HW error: %s\n", errcode[0] ? errcode + 1 :
+ "normal kernel read fault");
+ if (!(error_code & X86_PF_USER) && user_mode(regs)) {
+ struct desc_ptr idt, gdt;
+ u16 ldtr, tr;
+
+ pr_alert("This was a system access from user code\n");
+
+ /*
+ * This can happen for quite a few reasons. The more obvious
+ * ones are faults accessing the GDT, or LDT. Perhaps
+ * surprisingly, if the CPU tries to deliver a benign or
+ * contributory exception from user code and gets a page fault
+ * during delivery, the page fault can be delivered as though
+ * it originated directly from user code. This could happen
+ * due to wrong permissions on the IDT, GDT, LDT, TSS, or
+ * kernel or IST stack.
+ */
+ store_idt(&idt);
+
+ /* Usable even on Xen PV -- it's just slow. */
+ native_store_gdt(&gdt);
+
+ pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n",
+ idt.address, idt.size, gdt.address, gdt.size);
+
+ store_ldt(ldtr);
+ show_ldttss(&gdt, "LDTR", ldtr);
+
+ store_tr(tr);
+ show_ldttss(&gdt, "TR", tr);
+ }
+
dump_pagetable(address);
}

--
2.17.1