[ 14/25] x86_64, traps: Fix the espfix64 #DF fixup and rewrite it in C

From: Willy Tarreau
Date: Sat Dec 06 2014 - 12:56:07 EST


2.6.32-longterm review patch. If anyone has any objections, please let me know.

------------------

From: Andy Lutomirski <luto@xxxxxxxxxxxxxx>

There's nothing special enough about the espfix64 double fault fixup to
justify writing it in assembly. Move it to C.

This also fixes a bug: if the double fault came from an IST stack, the
old asm code would return to a partially uninitialized stack frame.

Fixes: 3891a04aafd668686239349ea58f3314ea2af86b
Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
(cherry picked from commit af726f21ed8af2cdaa4e93098dc211521218ae65)
[wt: backport notes for 2.6.32 :
- Adaptations to entry_64.S in declaration of do_double_fault.
- no exception_enter() in 2.6.32. Seems to be only for context tracking.
/wt]
Signed-off-by: Willy Tarreau <w@xxxxxx>
---
arch/x86/kernel/entry_64.S | 34 ++--------------------------------
arch/x86/kernel/traps.c | 24 ++++++++++++++++++++++++
2 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8e2f14a..c862780 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -871,6 +871,7 @@ ENTRY(native_iret)
jnz native_irq_return_ldt
#endif

+.global native_irq_return_iret
native_irq_return_iret:
iretq

@@ -972,37 +973,6 @@ ENTRY(retint_kernel)
CFI_ENDPROC
END(common_interrupt)

- /*
- * If IRET takes a fault on the espfix stack, then we
- * end up promoting it to a doublefault. In that case,
- * modify the stack to make it look like we just entered
- * the #GP handler from user space, similar to bad_iret.
- */
-#ifdef CONFIG_X86_ESPFIX64
- ALIGN
-__do_double_fault:
- XCPT_FRAME 1 RDI+8
- movq RSP(%rdi),%rax /* Trap on the espfix stack? */
- sarq $PGDIR_SHIFT,%rax
- cmpl $ESPFIX_PGD_ENTRY,%eax
- jne do_double_fault /* No, just deliver the fault */
- cmpl $__KERNEL_CS,CS(%rdi)
- jne do_double_fault
- movq RIP(%rdi),%rax
- cmpq $native_irq_return_iret,%rax
- jne do_double_fault /* This shouldn't happen... */
- movq PER_CPU_VAR(kernel_stack),%rax
- subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
- movq %rax,RSP(%rdi)
- movq $0,(%rax) /* Missing (lost) #GP error code */
- movq $general_protection,RIP(%rdi)
- retq
- CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
/*
* APIC interrupts.
*/
@@ -1179,7 +1149,7 @@ zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault __do_double_fault
+paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c4cc05a..03563a4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -230,6 +230,30 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
static const char str[] = "double fault";
struct task_struct *tsk = current;

+#ifdef CONFIG_X86_ESPFIX64
+ extern unsigned char native_irq_return_iret[];
+
+ /*
+ * If IRET takes a non-IST fault on the espfix64 stack, then we
+ * end up promoting it to a doublefault. In that case, modify
+ * the stack to make it look like we just entered the #GP
+ * handler from user space, similar to bad_iret.
+ */
+ if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+ regs->cs == __KERNEL_CS &&
+ regs->ip == (unsigned long)native_irq_return_iret)
+ {
+ struct pt_regs *normal_regs = task_pt_regs(current);
+
+ /* Fake a #GP(0) from userspace. */
+ memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+ normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
+ regs->ip = (unsigned long)general_protection;
+ regs->sp = (unsigned long)&normal_regs->orig_ax;
+ return;
+ }
+#endif
+
/* Return not checked because double check cannot be ignored */
notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);

--
1.7.12.2.21.g234cd45.dirty



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/