Re: WARNING: kernel stack frame pointer has bad value

From: Josh Poimboeuf
Date: Wed Apr 19 2017 - 12:38:43 EST


On Wed, Apr 19, 2017 at 10:12:03AM -0400, Steven Rostedt wrote:
> On Wed, 19 Apr 2017 08:44:57 -0500
> Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:
>
> > On Tue, Apr 18, 2017 at 11:37:14PM -0400, Steven Rostedt wrote:
> > > Josh,
> > >
> > > I'm starting to get a bunch of these warnings, and I'm thinking they
> > > are false positives. The stack frame error is recorded at a call from
> > > entry_SYSCALL_64_fastpath, where I would expect the bp to not be valid.
> > >
> > > To trigger this, I only need to go into /sys/kernel/debug/tracing and
> > > echo function > current_tracer then cat trace. Maybe function tracer
> > > stack frames is messing it up some how, but it always fails at the
> > > entry call.
> > >
> > > Here's the dump;
> > >
> > > WARNING: kernel stack frame pointer at ffff8800bda0ff30 in sshd:1090 has bad value 000055b32abf1fa8
> > ...
> > > ffff8800bda0ff20: ffff8800bda0ff30 (0xffff8800bda0ff30)
> > > ffff8800bda0ff28: ffffffff810dc945 (SyS_rt_sigprocmask+0x5/0x1a0)
> > > ffff8800bda0ff30: 000055b32abf1fa8 (0x55b32abf1fa8)
> > > ffff8800bda0ff38: ffffffff81cf502a (entry_SYSCALL_64_fastpath+0x18/0xad)
> > > ffff8800bda0ff40: 000055b32abf1fa8 (0x55b32abf1fa8)
> > > ffff8800bda0ff48: ffffffff810dc945 (SyS_rt_sigprocmask+0x5/0x1a0)
> > > ffff8800bda0ff50: ffffffff81cf502a (entry_SYSCALL_64_fastpath+0x18/0xad)
> >
> > Thanks for reporting, I hadn't seen this one yet.
> >
> > The problem is that the unwinder expects the last frame pointer to be at
> > a certain address (0xffff8800bda0ff48 in this case), so it can know that
> > it reached the end. It's confused by the save_mcount_regs macro, which
> > builds some fake frames -- which is good -- but then the last frame is
> > at a different offset than what the unwinder expects.
> >
> > Would it be possible for ftrace to rewrite the stack so that it looks
> > like this instead?
> >
> > > ffff8800bda0ff38: ffff8800bda0ff48 (0xffff8800bda0ff48)
> > > ffff8800bda0ff40: ffffffff810dc945 (SyS_rt_sigprocmask+0x5/0x1a0)
> > > ffff8800bda0ff48: 000055b32abf1fa8 (0x55b32abf1fa8)
> > > ffff8800bda0ff50: ffffffff81cf502a (entry_SYSCALL_64_fastpath+0x18/0xad)
> >
> > In other words it would overwrite the "SyS_rt_sigprocmask+0x5/0x1a0"
> > value on the stack at ffff8800bda0ff48 with the original bp, instead of
> > appending to the existing stack. If you would be ok with such an
> > approach, I could take a stab at it.
>
> This is because we have to handle each different config differently.
> This is the case with FENTRY and FRAME_POINTERS. As I like to keep this
> as efficient as possible. To do the above, we need to modify the return
> address and then restore it. And handle that for each config type.
>
> >
> > The alternative would be to change the unwinder, but I would rather
> > avoid having to detect another special case if possible.
>
> I'm not sure what's worse. Modifying all the special cases of ftrace,
> or adding a new one to the undwinder.
>
> You can take a crack at it if you like, but it needs to be negligible
> in the performance of FENTRY and no frame pointers.

How about something like the following (completely untested):

diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 7b0d3da..54f0f45 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -27,19 +27,19 @@ EXPORT_SYMBOL(mcount)
/* All cases save the original rbp (8 bytes) */
#ifdef CONFIG_FRAME_POINTER
# ifdef CC_USING_FENTRY
-/* Save parent and function stack frames (rip and rbp) */
-# define MCOUNT_FRAME_SIZE (8+16*2)
+/* Save extra stack frame (rip and rbp) */
+# define MCOUNT_FRAME_SIZE 16
# else
-/* Save just function stack frame (rip and rbp) */
-# define MCOUNT_FRAME_SIZE (8+16)
+/* Save just rbp */
+# define MCOUNT_FRAME_SIZE 8
# endif
#else
/* No need to save a stack frame */
-# define MCOUNT_FRAME_SIZE 8
+# define MCOUNT_FRAME_SIZE 0
#endif /* CONFIG_FRAME_POINTER */

/* Size of stack used to save mcount regs in save_mcount_regs */
-#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE)
+#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE)

/*
* gcc -pg option adds a call to 'mcount' in most functions.
@@ -66,10 +66,7 @@ EXPORT_SYMBOL(mcount)
* %rsi - holds the parent function (traced function's return address)
* %rdx - holds the original %rbp
*/
-.macro save_mcount_regs added=0
-
- /* Always save the original rbp */
- pushq %rbp
+.macro save_mcount_regs save_flags=0

#ifdef CONFIG_FRAME_POINTER
/*
@@ -80,15 +77,14 @@ EXPORT_SYMBOL(mcount)
* is called afterward.
*/
#ifdef CC_USING_FENTRY
- /* Save the parent pointer (skip orig rbp and our return address) */
- pushq \added+8*2(%rsp)
- pushq %rbp
- movq %rsp, %rbp
- /* Save the return address (now skip orig rbp, rbp and parent) */
- pushq \added+8*3(%rsp)
-#else
- /* Can't assume that rip is before this (unless added was zero) */
- pushq \added+8(%rsp)
+ /* Copy rip to make room for original rbp */
+ pushq (%rsp)
+
+ /* Put original rbp next to parent rip */
+ movq %rbp, 8(%rsp)
+
+ /* Make rbp point to original rbp */
+ leaq 8(%rsp), %rbp
#endif
pushq %rbp
movq %rsp, %rbp
@@ -96,8 +92,19 @@ EXPORT_SYMBOL(mcount)

/*
* We add enough stack to save all regs.
+ *
+ * If saving flags, stop in the middle of the stack adjustment to save
+ * them in the right spot. Use 'leaq' instead of 'subq' so the flags
+ * aren't affected.
*/
- subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp
+ .if \save_flags
+ leaq EFLAGS-FRAME_SIZE+8(%rsp), %rsp
+ pushfq
+ subq $EFLAGS, %rsp
+ .else
+ subq FRAME_SIZE, %rsp
+ .endif
+
movq %rax, RAX(%rsp)
movq %rcx, RCX(%rsp)
movq %rdx, RDX(%rsp)
@@ -105,23 +112,28 @@ EXPORT_SYMBOL(mcount)
movq %rdi, RDI(%rsp)
movq %r8, R8(%rsp)
movq %r9, R9(%rsp)
+
/*
* Save the original RBP. Even though the mcount ABI does not
* require this, it helps out callers.
*/
- movq MCOUNT_REG_SIZE-8(%rsp), %rdx
+#ifdef CONFIG_FRAME_POINTER
+ movq (%rbp), %rdx
movq %rdx, RBP(%rsp)
+#else
+ movq %rbp, RBP(%rsp)
+#endif

/* Copy the parent address into %rsi (second parameter) */
#ifdef CC_USING_FENTRY
- movq MCOUNT_REG_SIZE+8+\added(%rsp), %rsi
+ movq MCOUNT_REG_SIZE+8(%rsp), %rsi
#else
- /* %rdx contains original %rbp */
+ movq RBP(%rsp), %rdx
movq 8(%rdx), %rsi
#endif

/* Move RIP to its proper location */
- movq MCOUNT_REG_SIZE+\added(%rsp), %rdi
+ movq MCOUNT_REG_SIZE(%rsp), %rdi
movq %rdi, RIP(%rsp)

/*
@@ -132,7 +144,7 @@ EXPORT_SYMBOL(mcount)
subq $MCOUNT_INSN_SIZE, %rdi
.endm

-.macro restore_mcount_regs
+.macro restore_mcount_regs restore_flags=0
movq R9(%rsp), %r9
movq R8(%rsp), %r8
movq RDI(%rsp), %rdi
@@ -144,7 +156,13 @@ EXPORT_SYMBOL(mcount)
/* ftrace_regs_caller can modify %rbp */
movq RBP(%rsp), %rbp

+ .if \restore_flags
+ leaq EFLAGS(%rsp), %rsp
+ popfq
+ addq FRAME_SIZE-EFLAGS+8, %rsp
+ .else
addq $MCOUNT_REG_SIZE, %rsp
+ .endif

.endm

@@ -191,11 +209,7 @@ WEAK(ftrace_stub)
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
- /* Save the current flags before any operations that can change them */
- pushfq
-
- /* added 8 bytes to save flags */
- save_mcount_regs 8
+ save_mcount_regs save_flags=1
/* save_mcount_regs fills in first two parameters */

GLOBAL(ftrace_regs_caller_op_ptr)
@@ -210,16 +224,13 @@ GLOBAL(ftrace_regs_caller_op_ptr)
movq %r11, R11(%rsp)
movq %r10, R10(%rsp)
movq %rbx, RBX(%rsp)
- /* Copy saved flags */
- movq MCOUNT_REG_SIZE(%rsp), %rcx
- movq %rcx, EFLAGS(%rsp)
/* Kernel segments */
movq $__KERNEL_DS, %rcx
movq %rcx, SS(%rsp)
movq $__KERNEL_CS, %rcx
movq %rcx, CS(%rsp)
- /* Stack - skipping return address and flags */
- leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
+ /* Stack - skipping return address */
+ leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
movq %rcx, RSP(%rsp)

/* regs go into 4th parameter */
@@ -228,10 +239,6 @@ GLOBAL(ftrace_regs_caller_op_ptr)
GLOBAL(ftrace_regs_call)
call ftrace_stub

- /* Copy flags back to SS, to restore them */
- movq EFLAGS(%rsp), %rax
- movq %rax, MCOUNT_REG_SIZE(%rsp)
-
/* Handlers can change the RIP */
movq RIP(%rsp), %rax
movq %rax, MCOUNT_REG_SIZE+8(%rsp)
@@ -244,10 +251,7 @@ GLOBAL(ftrace_regs_call)
movq R10(%rsp), %r10
movq RBX(%rsp), %rbx

- restore_mcount_regs
-
- /* Restore flags */
- popfq
+ restore_mcount_regs restore_flags=1

/*
* As this jmp to ftrace_epilogue can be a short jump