[PATCH v3 12/33] function_graph: Have the instances use their own ftrace_ops for filtering

From: Masami Hiramatsu (Google)
Date: Mon Nov 27 2023 - 08:57:19 EST


From: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>

Allow for instances to have their own ftrace_ops as part of the
fgraph_ops, which makes the function_graph tracer filter on the
set_ftrace_filter file of the instance and not the top instance.
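
As an aside for reviewers (not part of the patch), a per-instance user
such as trace_functions_graph.c now ends up wiring its filter hash in
roughly the following way, using the ops/idx members and
fgraph_init_ops() added below; the helper name fgraph_instance_init()
is made up for this sketch:

static int fgraph_instance_init(struct trace_array *tr)
{
        struct fgraph_ops *gops;

        gops = kzalloc(sizeof(*gops), GFP_KERNEL);
        if (!gops)
                return -ENOMEM;

        gops->entryfunc = &trace_graph_entry;
        gops->retfunc = &trace_graph_return;
        gops->private = tr;

        /*
         * Share the instance's ftrace_ops hash so that this instance's
         * set_ftrace_filter, not the top instance's, decides which
         * functions are graph traced.
         */
        fgraph_init_ops(&gops->ops, tr->ops);

        tr->gops = gops;
        return register_ftrace_graph(gops);
}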

This also changes how function_graph handles multiple instances on the
shadow stack. Previously, ARRAY type entries were used to record which
instance is enabled; this patch turns that into a bitmap of the
fgraph_array's indexes. Previously, function_graph_enter() expected to
be called back from prepare_ftrace_return(), which is invoked only once
when it is enabled. But this patch introduces a different ftrace_ops
for each fgraph instance, and those are called from ftrace_graph_func()
one by one. Thus we can not loop over the fgraph_array(), and need to
reuse the ret_stack pushed by the previous instance. Finding that
ret_stack is easy because we can check ret_stack->func. But that is not
enough for the self-recursive tail-call case, so fgraph also checks the
bitmap entry: if the bit for this instance is already set, that entry
was pushed for the previous tail call and a new one is needed.
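
For reference only (this helper is not in the patch; it just restates
what set_fgraph_index_bitmap() and the get_fgraph_*() helpers below do),
one index word on the shadow stack is packed like this:

static inline unsigned long fgraph_pack_bitmap(unsigned long bitmap)
{
        /*
         * bits  0- 9: offset in longs back to the ftrace_ret_stack
         *             (always FGRAPH_RET_INDEX for a bitmap entry)
         * bits 10-11: FGRAPH_TYPE_BITMAP
         * bits 12-27: one bit per fgraph_array[] index whose retfunc
         *             must run when the traced function returns
         */
        return (bitmap << FGRAPH_INDEX_SHIFT) |
               (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) |
               FGRAPH_RET_INDEX;
}

On return, __ftrace_return_to_handler() recovers the bitmap in the same
way and invokes each retfunc whose bit is set (fragment, variables as in
the patch):

        bitmap = (word >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
        for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
                if (bitmap & BIT(i))
                        fgraph_array[i]->retfunc(&trace, fgraph_array[i]);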

Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>
---
Changes in v3:
- Pass the current fgraph_ops to the new entry handler
  (function_graph_enter_ops) if fgraph uses ftrace.
- Add fgraph_ops::idx in this patch.
- Replace the array type with the bitmap type so that it can record
  which fgraph is called.
- Fix some helper functions to use the passed task_struct instead of current.
- Reduce the ret-index size to 1024 words.
- Make the ret-index point directly to the ret_stack.
- Fix ftrace_graph_ret_addr() to handle the tail-call case correctly.
Changes in v2:
- Use ftrace_graph_func and FTRACE_OPS_GRAPH_STUB instead of
  ftrace_stub and FTRACE_OPS_FL_STUB for the new ftrace-based fgraph.
---
arch/arm64/kernel/ftrace.c | 19 ++
arch/x86/kernel/ftrace.c | 19 ++
include/linux/ftrace.h | 7 +
kernel/trace/fgraph.c | 364 ++++++++++++++++++++--------------
kernel/trace/ftrace.c | 6 -
kernel/trace/trace.h | 16 +
kernel/trace/trace_functions.c | 2
kernel/trace/trace_functions_graph.c | 8 +
8 files changed, 277 insertions(+), 164 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..205937e04ece 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,24 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+ unsigned long *parent = &fregs->lr;
+ struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+ int bit;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ return;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ bit = ftrace_test_recursion_trylock(ip, *parent);
+ if (bit < 0)
+ return;
+
+ if (!function_graph_enter_ops(*parent, ip, fregs->fp, parent, gops))
+ *parent = (unsigned long)&return_to_handler;
+
+ ftrace_test_recursion_unlock(bit);
}
#else
/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 12df54ff0e81..845e29b4254f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -657,9 +657,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct pt_regs *regs = &fregs->regs;
- unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
+ struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+ int bit;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ return;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;

- prepare_ftrace_return(ip, (unsigned long *)stack, 0);
+ bit = ftrace_test_recursion_trylock(ip, *parent);
+ if (bit < 0)
+ return;
+
+ if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))
+ *parent = (unsigned long)&return_to_handler;
+
+ ftrace_test_recursion_unlock(bit);
}
#endif

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7b08169aa51d..c431a33fe789 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1070,7 +1070,9 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph
struct fgraph_ops {
trace_func_graph_ent_t entryfunc;
trace_func_graph_ret_t retfunc;
+ struct ftrace_ops ops; /* for the hash lists */
void *private;
+ int idx;
};

/*
@@ -1104,6 +1106,11 @@ extern int
function_graph_enter(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp);

+extern int
+function_graph_enter_ops(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp,
+ struct fgraph_ops *gops);
+
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int idx);

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 62c35d6d95f9..32c03d6ffd17 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -7,6 +7,7 @@
*
* Highly modified by Steven Rostedt (VMware).
*/
+#include <linux/bits.h>
#include <linux/jump_label.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
@@ -17,22 +18,15 @@
#include "ftrace_internal.h"
#include "trace.h"

-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
- .func_hash = val, \
- .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
-
#define FGRAPH_RET_SIZE sizeof(struct ftrace_ret_stack)
#define FGRAPH_RET_INDEX (FGRAPH_RET_SIZE / sizeof(long))

/*
* On entry to a function (via function_graph_enter()), a new ftrace_ret_stack
- * is allocated on the task's ret_stack, then each fgraph_ops on the
- * fgraph_array[]'s entryfunc is called and if that returns non-zero, the
- * index into the fgraph_array[] for that fgraph_ops is added to the ret_stack.
+ * is allocated on the task's ret_stack along with an index entry, then each
+ * fgraph_ops on the fgraph_array[]'s entryfunc is called and if that returns
+ * non-zero, the index into the fgraph_array[] for that fgraph_ops is recorded
+ * in the index entry as a bit flag.
* As the associated ftrace_ret_stack saved for those fgraph_ops needs to
* be found, the index to it is also added to the ret_stack along with the
* index of the fgraph_array[] to each fgraph_ops that needs their retfunc
@@ -42,61 +36,59 @@
* to the last ftrace_ret_stack saved. All references to the
* ftrace_ret_stack has the format of:
*
- * bits: 0 - 13 Index in words from the previous ftrace_ret_stack
- * bits: 14 - 15 Type of storage
+ * bits: 0 - 9 offset in words from the previous ftrace_ret_stack
+ * (bitmap type should have FGRAPH_RET_INDEX always)
+ * bits: 10 - 11 Type of storage
* 0 - reserved
- * 1 - fgraph_array index
- * For fgraph_array_index:
- * bits: 16 - 23 The fgraph_ops fgraph_array index
+ * 1 - bitmap of fgraph_array index
+ *
+ * For bitmap of fgraph_array index
+ * bits: 12 - 27 The bitmap of fgraph_ops fgraph_array index
*
* That is, at the end of function_graph_enter, if the first and forth
* fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called
* on the return of the function being traced, this is what will be on the
* task's shadow ret_stack: (the stack grows upward)
*
- * | | <- task->curr_ret_stack
- * +----------------------------------+
- * | (3 << FGRAPH_ARRAY_SHIFT)|(2) | ( 3 for index of fourth fgraph_ops)
- * +----------------------------------+
- * | (0 << FGRAPH_ARRAY_SHIFT)|(1) | ( 0 for index of first fgraph_ops)
- * +----------------------------------+
- * | struct ftrace_ret_stack |
- * | (stores the saved ret pointer) |
- * +----------------------------------+
- * | (X) | (N) | ( N words away from previous ret_stack)
- * | |
+ * | | <- task->curr_ret_stack
+ * +--------------------------------------------+
+ * | bitmap_type(bitmap:(BIT(3)|BIT(0)), |
+ * | offset:FGRAPH_RET_INDEX) | <- the offset is from here
+ * +--------------------------------------------+
+ * | struct ftrace_ret_stack |
+ * | (stores the saved ret pointer) | <- the offset points here
+ * +--------------------------------------------+
+ * | (X) | (N) | ( N words away from
+ * | | previous ret_stack)
*
* If a backtrace is required, and the real return pointer needs to be
* fetched, then it looks at the task's curr_ret_stack index, if it
- * is greater than zero, it would subtact one, and then mask the value
- * on the ret_stack by FGRAPH_RET_INDEX_MASK and subtract FGRAPH_RET_INDEX
- * from that, to get the index of the ftrace_ret_stack structure stored
- * on the shadow stack.
+ * is greater than zero (reserved, or right before being popped), it would mask
+ * the value by FGRAPH_RET_INDEX_MASK to get the offset index of the
+ * ftrace_ret_stack structure stored on the shadow stack.
*/

-#define FGRAPH_RET_INDEX_SIZE 14
-#define FGRAPH_RET_INDEX_MASK ((1 << FGRAPH_RET_INDEX_SIZE) - 1)
-
+#define FGRAPH_RET_INDEX_SIZE 10
+#define FGRAPH_RET_INDEX_MASK GENMASK(FGRAPH_RET_INDEX_SIZE - 1, 0)

#define FGRAPH_TYPE_SIZE 2
-#define FGRAPH_TYPE_MASK ((1 << FGRAPH_TYPE_SIZE) - 1)
+#define FGRAPH_TYPE_MASK GENMASK(FGRAPH_TYPE_SIZE - 1, 0)
#define FGRAPH_TYPE_SHIFT FGRAPH_RET_INDEX_SIZE

enum {
FGRAPH_TYPE_RESERVED = 0,
- FGRAPH_TYPE_ARRAY = 1,
+ FGRAPH_TYPE_BITMAP = 1,
};

-#define FGRAPH_ARRAY_SIZE 16
-#define FGRAPH_ARRAY_MASK ((1 << FGRAPH_ARRAY_SIZE) - 1)
-#define FGRAPH_ARRAY_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_SIZE)
+#define FGRAPH_INDEX_SIZE 16
+#define FGRAPH_INDEX_MASK GENMASK(FGRAPH_INDEX_SIZE - 1, 0)
+#define FGRAPH_INDEX_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_SIZE)

/* Currently the max stack index can't be more than register callers */
-#define FGRAPH_MAX_INDEX FGRAPH_ARRAY_SIZE
+#define FGRAPH_MAX_INDEX (FGRAPH_INDEX_SIZE + FGRAPH_RET_INDEX)
+
+#define FGRAPH_ARRAY_SIZE FGRAPH_INDEX_SIZE

-#define FGRAPH_FRAME_SIZE (FGRAPH_RET_SIZE + FGRAPH_ARRAY_SIZE * (sizeof(long)))
-#define FGRAPH_FRAME_INDEX (ALIGN(FGRAPH_FRAME_SIZE, \
- sizeof(long)) / sizeof(long))
#define SHADOW_STACK_SIZE (PAGE_SIZE)
#define SHADOW_STACK_INDEX (SHADOW_STACK_SIZE / sizeof(long))
/* Leave on a buffer at the end */
@@ -113,19 +105,36 @@ static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];

static inline int get_ret_stack_index(struct task_struct *t, int offset)
{
- return current->ret_stack[offset] & FGRAPH_RET_INDEX_MASK;
+ return t->ret_stack[offset] & FGRAPH_RET_INDEX_MASK;
}

static inline int get_fgraph_type(struct task_struct *t, int offset)
{
- return (current->ret_stack[offset] >> FGRAPH_TYPE_SHIFT) &
- FGRAPH_TYPE_MASK;
+ return (t->ret_stack[offset] >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK;
+}
+
+static inline unsigned long
+get_fgraph_index_bitmap(struct task_struct *t, int offset)
+{
+ return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
+}
+
+static inline void
+set_fgraph_index_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
+{
+ t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) |
+ (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_RET_INDEX;
+}
+
+static inline bool is_fgraph_index_set(struct task_struct *t, int offset, int idx)
+{
+ return !!(get_fgraph_index_bitmap(t, offset) & BIT(idx));
}

-static inline int get_fgraph_array(struct task_struct *t, int offset)
+static inline void
+add_fgraph_index_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
{
- return (current->ret_stack[offset] >> FGRAPH_ARRAY_SHIFT) &
- FGRAPH_ARRAY_MASK;
+ t->ret_stack[offset] |= (bitmap << FGRAPH_INDEX_SHIFT);
}

/* ftrace_graph_entry set to this to tell some archs to run function graph */
@@ -163,12 +172,12 @@ get_ret_stack(struct task_struct *t, int offset, int *index)
if (offset <= 0)
return NULL;

- idx = get_ret_stack_index(t, offset - 1);
+ idx = get_ret_stack_index(t, --offset);

if (idx <= 0 || idx > FGRAPH_MAX_INDEX)
return NULL;

- offset -= idx + FGRAPH_RET_INDEX;
+ offset -= idx;
if (offset < 0)
return NULL;

@@ -231,10 +240,12 @@ void ftrace_graph_stop(void)
/* Add a function return address to the trace stack on thread info.*/
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp)
+ unsigned long frame_pointer, unsigned long *retp,
+ int fgraph_idx)
{
struct ftrace_ret_stack *ret_stack;
unsigned long long calltime;
+ unsigned long val;
int index;

if (unlikely(ftrace_graph_is_dead()))
@@ -243,6 +254,27 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
if (!current->ret_stack)
return -EBUSY;

+ if (ret == (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) {
+ /*
+ * In this case, the previous fgraph callback already pushed the
+ * ret_stack, or @func is called by tail-call. Usual tail-call can
+ * be detected if ret_stack::func is not @func, but for the self-
+ * recursive tail-call case needs to check whether the @fgraph_idx
+ * is already recorded or not.
+ */
+ ret_stack = get_ret_stack(current, current->curr_ret_stack, &index);
+ if ((ret_stack && ret_stack->func == func) &&
+ !is_fgraph_index_set(current, index + FGRAPH_RET_INDEX, fgraph_idx)) {
+ return index + FGRAPH_RET_INDEX;
+ }
+ /*
+ * But !ret_stack means something is wrong, because then there is
+ * nowhere to return to.
+ */
+ WARN_ON_ONCE(!ret_stack);
+ }
+ val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_RET_INDEX;
+
BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long));

/*
@@ -252,17 +284,19 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
smp_rmb();

/* The return trace stack is full */
- if (current->curr_ret_stack >= SHADOW_STACK_MAX_INDEX) {
+ if (current->curr_ret_stack + FGRAPH_RET_INDEX >= SHADOW_STACK_MAX_INDEX) {
atomic_inc(&current->trace_overrun);
return -EBUSY;
}

calltime = trace_clock_local();

- index = current->curr_ret_stack;
- /* ret offset = 1 ; type = reserved */
- current->ret_stack[index + FGRAPH_RET_INDEX] = 1;
+ index = READ_ONCE(current->curr_ret_stack);
ret_stack = RET_STACK(current, index);
+ index += FGRAPH_RET_INDEX;
+
+ /* ret offset = FGRAPH_RET_INDEX ; type = reserved */
+ current->ret_stack[index] = val;
ret_stack->ret = ret;
/*
* The unwinders expect curr_ret_stack to point to either zero
@@ -278,7 +312,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
* at least a correct index!
*/
barrier();
- current->curr_ret_stack += FGRAPH_RET_INDEX + 1;
+ current->curr_ret_stack = index + 1;
/*
* This next barrier is to ensure that an interrupt coming in
* will not corrupt what we are about to write.
@@ -286,7 +320,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
barrier();

/* Still keep it reserved even if an interrupt came in */
- current->ret_stack[index + FGRAPH_RET_INDEX] = 1;
+ current->ret_stack[index] = val;

ret_stack->ret = ret;
ret_stack->func = func;
@@ -297,7 +331,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
ret_stack->retp = retp;
#endif
- return 0;
+ return index;
}

/*
@@ -314,15 +348,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
# define MCOUNT_INSN_SIZE 0
#endif

+/* If the caller does not use ftrace, call this function. */
int function_graph_enter(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
struct ftrace_graph_ent trace;
- int offset;
- int start;
- int type;
- int val;
- int cnt = 0;
+ unsigned long bitmap = 0;
+ int index;
int i;

#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
@@ -337,69 +369,29 @@ int function_graph_enter(unsigned long ret, unsigned long func,
return -EBUSY;
#endif

- if (!ftrace_ops_test(&global_ops, func, NULL))
- return -EBUSY;
-
trace.func = func;
trace.depth = ++current->curr_ret_depth;

- if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
+ index = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
+ if (index < 0)
goto out;

- /* Use start for the distance to ret_stack (skipping over reserve) */
- start = offset = current->curr_ret_stack - 2;
-
for (i = 0; i < fgraph_array_cnt; i++) {
struct fgraph_ops *gops = fgraph_array[i];

if (gops == &fgraph_stub)
continue;

- if ((offset == start) &&
- (current->curr_ret_stack >= SHADOW_STACK_INDEX - 1)) {
- atomic_inc(&current->trace_overrun);
- break;
- }
- if (fgraph_array[i]->entryfunc(&trace, fgraph_array[i])) {
- offset = current->curr_ret_stack;
- /* Check the top level stored word */
- type = get_fgraph_type(current, offset - 1);
-
- val = (i << FGRAPH_ARRAY_SHIFT) |
- (FGRAPH_TYPE_ARRAY << FGRAPH_TYPE_SHIFT) |
- ((offset - start) - 1);
-
- /* We can reuse the top word if it is reserved */
- if (type == FGRAPH_TYPE_RESERVED) {
- current->ret_stack[offset - 1] = val;
- cnt++;
- continue;
- }
- val++;
-
- current->ret_stack[offset] = val;
- /*
- * Write the value before we increment, so that
- * if an interrupt comes in after we increment
- * it will still see the value and skip over
- * this.
- */
- barrier();
- current->curr_ret_stack++;
- /*
- * Have to write again, in case an interrupt
- * came in before the increment and after we
- * wrote the value.
- */
- barrier();
- current->ret_stack[offset] = val;
- cnt++;
- }
+ if (ftrace_ops_test(&gops->ops, func, NULL) &&
+ gops->entryfunc(&trace, gops))
+ bitmap |= BIT(i);
}

- if (!cnt)
+ if (!bitmap)
goto out_ret;

+ set_fgraph_index_bitmap(current, index, bitmap);
+
return 0;
out_ret:
current->curr_ret_stack -= FGRAPH_RET_INDEX + 1;
@@ -408,15 +400,51 @@ int function_graph_enter(unsigned long ret, unsigned long func,
return -EBUSY;
}

+/* This is called from ftrace_graph_func() via ftrace */
+int function_graph_enter_ops(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp,
+ struct fgraph_ops *gops)
+{
+ struct ftrace_graph_ent trace;
+ int index;
+ int type;
+
+
+ /* Push a ret_stack entry, or reuse one pushed by another fgraph_ops */
+ index = ftrace_push_return_trace(ret, func, frame_pointer, retp, gops->idx);
+ if (index < 0)
+ return index;
+ type = get_fgraph_type(current, index);
+
+ /* This is the first ret_stack for this fentry */
+ if (type == FGRAPH_TYPE_RESERVED)
+ ++current->curr_ret_depth;
+
+ trace.func = func;
+ trace.depth = current->curr_ret_depth;
+ if (gops->entryfunc(&trace, gops)) {
+ if (type == FGRAPH_TYPE_RESERVED)
+ set_fgraph_index_bitmap(current, index, BIT(gops->idx));
+ else
+ add_fgraph_index_bitmap(current, index, BIT(gops->idx));
+ return 0;
+ }
+
+ if (type == FGRAPH_TYPE_RESERVED) {
+ current->curr_ret_stack -= FGRAPH_RET_INDEX + 1;
+ current->curr_ret_depth--;
+ }
+ return -EBUSY;
+}
+
/* Retrieve a function return address to the trace stack on thread info.*/
static struct ftrace_ret_stack *
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
- unsigned long frame_pointer)
+ unsigned long frame_pointer, int *index)
{
struct ftrace_ret_stack *ret_stack;
- int index;

- ret_stack = get_ret_stack(current, current->curr_ret_stack, &index);
+ ret_stack = get_ret_stack(current, current->curr_ret_stack, index);

if (unlikely(!ret_stack)) {
ftrace_graph_stop();
@@ -455,6 +483,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
}
#endif

+ *index += FGRAPH_RET_INDEX;
*ret = ret_stack->ret;
trace->func = ret_stack->func;
trace->calltime = ret_stack->calltime;
@@ -507,13 +536,12 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
{
struct ftrace_ret_stack *ret_stack;
struct ftrace_graph_ret trace;
+ unsigned long bitmap;
unsigned long ret;
- int offset;
int index;
- int idx;
int i;

- ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer);
+ ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &index);

if (unlikely(!ret_stack)) {
ftrace_graph_stop();
@@ -527,16 +555,17 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
trace.retval = fgraph_ret_regs_return_value(ret_regs);
#endif

- offset = current->curr_ret_stack - 1;
- index = get_ret_stack_index(current, offset);
+ bitmap = get_fgraph_index_bitmap(current, index);
+ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) {
+ struct fgraph_ops *gops = fgraph_array[i];

- /* index has to be at least one! Optimize for it */
- i = 0;
- do {
- idx = get_fgraph_array(current, offset - i);
- fgraph_array[idx]->retfunc(&trace, fgraph_array[idx]);
- i++;
- } while (i < index);
+ if (!(bitmap & BIT(i)))
+ continue;
+ if (gops == &fgraph_stub)
+ continue;
+
+ gops->retfunc(&trace, gops);
+ }

/*
* The ftrace_graph_return() may still access the current
@@ -544,7 +573,7 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
* curr_ret_stack is after that.
*/
barrier();
- current->curr_ret_stack -= index + FGRAPH_RET_INDEX;
+ current->curr_ret_stack -= FGRAPH_RET_INDEX + 1;
current->curr_ret_depth--;
return ret;
}
@@ -622,7 +651,17 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
ret_stack = get_ret_stack(current, i, &i);
if (!ret_stack)
break;
- if (ret_stack->retp == retp)
+ /*
+ * For the tail-call, there would be 2 or more ftrace_ret_stacks on
+ * the ret_stack, which records "return_to_handler" as the return
+ * address except for the last one.
+ * But on the real stack, there should be 1 entry because tail-call
+ * reuses the return address on the stack and jumps to the next function.
+ * Thus we will continue to find the real return address.
+ */
+ if (ret_stack->retp == retp &&
+ ret_stack->ret !=
+ (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
return ret_stack->ret;
}

@@ -645,6 +684,9 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
i = *idx;
do {
ret_stack = get_ret_stack(task, task_idx, &task_idx);
+ if (ret_stack && ret_stack->ret ==
+ (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+ continue;
i--;
} while (i >= 0 && ret_stack);

@@ -655,17 +697,25 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */

-static struct ftrace_ops graph_ops = {
- .func = ftrace_graph_func,
- .flags = FTRACE_OPS_FL_INITIALIZED |
- FTRACE_OPS_FL_PID |
- FTRACE_OPS_GRAPH_STUB,
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops)
+{
+ dst_ops->func = ftrace_graph_func;
+ dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
#ifdef FTRACE_GRAPH_TRAMP_ADDR
- .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
+ dst_ops->trampoline = FTRACE_GRAPH_TRAMP_ADDR;
/* trampoline_size is only needed for dynamically allocated tramps */
#endif
- ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
-};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (src_ops) {
+ dst_ops->func_hash = &src_ops->local_hash;
+ mutex_init(&dst_ops->local_hash.regex_lock);
+ dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+ }
+#endif
+}

void ftrace_graph_sleep_time_control(bool enable)
{
@@ -869,11 +919,20 @@ static int start_graph_tracing(void)

int register_ftrace_graph(struct fgraph_ops *gops)
{
+ int command = 0;
int ret = 0;
int i;

mutex_lock(&ftrace_lock);

+ if (!gops->ops.func) {
+ gops->ops.flags |= FTRACE_OPS_GRAPH_STUB;
+ gops->ops.func = ftrace_graph_func;
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+ gops->ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR;
+#endif
+ }
+
if (!fgraph_array[0]) {
/* The array must always have real data on it */
for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
@@ -893,6 +952,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
fgraph_array[i] = gops;
if (i + 1 > fgraph_array_cnt)
fgraph_array_cnt = i + 1;
+ gops->idx = i;

ftrace_graph_active++;

@@ -909,9 +969,10 @@ int register_ftrace_graph(struct fgraph_ops *gops)
*/
ftrace_graph_return = return_run;
ftrace_graph_entry = entry_run;
-
- ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+ command = FTRACE_START_FUNC_RET;
}
+
+ ret = ftrace_startup(&gops->ops, command);
out:
mutex_unlock(&ftrace_lock);
return ret;
@@ -919,6 +980,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)

void unregister_ftrace_graph(struct fgraph_ops *gops)
{
+ int command = 0;
int i;

mutex_lock(&ftrace_lock);
@@ -926,25 +988,29 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
if (unlikely(!ftrace_graph_active))
goto out;

- for (i = 0; i < fgraph_array_cnt; i++)
- if (gops == fgraph_array[i])
- break;
- if (i >= fgraph_array_cnt)
+ if (unlikely(gops->idx < 0 || gops->idx >= fgraph_array_cnt))
goto out;

- fgraph_array[i] = &fgraph_stub;
- if (i + 1 == fgraph_array_cnt) {
- for (; i >= 0; i--)
- if (fgraph_array[i] != &fgraph_stub)
- break;
+ WARN_ON_ONCE(fgraph_array[gops->idx] != gops);
+
+ fgraph_array[gops->idx] = &fgraph_stub;
+ if (gops->idx + 1 == fgraph_array_cnt) {
+ i = gops->idx;
+ while (i >= 0 && fgraph_array[i] == &fgraph_stub)
+ i--;
fgraph_array_cnt = i + 1;
}

ftrace_graph_active--;
+
+ if (!ftrace_graph_active)
+ command = FTRACE_STOP_FUNC_RET;
+
+ ftrace_shutdown(&gops->ops, command);
+
if (!ftrace_graph_active) {
ftrace_graph_return = ftrace_stub_graph;
ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 83fbfb7b48f8..c4cc2a9d0047 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3050,6 +3050,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
if (unlikely(ftrace_disabled))
return -ENODEV;

+ ftrace_ops_init(ops);
+
ret = __register_ftrace_function(ops);
if (ret)
return ret;
@@ -7319,7 +7321,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr)
tr->ops = &global_ops;
tr->ops->private = tr;
ftrace_init_trace_array(tr);
- init_array_fgraph_ops(tr);
+ init_array_fgraph_ops(tr, tr->ops);
}

void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
@@ -8051,7 +8053,7 @@ static int register_ftrace_function_nolock(struct ftrace_ops *ops)
*/
int register_ftrace_function(struct ftrace_ops *ops)
{
- int ret;
+ int ret = -1;

lock_direct_mutex();
ret = prepare_direct_functions_for_ipmodify(ops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index febb9c6d01c7..f77322e3b177 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -885,8 +885,8 @@ extern int __trace_graph_entry(struct trace_array *tr,
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx);
-extern void init_array_fgraph_ops(struct trace_array *tr);
-extern int allocate_fgraph_ops(struct trace_array *tr);
+extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
extern void free_fgraph_ops(struct trace_array *tr);

#ifdef CONFIG_DYNAMIC_FTRACE
@@ -969,6 +969,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
preempt_enable_notrace();
return ret;
}
+
#else
static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
{
@@ -994,18 +995,19 @@ static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
(fgraph_max_depth && trace->depth >= fgraph_max_depth);
}

+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops);
+
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
return TRACE_TYPE_UNHANDLED;
}
-static inline void init_array_fgraph_ops(struct trace_array *tr) { }
-static inline int allocate_fgraph_ops(struct trace_array *tr)
-{
- return 0;
-}
static inline void free_fgraph_ops(struct trace_array *tr) { }
+/* ftrace_ops may not be defined */
+#define init_array_fgraph_ops(tr, ops) do { } while (0)
+#define allocate_fgraph_ops(tr, ops) ({ 0; })
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

extern struct list_head ftrace_pids;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e8da0d0ee52..13bf2415245d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -91,7 +91,7 @@ int ftrace_create_function_files(struct trace_array *tr,
if (!tr->ops)
return -EINVAL;

- ret = allocate_fgraph_ops(tr);
+ ret = allocate_fgraph_ops(tr, tr->ops);
if (ret) {
kfree(tr->ops);
return ret;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 9ccc904a7703..7f30652f0e97 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -288,7 +288,7 @@ static struct fgraph_ops funcgraph_ops = {
.retfunc = &trace_graph_return,
};

-int allocate_fgraph_ops(struct trace_array *tr)
+int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
{
struct fgraph_ops *gops;

@@ -301,6 +301,9 @@ int allocate_fgraph_ops(struct trace_array *tr)

tr->gops = gops;
gops->private = tr;
+
+ fgraph_init_ops(&gops->ops, ops);
+
return 0;
}

@@ -309,10 +312,11 @@ void free_fgraph_ops(struct trace_array *tr)
kfree(tr->gops);
}

-__init void init_array_fgraph_ops(struct trace_array *tr)
+__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
{
tr->gops = &funcgraph_ops;
funcgraph_ops.private = tr;
+ fgraph_init_ops(&tr->gops->ops, ops);
}

static int graph_trace_init(struct trace_array *tr)