[PATCH 1/3] csky: Add perf callchain support

From: Mao Han
Date: Fri Mar 08 2019 - 00:21:33 EST


This patch adds support for perf callchain sampling on the csky platform.
Both fp and dwarf unwinding are supported with this patch. When fp
is used to unwind the stack, the program being sampled and the
C library need to be compiled with -mbacktrace for user callchains;
kernel callchains require CONFIG_STACKTRACE=y. Unwinding with
dwarf requires compilation with -fexceptions, otherwise there will
be no debug information inside the executable file.

Signed-off-by: Mao Han <han_mao@xxxxxxxxx>
---
arch/csky/Kconfig | 2 +
arch/csky/include/uapi/asm/perf_regs.h | 48 ++++++++++++
arch/csky/kernel/Makefile | 2 +
arch/csky/kernel/perf_callchain.c | 133 +++++++++++++++++++++++++++++++++
arch/csky/kernel/perf_regs.c | 41 ++++++++++
5 files changed, 226 insertions(+)
create mode 100644 arch/csky/include/uapi/asm/perf_regs.h
create mode 100644 arch/csky/kernel/perf_callchain.c
create mode 100644 arch/csky/kernel/perf_regs.c

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 398113c..93b535d 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -35,6 +35,8 @@ config CSKY
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
select HAVE_C_RECORDMCOUNT
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000..337d8fa
--- /dev/null
+++ b/arch/csky/include/uapi/asm/perf_regs.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef _ASM_CSKY_PERF_REGS_H
+#define _ASM_CSKY_PERF_REGS_H
+
+enum perf_event_csky_regs { /* register indices; perf_reg_value() uses them as word offsets into struct pt_regs */
+ PERF_REG_CSKY_TLS,
+ PERF_REG_CSKY_LR,
+ PERF_REG_CSKY_PC,
+ PERF_REG_CSKY_SR,
+ PERF_REG_CSKY_SP,
+ PERF_REG_CSKY_ORIG_A0,
+ PERF_REG_CSKY_R0,
+ PERF_REG_CSKY_R1,
+ PERF_REG_CSKY_R2,
+ PERF_REG_CSKY_R3,
+ PERF_REG_CSKY_R4,
+ PERF_REG_CSKY_R5,
+ PERF_REG_CSKY_R6,
+ PERF_REG_CSKY_R7,
+ PERF_REG_CSKY_R8,
+ PERF_REG_CSKY_R9,
+ PERF_REG_CSKY_R10,
+ PERF_REG_CSKY_R11,
+ PERF_REG_CSKY_R12,
+ PERF_REG_CSKY_R13, /* NOTE(review): no R14/R15 entries follow - presumably covered by SP/LR above; confirm */
+ PERF_REG_CSKY_R16,
+ PERF_REG_CSKY_R17,
+ PERF_REG_CSKY_R18,
+ PERF_REG_CSKY_R19,
+ PERF_REG_CSKY_R20,
+ PERF_REG_CSKY_R21,
+ PERF_REG_CSKY_R22,
+ PERF_REG_CSKY_R23,
+ PERF_REG_CSKY_R24,
+ PERF_REG_CSKY_R25,
+ PERF_REG_CSKY_R26,
+ PERF_REG_CSKY_R27,
+ PERF_REG_CSKY_R28,
+ PERF_REG_CSKY_R29,
+ PERF_REG_CSKY_R30,
+ PERF_REG_CSKY_HI,
+ PERF_REG_CSKY_LO,
+ PERF_REG_CSKY_DCSR,
+ PERF_REG_CSKY_MAX, /* number of entries above, not a register; must stay last */
+};
+#endif /* _ASM_CSKY_PERF_REGS_H */
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 484e6d3..3549d0d 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -9,6 +9,8 @@ obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_CSKY_PMU_V1) += perf_event.o
+obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o

ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c
new file mode 100644
index 0000000..0ed8279
--- /dev/null
+++ b/arch/csky/kernel/perf_callchain.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+/* Kernel callchain: frame record read from the stack (saved fp, then saved lr) */
+struct stackframe {
+ unsigned long fp; /* frame pointer the unwinder continues from */
+ unsigned long lr; /* return address recorded in the callchain */
+};
+
+/* Step *frame to its caller; returns 0, or -EPERM once fp is unusable. */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+	unsigned long fp = frame->fp;
+	int graph = 0;
+
+	/* Reject the stack end, a misaligned fp (low 2 bits) or fp < TASK_SIZE. */
+	if (kstack_end((void *)fp) || (fp & 0x3) || fp < TASK_SIZE)
+		return -EPERM;
+
+	/* fp points at the caller's record; lr is read before fp is replaced. */
+	frame->lr = ((struct stackframe *)fp)->lr;
+	frame->fp = ((struct stackframe *)fp)->fp;
+
+	/* Let ftrace fix up lr; relies on CONFIG_FUNCTION_GRAPH_TRACER. */
+	if (__kernel_text_address(frame->lr))
+		frame->lr = ftrace_graph_ret_addr(NULL, &graph, frame->lr,
+						  NULL);
+	return 0;
+}
+
+/*
+ * Store fr->lr in the callchain, then step to the caller's frame and
+ * repeat.  Stops when the entry has no room left (which also bounds the
+ * walk if the frame chain is corrupt) or when unwinding fails.
+ */
+static void notrace
+walk_stackframe(struct stackframe *fr,
+		struct perf_callchain_entry_ctx *entry)
+{
+	do {
+		if (perf_callchain_store(entry, fr->lr))
+			break;
+	} while (unwind_frame_kernel(fr) >= 0);
+}
+
+/*
+ * Read the frame record at user address fp, store one return address in
+ * the callchain and return the caller's frame pointer saved in the record.
+ *
+ * If reg_lr is non-zero it is stored instead of the lr saved in the record.
+ * Returns 0 when the record is not accessible or cannot be copied.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp,
+	       unsigned long reg_lr)
+{
+	struct stackframe record;
+	unsigned long *uframe = (unsigned long *)fp;
+	unsigned long ret_addr;
+
+	/* The whole record has to pass the user address range check. */
+	if (!access_ok(uframe, sizeof(record)))
+		return 0;
+
+	/* A non-zero result is treated as a failed read. */
+	if (__copy_from_user_inatomic(&record, uframe, sizeof(record)))
+		return 0;
+
+	ret_addr = reg_lr ? reg_lr : record.lr;
+	perf_callchain_store(entry, ret_addr);
+
+	return record.fp;
+}
+
+/*
+ * Record the user-space callchain by following frame pointers.
+ *
+ * This is only called for samples that request PERF_SAMPLE_CALLCHAIN,
+ * see kernel/events/core.c:perf_prepare_sample().
+ *
+ * How to trigger perf_callchain_[user/kernel]:
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ *
+ * On the C-SKY platform the program being sampled and the C library
+ * need to be compiled with -mbacktrace, otherwise the user stack will
+ * not contain function frames.
+ */
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+		    struct pt_regs *regs)
+{
+	unsigned long fp;
+
+	/* C-SKY does not support virtualization. */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return;
+
+	/* The sampled pc is stored first. */
+	perf_callchain_store(entry, regs->pc);
+
+	/*
+	 * When backtracing from a leaf function, lr is normally not saved
+	 * inside the frame on C-SKY, so take lr from pt_regs at the sample
+	 * point. That value can be incorrect if lr is in use as a temporary
+	 * register.
+	 */
+	fp = user_backtrace(entry, regs->regs[4], regs->lr);
+
+	/* Keep going while there is room and fp is non-zero and aligned. */
+	while (entry->nr < entry->max_stack && fp && !(fp & 0x3))
+		fp = user_backtrace(entry, fp, 0);
+}
+
+/* Record the kernel callchain, seeded with the fp and lr held in regs. */
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	/* C-SKY does not support virtualization. */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		pr_warn("C-SKY does not support perf in guest mode!\n");
+		return;
+	}
+
+	fr.fp = regs->regs[4];
+	fr.lr = regs->lr;
+	walk_stackframe(&fr, entry);
+}
diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c
new file mode 100644
index 0000000..55fa389
--- /dev/null
+++ b/arch/csky/kernel/perf_regs.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/sched/task_stack.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_CSKY_MAX))
+		return 0;
+	/* Index pt_regs as words; unsigned so widening to u64 zero-extends. */
+	return ((unsigned long *)regs)[idx];
+}
+
+/* Mask bits at or above PERF_REG_CSKY_MAX; they name no register. */
+#define REG_RESERVED (~((1ULL << PERF_REG_CSKY_MAX) - 1))
+
+/* A sample mask is valid when it is non-empty and sets no reserved bit. */
+int perf_reg_validate(u64 mask)
+{
+	/* 0 on success, -EINVAL otherwise. */
+	return (mask && !(mask & REG_RESERVED)) ? 0 : -EINVAL;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ return PERF_SAMPLE_REGS_ABI_32; /* same answer for every task; the task argument is not consulted */
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+ struct pt_regs *regs, /* not used by this implementation */
+ struct pt_regs *regs_user_copy) /* not used by this implementation */
+{
+ regs_user->regs = task_pt_regs(current); /* pt_regs returned by task_pt_regs() for the current task */
+ regs_user->abi = perf_reg_abi(current); /* ABI tag reported alongside the registers */
+}
--
2.7.4


--opJtzjQTFsWo+cga
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="0002-perf-use-hweight64-instead-of-hweight_long.patch"