[GIT PULL] perf fixes

From: Ingo Molnar
Date: Sun Apr 03 2016 - 07:00:59 EST


Linus,

Please pull the latest perf-urgent-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

# HEAD: 85dc600263c2291cea33bffa90038808ee64198b perf/x86/amd/ibs: Fix pmu::stop() nesting

Misc kernel side fixes:

- fix event leak
- fix AMD PMU driver bug
- fix core event handling bug
- fix build bug on certain randconfigs

Plus misc tooling fixes.

Thanks,

Ingo

------------------>
Alexander Shishkin (1):
perf/core: Don't leak event in the syscall error path

Andres Freund (1):
perf hists: Fix determination of a callchain node's childlessness

Anton Blanchard (1):
perf jit: genelf makes assumptions about endian

Arnaldo Carvalho de Melo (3):
perf tests: Fix tarpkg build test error output redirection
perf bench: Fix detached tarball building due to missing 'perf bench memcpy' headers
perf tools: Add missing initialization of perf_sample.cpumode in synthesized samples

Huang Rui (1):
perf/x86: Move events_sysfs_show() outside CPU_SUP_INTEL

Peter Zijlstra (2):
perf/core: Fix time tracking bug with multiplexing
perf/x86/amd/ibs: Fix pmu::stop() nesting

Sukadev Bhattiprolu (1):
perf tools: Fix build break on powerpc


arch/x86/events/amd/ibs.c | 52 ++++++++++++++++++++++++++++++-----
arch/x86/events/perf_event.h | 6 ++--
kernel/events/core.c | 15 ++++++++--
tools/perf/MANIFEST | 1 +
tools/perf/arch/powerpc/util/header.c | 2 ++
tools/perf/tests/perf-targz-src-pkg | 2 +-
tools/perf/ui/browsers/hists.c | 2 +-
tools/perf/util/event.c | 23 +++++++++++-----
tools/perf/util/genelf.h | 24 +++++++---------
tools/perf/util/intel-bts.c | 1 +
tools/perf/util/intel-pt.c | 3 ++
tools/perf/util/jitdump.c | 2 ++
12 files changed, 98 insertions(+), 35 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 3ea25c3917c0..feb90f6730e8 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -28,10 +28,46 @@ static u32 ibs_caps;
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT

+
+/*
+ * IBS states:
+ *
+ * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
+ * and any further add()s must fail.
+ *
+ * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
+ * complicated by the fact that the IBS hardware can send late NMIs (ie. after
+ * we've cleared the EN bit).
+ *
+ * In order to consume these late NMIs we have the STOPPED state, any NMI that
+ * happens after we've cleared the EN state will clear this bit and report the
+ * NMI handled (this is fundamentally racy in the face or multiple NMI sources,
+ * someone else can consume our BIT and our NMI will go unhandled).
+ *
+ * And since we cannot set/clear this separate bit together with the EN bit,
+ * there are races; if we cleared STARTED early, an NMI could land in
+ * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
+ * could happen if the period is small enough), and consume our STOPPED bit
+ * and trigger streams of unhandled NMIs.
+ *
+ * If, however, we clear STARTED late, an NMI can hit between clearing the
+ * EN bit and clearing STARTED, still see STARTED set and process the event.
+ * If this event will have the VALID bit clear, we bail properly, but this
+ * is not a given. With VALID set we can end up calling pmu::stop() again
+ * (the throttle logic) and trigger the WARNs in there.
+ *
+ * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
+ * nesting, and clear STARTED late, so that we have a well defined state over
+ * the clearing of the EN bit.
+ *
+ * XXX: we could probably be using !atomic bitops for all this.
+ */
+
enum ibs_states {
IBS_ENABLED = 0,
IBS_STARTED = 1,
IBS_STOPPING = 2,
+ IBS_STOPPED = 3,

IBS_MAX_STATES,
};
@@ -377,11 +413,10 @@ static void perf_ibs_start(struct perf_event *event, int flags)

perf_ibs_set_period(perf_ibs, hwc, &period);
/*
- * Set STARTED before enabling the hardware, such that
- * a subsequent NMI must observe it. Then clear STOPPING
- * such that we don't consume NMIs by accident.
+ * Set STARTED before enabling the hardware, such that a subsequent NMI
+ * must observe it.
*/
- set_bit(IBS_STARTED, pcpu->state);
+ set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state);
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

@@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
u64 config;
int stopping;

+ if (test_and_set_bit(IBS_STOPPING, pcpu->state))
+ return;
+
stopping = test_bit(IBS_STARTED, pcpu->state);

if (!stopping && (hwc->state & PERF_HES_UPTODATE))
@@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags)

if (stopping) {
/*
- * Set STOPPING before disabling the hardware, such that it
+ * Set STOPPED before disabling the hardware, such that it
* must be visible to NMIs the moment we clear the EN bit,
* at which point we can generate an !VALID sample which
* we need to consume.
*/
- set_bit(IBS_STOPPING, pcpu->state);
+ set_bit(IBS_STOPPED, pcpu->state);
perf_ibs_disable_event(perf_ibs, hwc, config);
/*
* Clear STARTED after disabling the hardware; if it were
@@ -556,7 +594,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
* with samples that even have the valid bit cleared.
* Mark all this NMIs as handled.
*/
- if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+ if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
return 1;

return 0;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ba6ef18528c9..a6771e2303d2 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -795,6 +795,9 @@ ssize_t intel_event_sysfs_show(char *page, u64 config);

struct attribute **merge_attr(struct attribute **a, struct attribute **b);

+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
+
#ifdef CONFIG_CPU_SUP_AMD

int amd_pmu_init(void);
@@ -925,9 +928,6 @@ int p6_pmu_init(void);

int knc_pmu_init(void);

-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
- char *page);
-
static inline int is_ht_workaround_enabled(void)
{
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index de24fbce5277..52bedc5a5aaa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx,
cpuctx->task_ctx = NULL;
}

- is_active ^= ctx->is_active; /* changed bits */
-
+ /*
+ * Always update time if it was set; not only when it changes.
+ * Otherwise we can 'forget' to update time for any but the last
+ * context we sched out. For example:
+ *
+ * ctx_sched_out(.event_type = EVENT_FLEXIBLE)
+ * ctx_sched_out(.event_type = EVENT_PINNED)
+ *
+ * would only update time for the pinned events.
+ */
if (is_active & EVENT_TIME) {
/* update (and stop) ctx time */
update_context_time(ctx);
update_cgrp_time_from_cpuctx(cpuctx);
}

+ is_active ^= ctx->is_active; /* changed bits */
+
if (!ctx->nr_active || !(is_active & EVENT_ALL))
return;

@@ -8532,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open,
f_flags);
if (IS_ERR(event_file)) {
err = PTR_ERR(event_file);
+ event_file = NULL;
goto err_context;
}

diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 2e1fa2357528..8c8c6b9ce915 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -74,6 +74,7 @@ arch/*/include/uapi/asm/unistd*.h
arch/*/include/uapi/asm/perf_regs.h
arch/*/lib/memcpy*.S
arch/*/lib/memset*.S
+arch/*/include/asm/*features.h
include/linux/poison.h
include/linux/hw_breakpoint.h
include/uapi/linux/perf_event.h
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 6138bdef6e63..f8ccee132867 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -4,6 +4,8 @@
#include <stdlib.h>
#include <string.h>
#include <linux/stringify.h>
+#include "header.h"
+#include "util.h"

#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg
index 238aa3927c71..f2d9c5fe58e0 100755
--- a/tools/perf/tests/perf-targz-src-pkg
+++ b/tools/perf/tests/perf-targz-src-pkg
@@ -15,7 +15,7 @@ TMP_DEST=$(mktemp -d)
tar xf ${TARBALL} -C $TMP_DEST
rm -f ${TARBALL}
cd - > /dev/null
-make -C $TMP_DEST/perf*/tools/perf > /dev/null 2>&1
+make -C $TMP_DEST/perf*/tools/perf > /dev/null
RC=$?
rm -rf ${TMP_DEST}
exit $RC
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 4b9816555946..2a83414159a6 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -337,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
chain = list_entry(node->val.next, struct callchain_list, list);
chain->has_children = has_sibling;

- if (node->val.next != node->val.prev) {
+ if (!list_empty(&node->val)) {
chain = list_entry(node->val.prev, struct callchain_list, list);
chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 52cf479bc593..dad55d04ffdd 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -56,13 +56,22 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id];
}

-static struct perf_sample synth_sample = {
+static int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process)
+{
+ struct perf_sample synth_sample = {
.pid = -1,
.tid = -1,
.time = -1,
.stream_id = -1,
.cpu = -1,
.period = 1,
+ .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+ };
+
+ return process(tool, event, &synth_sample, machine);
};

/*
@@ -186,7 +195,7 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
return -1;

- if (process(tool, event, &synth_sample, machine) != 0)
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
return -1;

return tgid;
@@ -218,7 +227,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,

event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);

- if (process(tool, event, &synth_sample, machine) != 0)
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
return -1;

return 0;
@@ -344,7 +353,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
event->mmap2.pid = tgid;
event->mmap2.tid = pid;

- if (process(tool, event, &synth_sample, machine) != 0) {
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
break;
}
@@ -402,7 +411,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,

memcpy(event->mmap.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- if (process(tool, event, &synth_sample, machine) != 0) {
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
break;
}
@@ -472,7 +481,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
/*
* Send the prepared comm event
*/
- if (process(tool, comm_event, &synth_sample, machine) != 0)
+ if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
break;

rc = 0;
@@ -701,7 +710,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
event->mmap.len = map->end - event->mmap.start;
event->mmap.pid = machine->pid;

- err = process(tool, event, &synth_sample, machine);
+ err = perf_tool__process_synth_event(tool, event, machine, process);
free(event);

return err;
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index cd67e64a0494..2fbeb59c4bdd 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -9,36 +9,32 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent

#if defined(__arm__)
#define GEN_ELF_ARCH EM_ARM
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS32
#elif defined(__aarch64__)
#define GEN_ELF_ARCH EM_AARCH64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__x86_64__)
#define GEN_ELF_ARCH EM_X86_64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__i386__)
#define GEN_ELF_ARCH EM_386
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS32
-#elif defined(__ppcle__)
-#define GEN_ELF_ARCH EM_PPC
-#define GEN_ELF_ENDIAN ELFDATA2LSB
-#define GEN_ELF_CLASS ELFCLASS64
-#elif defined(__powerpc__)
-#define GEN_ELF_ARCH EM_PPC64
-#define GEN_ELF_ENDIAN ELFDATA2MSB
-#define GEN_ELF_CLASS ELFCLASS64
-#elif defined(__powerpcle__)
+#elif defined(__powerpc64__)
#define GEN_ELF_ARCH EM_PPC64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH EM_PPC
+#define GEN_ELF_CLASS ELFCLASS32
#else
#error "unsupported architecture"
#endif

+#if __BYTE_ORDER == __BIG_ENDIAN
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#endif
+
#if GEN_ELF_CLASS == ELFCLASS64
#define elf_newehdr elf64_newehdr
#define elf_getshdr elf64_getshdr
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 6bc3ecd2e7ca..abf1366e2a24 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -279,6 +279,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
event.sample.header.misc = PERF_RECORD_MISC_USER;
event.sample.header.size = sizeof(struct perf_event_header);

+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = le64_to_cpu(branch->from);
sample.pid = btsq->pid;
sample.tid = btsq->tid;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 05d815851be1..407f11b97c8d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -979,6 +979,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
@@ -1035,6 +1036,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
@@ -1092,6 +1094,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index cd272cc21e05..ad0c0bb1fbc7 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -417,6 +417,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
* use first address as sample address
*/
memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
sample.time = id->time;
@@ -505,6 +506,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
* use first address as sample address
*/
memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
sample.time = id->time;