[PATCH 01/10] perf/core, x86: Add PERF_SAMPLE_LBR_TOS

From: kan . liang
Date: Mon Oct 07 2019 - 13:59:59 EST


From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

In LBR call stack mode, the depth of reconstructed LBR call stack limits
to the number of LBR registers. With LBR Top-of-Stack (TOS) information,
perf tool may stitch the stacks of two samples. The reconstructed LBR
call stack can break the HW limitation.

Add a new sample type for LBR TOS.

PEBS record doesn't store TOS information. For single PEBS, TOS can be
directly read from MSR, because the PMI is triggered immediately after
PEBS is written. TOS MSR is still unchanged.
For large PEBS, TOS MSR has stale value. Set -1ULL to indicate that the
TOS information is not available.

Reviewed-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
---
arch/x86/events/intel/lbr.c | 9 +++++++++
include/linux/perf_event.h | 1 +
include/uapi/linux/perf_event.h | 4 +++-
kernel/events/core.c | 12 ++++++++++++
4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ea54634eabf3..4640ff1c9ecb 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -562,6 +562,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
+ cpuc->lbr_stack.tos = tos;
}

/*
@@ -657,6 +658,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
out++;
}
cpuc->lbr_stack.nr = out;
+ cpuc->lbr_stack.tos = tos;
}

void intel_pmu_lbr_read(void)
@@ -1097,6 +1099,13 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
int i;

cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
+
+ /* Cannot get TOS for large PEBS */
+ if (cpuc->n_pebs == cpuc->n_large_pebs)
+ cpuc->lbr_stack.tos = -1ULL;
+ else
+ cpuc->lbr_stack.tos = intel_pmu_lbr_tos();
+
for (i = 0; i < x86_pmu.lbr_nr; i++) {
u64 info = lbr->lbr[i].info;
struct perf_branch_entry *e = &cpuc->lbr_entries[i];
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61448c19a132..ee9ef0c4cb08 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -100,6 +100,7 @@ struct perf_raw_record {
*/
struct perf_branch_stack {
__u64 nr;
+ __u64 tos;
struct perf_branch_entry entries[0];
};

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bb7b271397a6..fe36ebb7dc2e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -141,8 +141,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
+ PERF_SAMPLE_LBR_TOS = 1U << 20,

- PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */

__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
@@ -864,6 +865,7 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
+ * { u64 tos;} && PERF_SAMPLE_LBR_TOS
* };
*/
PERF_RECORD_SAMPLE = 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 275eae05af20..6ab0913c7b36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6468,6 +6468,15 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
perf_output_put(handle, data->phys_addr);

+ if (sample_type & PERF_SAMPLE_LBR_TOS) {
+ u64 tos = -1ULL;
+
+ if (data->br_stack)
+ tos = data->br_stack->tos;
+
+ perf_output_put(handle, tos);
+ }
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;

@@ -6656,6 +6665,9 @@ void perf_prepare_sample(struct perf_event_header *header,

if (sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr);
+
+ if (sample_type & PERF_SAMPLE_LBR_TOS)
+ header->size += sizeof(u64);
}

static __always_inline int
--
2.17.1