[RFC][PATCH 2/7] perf,x86: Ensure perf_sched_cb_{inc,dec}() is only called from pmu::{add,del}()

From: Peter Zijlstra
Date: Fri Jul 08 2016 - 10:05:55 EST


Currently perf_sched_cb_{inc,dec}() are called from
pmu::{start,stop}(), which has the problem that this can happen from
NMI context; this makes it hard to optimize perf_pmu_sched_task().

Furthermore, we only need this accounting on pmu::{add,del}(), so
doing it from pmu::{start,stop}() does more work than we need.

Introduce x86_pmu::{add,del}() and wire up the LBR and PEBS.
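
To illustrate the intended pairing, here is a minimal sketch (not the
actual kernel code; the example_pmu_* names are made up for
illustration): the perf_sched_cb accounting that used to live in the
enable/disable paths now lives in ->add()/->del(), which are only
reached from x86_pmu_{add,del}() and thus never from NMI context.

  /* Sketch only: accounting moves from enable/disable to add/del. */
  static void example_pmu_add(struct perf_event *event)
  {
  	/* Called from x86_pmu_add(), never from NMI context. */
  	if (needs_branch_stack(event))
  		perf_sched_cb_inc(event->ctx->pmu);
  }

  static void example_pmu_del(struct perf_event *event)
  {
  	/* Called from x86_pmu_del(); pairs with the ->add() above. */
  	if (needs_branch_stack(event))
  		perf_sched_cb_dec(event->ctx->pmu);
  }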

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/events/core.c | 24 ++++++++++++++++++++++--
arch/x86/events/intel/core.c | 31 ++++++++++++++++++-------------
arch/x86/events/intel/ds.c | 8 ++------
arch/x86/events/intel/lbr.c | 4 ++--
arch/x86/events/perf_event.h | 10 ++++++++--
5 files changed, 52 insertions(+), 25 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1197,6 +1197,9 @@ static int x86_pmu_add(struct perf_event
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole.
+ *
+ * If commit fails, we'll call ->del() on all events
+ * for which ->add() was called.
*/
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
@@ -1219,6 +1222,14 @@ static int x86_pmu_add(struct perf_event
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;

+ if (x86_pmu.add) {
+ /*
+ * This runs before x86_pmu_enable() calls x86_pmu_start(),
+ * so we enable LBRs before any event needs them.
+ */
+ x86_pmu.add(event);
+ }
+
ret = 0;
out:
return ret;
@@ -1342,7 +1353,7 @@ static void x86_pmu_del(struct perf_even
event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;

/*
- * If we're called during a txn, we don't need to do anything.
+ * If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*
@@ -1350,7 +1361,7 @@ static void x86_pmu_del(struct perf_even
* an event added during that same TXN.
*/
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
- return;
+ goto do_del;

/*
* Not a TXN, therefore cleanup properly.
@@ -1380,6 +1391,15 @@ static void x86_pmu_del(struct perf_even
--cpuc->n_events;

perf_event_update_userpage(event);
+
+do_del:
+ if (x86_pmu.del) {
+ /*
+ * This runs after x86_pmu_stop(), so we disable LBRs only once
+ * no event can need them anymore.
+ */
+ x86_pmu.del(event);
+ }
}

int x86_pmu_handle_irq(struct pt_regs *regs)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1907,13 +1907,6 @@ static void intel_pmu_disable_event(stru
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);

- /*
- * must disable before any actual event
- * because any event may be combined with LBR
- */
- if (needs_branch_stack(event))
- intel_pmu_lbr_disable(event);
-
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -1925,6 +1918,14 @@ static void intel_pmu_disable_event(stru
intel_pmu_pebs_disable(event);
}

+static void intel_pmu_del_event(struct perf_event *event)
+{
+ if (needs_branch_stack(event))
+ intel_pmu_lbr_del(event);
+ if (event->attr.precise_ip)
+ intel_pmu_pebs_del(event);
+}
+
static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1968,12 +1969,6 @@ static void intel_pmu_enable_event(struc
intel_pmu_enable_bts(hwc->config);
return;
}
- /*
- * must enabled before any actual event
- * because any event may be combined with LBR
- */
- if (needs_branch_stack(event))
- intel_pmu_lbr_enable(event);

if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1994,6 +1989,14 @@ static void intel_pmu_enable_event(struc
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

+static void intel_pmu_add_event(struct perf_event *event)
+{
+ if (event->attr.precise_ip)
+ intel_pmu_pebs_add(event);
+ if (needs_branch_stack(event))
+ intel_pmu_lbr_add(event);
+}
+
/*
* Save and restart an expired event. Called by NMI contexts,
* so it has to be careful about preempting normal event ops:
@@ -3290,6 +3293,8 @@ static __initconst const struct x86_pmu
.enable_all = intel_pmu_enable_all,
.enable = intel_pmu_enable_event,
.disable = intel_pmu_disable_event,
+ .add = intel_pmu_add_event,
+ .del = intel_pmu_del_event,
.hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -831,7 +831,7 @@ static inline void pebs_update_threshold
ds->pebs_interrupt_threshold = threshold;
}

-static void intel_pmu_pebs_add(struct perf_event *event)
+void intel_pmu_pebs_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
@@ -853,8 +853,6 @@ void intel_pmu_pebs_enable(struct perf_e
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;

- intel_pmu_pebs_add(event);
-
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

cpuc->pebs_enabled |= 1ULL << hwc->idx;
@@ -874,7 +872,7 @@ void intel_pmu_pebs_enable(struct perf_e
}
}

-static void intel_pmu_pebs_del(struct perf_event *event)
+void intel_pmu_pebs_del(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
@@ -910,8 +908,6 @@ void intel_pmu_pebs_disable(struct perf_
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
- intel_pmu_pebs_del(event);
}

void intel_pmu_pebs_enable_all(void)
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -422,7 +422,7 @@ static inline bool branch_user_callstack
return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
@@ -450,7 +450,7 @@ void intel_pmu_lbr_enable(struct perf_ev
perf_sched_cb_inc(event->ctx->pmu);
}

-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -510,6 +510,8 @@ struct x86_pmu {
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
+ void (*add)(struct perf_event *);
+ void (*del)(struct perf_event *);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
@@ -890,6 +892,10 @@ extern struct event_constraint intel_skl

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
void intel_pmu_pebs_enable(struct perf_event *event);

void intel_pmu_pebs_disable(struct perf_event *event);
@@ -908,9 +914,9 @@ u64 lbr_from_signext_quirk_wr(u64 val);

void intel_pmu_lbr_reset(void);

-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);

-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);

void intel_pmu_lbr_enable_all(bool pmi);