Re: [tip:perf/x86] perf/x86/intel: Support task events with Intel CQM

From: Peter Zijlstra
Date: Thu Mar 05 2015 - 16:10:36 EST


On Thu, Mar 05, 2015 at 01:55:16AM +0100, Ingo Molnar wrote:
> That's a fair point. Peter?

Indeed, sorry. And it's because of a stupid reason.

---
Subject: perf: Remove type specific target pointers

The only reason CQM had to use a hard-coded pmu type was so it could use
cqm_target in hw_perf_event.

Do away with the {tp,bp,cqm}_target pointers and provide a
non-type-specific one.

This allows us to do away with that silly pmu type as well.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/arm/kernel/hw_breakpoint.c | 2 +-
arch/arm64/kernel/hw_breakpoint.c | 2 +-
arch/x86/kernel/cpu/perf_event_intel_cqm.c | 7 +++----
include/linux/perf_event.h | 4 +---
include/uapi/linux/perf_event.h | 1 -
kernel/events/core.c | 14 ++++----------
kernel/events/hw_breakpoint.c | 8 ++++----
kernel/trace/trace_uprobe.c | 10 +++++-----
8 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7fc70ae21185..dc7d0a95bd36 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Per-cpu breakpoints are not supported by our stepping
* mechanism.
*/
- if (!bp->hw.bp_target)
+ if (!bp->hw.target)
return -EINVAL;

/*
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 98bbe06e469c..e7d934d3afe0 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Disallow per-task kernel breakpoints since these would
* complicate the stepping code.
*/
- if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
+ if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
return -EINVAL;

return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 9a8ef8376fcd..e4d1b8b738fa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -263,7 +263,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
/*
* Events that target same task are placed into the same cache group.
*/
- if (a->hw.cqm_target == b->hw.cqm_target)
+ if (a->hw.target == b->hw.target)
return true;

/*
@@ -279,7 +279,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
{
if (event->attach_state & PERF_ATTACH_TASK)
- return perf_cgroup_from_task(event->hw.cqm_target);
+ return perf_cgroup_from_task(event->hw.target);

return event->cgrp;
}
@@ -1365,8 +1365,7 @@ static int __init intel_cqm_init(void)

__perf_cpu_notifier(intel_cqm_cpu_notifier);

- ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm",
- PERF_TYPE_INTEL_CQM);
+ ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
if (ret)
pr_err("Intel CQM perf registration failed: %d\n", ret);
else
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b8f69d35b5c7..b16eac5f54ce 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -119,7 +119,6 @@ struct hw_perf_event {
struct hrtimer hrtimer;
};
struct { /* tracepoint */
- struct task_struct *tp_target;
/* for tp_event->class */
struct list_head tp_list;
};
@@ -129,7 +128,6 @@ struct hw_perf_event {
struct list_head cqm_events_entry;
struct list_head cqm_groups_entry;
struct list_head cqm_group_entry;
- struct task_struct *cqm_target;
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct { /* breakpoint */
@@ -138,12 +136,12 @@ struct hw_perf_event {
* problem hw_breakpoint has with context
* creation and event initalization.
*/
- struct task_struct *bp_target;
struct arch_hw_breakpoint info;
struct list_head bp_list;
};
#endif
};
+ struct task_struct *target;
int state;
local64_t prev_count;
u64 sample_period;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3c8b45de57ec..1e3cd07cf76e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -32,7 +32,6 @@ enum perf_type_id {
PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4,
PERF_TYPE_BREAKPOINT = 5,
- PERF_TYPE_INTEL_CQM = 6,

PERF_TYPE_MAX, /* non-ABI */
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89f0f16d55f9..7a0816ef7d18 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7171,18 +7171,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

if (task) {
event->attach_state = PERF_ATTACH_TASK;
-
- if (attr->type == PERF_TYPE_TRACEPOINT)
- event->hw.tp_target = task;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
- * hw_breakpoint is a bit difficult here..
+ * XXX pmu::event_init needs to know what task to account to
+ * and we cannot use the ctx information because we need the
+ * pmu before we get a ctx.
*/
- else if (attr->type == PERF_TYPE_BREAKPOINT)
- event->hw.bp_target = task;
-#endif
- else if (attr->type == PERF_TYPE_INTEL_CQM)
- event->hw.cqm_target = task;
+ event->hw.target = task;
}

if (!overflow_handler && parent_event) {
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 9803a6600d49..92ce5f4ccc26 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
*/
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
- struct task_struct *tsk = bp->hw.bp_target;
+ struct task_struct *tsk = bp->hw.target;
struct perf_event *iter;
int count = 0;

list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
- if (iter->hw.bp_target == tsk &&
+ if (iter->hw.target == tsk &&
find_slot_idx(iter) == type &&
(iter->cpu < 0 || cpu == iter->cpu))
count += hw_breakpoint_weight(iter);
@@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
int nr;

nr = info->cpu_pinned;
- if (!bp->hw.bp_target)
+ if (!bp->hw.target)
nr += max_task_bp_pinned(cpu, type);
else
nr += task_bp_pinned(cpu, bp, type);
@@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
weight = -weight;

/* Pinned counter cpu profiling */
- if (!bp->hw.bp_target) {
+ if (!bp->hw.target) {
get_bp_info(bp->cpu, type)->cpu_pinned += weight;
return;
}
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 7dc1c8abecd6..996e452e1eb3 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1005,7 +1005,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
return true;

list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
- if (event->hw.tp_target->mm == mm)
+ if (event->hw.target->mm == mm)
return true;
}

@@ -1015,7 +1015,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
static inline bool
uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
{
- return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+ return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
}

static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
@@ -1023,10 +1023,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
bool done;

write_lock(&tu->filter.rwlock);
- if (event->hw.tp_target) {
+ if (event->hw.target) {
list_del(&event->hw.tp_list);
done = tu->filter.nr_systemwide ||
- (event->hw.tp_target->flags & PF_EXITING) ||
+ (event->hw.target->flags & PF_EXITING) ||
uprobe_filter_event(tu, event);
} else {
tu->filter.nr_systemwide--;
@@ -1046,7 +1046,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
int err;

write_lock(&tu->filter.rwlock);
- if (event->hw.tp_target) {
+ if (event->hw.target) {
/*
* event->parent != NULL means copy_process(), we can avoid
* uprobe_apply(). current->mm must be probed and we can rely

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/