[PATCH 05/12] x86/cqm,perf/core: Cgroup support prepare

From: Vikas Shivappa
Date: Fri Jan 06 2017 - 17:43:17 EST


From: David Carrillo-Cisneros <davidcc@xxxxxxxxxx>

cgroup hierarchy monitoring is not supported currently. This patch
builds all the necessary datastructures, cgroup APIs like alloc, free
etc and necessary quirks for supporting cgroup hierarchy monitoring in
later patches.

- Introduce a architecture specific data structure arch_info in
perf_cgroup to keep track of RMIDs and cgroup hierarchical monitoring.
- perf sched_in calls all the cgroup ancestors when a cgroup is
scheduled in. This will not work with cqm as we have a common per pkg
rmid associated with one task and hence cannot write different RMIds
into the MSR for each event. cqm driver enables a flag
PERF_EV_CGROUP_NO_RECURSION which indicates the perf to not call all
ancestor cgroups for each event and let the driver handle the hierarchy
monitoring for cgroup.
- Introduce event_terminate as event_destroy is called after cgrp is
disassociated from the event to support rmid handling of the cgroup.
This helps cqm clean up the cqm specific arch_info.
- Add the cgroup APIs for alloc,free,attach and can_attach

The above framework will be used to build different cgroup features in
later patches.

Tests: Same as before. Cgroup still doesnt work but we did the prep to
get it to work

Patch modified/refactored by Vikas Shivappa
<vikas.shivappa@xxxxxxxxxxxxxxx> to support recycling removal.

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---
arch/x86/events/intel/cqm.c | 19 ++++++++++++++++++-
arch/x86/include/asm/perf_event.h | 27 +++++++++++++++++++++++++++
include/linux/perf_event.h | 32 ++++++++++++++++++++++++++++++++
kernel/events/core.c | 28 +++++++++++++++++++++++++++-
4 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 68fd1da..a9bd7bd 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -741,7 +741,13 @@ static int intel_cqm_event_init(struct perf_event *event)
INIT_LIST_HEAD(&event->hw.cqm_group_entry);
INIT_LIST_HEAD(&event->hw.cqm_groups_entry);

- event->destroy = intel_cqm_event_destroy;
+ /*
+ * CQM driver handles cgroup recursion and since only noe
+ * RMID can be programmed at the time in each core, then
+ * it is incompatible with the way generic code handles
+ * cgroup hierarchies.
+ */
+ event->event_caps |= PERF_EV_CAP_CGROUP_NO_RECURSION;

mutex_lock(&cache_mutex);

@@ -918,6 +924,17 @@ static int intel_cqm_event_init(struct perf_event *event)
.read = intel_cqm_event_read,
.count = intel_cqm_event_count,
};
+#ifdef CONFIG_CGROUP_PERF
+int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+ struct cgroup_subsys_state *new_css)
+{}
+void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css)
+{}
+void perf_cgroup_arch_attach(struct cgroup_taskset *tset)
+{}
+int perf_cgroup_arch_can_attach(struct cgroup_taskset *tset)
+{}
+#endif

static inline void cqm_pick_event_reader(int cpu)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061..f38c7f0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -299,4 +299,31 @@ static inline void perf_check_microcode(void) { }

#define arch_perf_out_copy_user copy_from_user_nmi

+/*
+ * Hooks for architecture specific features of perf_event cgroup.
+ * Currently used by Intel's CQM.
+ */
+#ifdef CONFIG_INTEL_RDT_M
+#ifdef CONFIG_CGROUP_PERF
+
+#define perf_cgroup_arch_css_alloc perf_cgroup_arch_css_alloc
+
+int perf_cgroup_arch_css_alloc(struct cgroup_subsys_state *parent_css,
+ struct cgroup_subsys_state *new_css);
+
+#define perf_cgroup_arch_css_free perf_cgroup_arch_css_free
+
+void perf_cgroup_arch_css_free(struct cgroup_subsys_state *css);
+
+#define perf_cgroup_arch_attach perf_cgroup_arch_attach
+
+void perf_cgroup_arch_attach(struct cgroup_taskset *tset);
+
+#define perf_cgroup_arch_can_attach perf_cgroup_arch_can_attach
+
+int perf_cgroup_arch_can_attach(struct cgroup_taskset *tset);
+
+#endif
+
+#endif
#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a8f4749..410642a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -300,6 +300,12 @@ struct pmu {
int (*event_init) (struct perf_event *event);

/*
+ * Terminate the event for this PMU. Optional complement for a
+ * successful event_init. Called before the event fields are tear down.
+ */
+ void (*event_terminate) (struct perf_event *event);
+
+ /*
* Notification that the event was mapped or unmapped. Called
* in the context of the mapping task.
*/
@@ -516,9 +522,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
* PERF_EV_CAP_SOFTWARE: Is a software event.
* PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
* from any CPU in the package where it is active.
+ * PERF_EV_CAP_CGROUP_NO_RECURSION: A cgroup event that handles its own
+ * cgroup scoping. It does not need to be enabled for all of its descendants
+ * cgroups.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
+#define PERF_EV_CAP_CGROUP_NO_RECURSION BIT(2)

#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
@@ -823,6 +833,8 @@ struct perf_cgroup_info {
};

struct perf_cgroup {
+ /* Architecture specific information. */
+ void *arch_info;
struct cgroup_subsys_state css;
struct perf_cgroup_info __percpu *info;
};
@@ -844,6 +856,7 @@ struct perf_cgroup {

#ifdef CONFIG_PERF_EVENTS

+extern int is_cgroup_event(struct perf_event *event);
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
@@ -1387,4 +1400,23 @@ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
#define perf_event_exit_cpu NULL
#endif

+/*
+ * Hooks for architecture specific extensions for perf_cgroup.
+ */
+#ifndef perf_cgroup_arch_css_alloc
+#define perf_cgroup_arch_css_alloc(parent_css, new_css) 0
+#endif
+
+#ifndef perf_cgroup_arch_css_free
+#define perf_cgroup_arch_css_free(css) do { } while (0)
+#endif
+
+#ifndef perf_cgroup_arch_attach
+#define perf_cgroup_arch_attach(tskset) do { } while (0)
+#endif
+
+#ifndef perf_cgroup_arch_can_attach
+#define perf_cgroup_arch_can_attach(tskset) 0
+#endif
+
#endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509..229f611 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -590,6 +590,9 @@ static inline u64 perf_event_clock(struct perf_event *event)
if (!cpuctx->cgrp)
return false;

+ if (event->event_caps & PERF_EV_CAP_CGROUP_NO_RECURSION)
+ return cpuctx->cgrp->css.cgroup == event->cgrp->css.cgroup;
+
/*
* Cgroup scoping is recursive. An event enabled for a cgroup is
* also enabled for all its descendant cgroups. If @cpuctx's
@@ -606,7 +609,7 @@ static inline void perf_detach_cgroup(struct perf_event *event)
event->cgrp = NULL;
}

-static inline int is_cgroup_event(struct perf_event *event)
+int is_cgroup_event(struct perf_event *event)
{
return event->cgrp != NULL;
}
@@ -4019,6 +4022,9 @@ static void _free_event(struct perf_event *event)
mutex_unlock(&event->mmap_mutex);
}

+ if (event->pmu->event_terminate)
+ event->pmu->event_terminate(event);
+
if (is_cgroup_event(event))
perf_detach_cgroup(event);

@@ -9246,6 +9252,8 @@ static void account_event(struct perf_event *event)
exclusive_event_destroy(event);

err_pmu:
+ if (event->pmu->event_terminate)
+ event->pmu->event_terminate(event);
if (event->destroy)
event->destroy(event);
module_put(pmu->module);
@@ -10748,6 +10756,7 @@ static int __init perf_event_sysfs_init(void)
perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct perf_cgroup *jc;
+ int ret;

jc = kzalloc(sizeof(*jc), GFP_KERNEL);
if (!jc)
@@ -10759,6 +10768,12 @@ static int __init perf_event_sysfs_init(void)
return ERR_PTR(-ENOMEM);
}

+ jc->arch_info = NULL;
+
+ ret = perf_cgroup_arch_css_alloc(parent_css, &jc->css);
+ if (ret)
+ return ERR_PTR(ret);
+
return &jc->css;
}

@@ -10766,6 +10781,8 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);

+ perf_cgroup_arch_css_free(css);
+
free_percpu(jc->info);
kfree(jc);
}
@@ -10786,11 +10803,20 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)

cgroup_taskset_for_each(task, css, tset)
task_function_call(task, __perf_cgroup_move, task);
+
+ perf_cgroup_arch_attach(tset);
+}
+
+static int perf_cgroup_can_attach(struct cgroup_taskset *tset)
+{
+ return perf_cgroup_arch_can_attach(tset);
}

+
struct cgroup_subsys perf_event_cgrp_subsys = {
.css_alloc = perf_cgroup_css_alloc,
.css_free = perf_cgroup_css_free,
+ .can_attach = perf_cgroup_can_attach,
.attach = perf_cgroup_attach,
};
#endif /* CONFIG_CGROUP_PERF */
--
1.9.1