[PATCH v2 1/6] perf/x86/amd/uncore: Refactor uncore management

From: Sandipan Das
Date: Thu Oct 05 2023 - 12:41:13 EST


Since struct amd_uncore is used to manage per-cpu contexts, rename it to
amd_uncore_ctx in order to better reflect its purpose. Add a new struct
amd_uncore_pmu to encapsulate all attributes which are shared by per-cpu
contexts for a corresponding PMU. These include the number of counters,
active mask, MSR and RDPMC base addresses, etc. Since the struct pmu is
now embedded, the corresponding amd_uncore_pmu for a given event can be
found by simply using container_of().

Finally, move all PMU-specific code to separate functions. While the
original event management functions continue to provide the base
functionality, all PMU-specific quirks and customizations are applied in
separate functions.

The motivation is to simplify the management of uncore PMUs.

Signed-off-by: Sandipan Das <sandipan.das@xxxxxxx>
---
arch/x86/events/amd/uncore.c | 737 ++++++++++++++++++-----------------
1 file changed, 379 insertions(+), 358 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 83f15fe411b3..ffcecda13d65 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -27,56 +27,41 @@

#define COUNTER_SHIFT 16

+#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */
+#define UNCORE_NAME_LEN 16
+
#undef pr_fmt
#define pr_fmt(fmt) "amd_uncore: " fmt

static int pmu_version;
-static int num_counters_llc;
-static int num_counters_nb;
-static bool l3_mask;

static HLIST_HEAD(uncore_unused_list);

-struct amd_uncore {
+struct amd_uncore_ctx {
int id;
int refcnt;
int cpu;
- int num_counters;
- int rdpmc_base;
- u32 msr_base;
- cpumask_t *active_mask;
- struct pmu *pmu;
struct perf_event **events;
struct hlist_node node;
};

-static struct amd_uncore * __percpu *amd_uncore_nb;
-static struct amd_uncore * __percpu *amd_uncore_llc;
-
-static struct pmu amd_nb_pmu;
-static struct pmu amd_llc_pmu;
-
-static cpumask_t amd_nb_active_mask;
-static cpumask_t amd_llc_active_mask;
-
-static bool is_nb_event(struct perf_event *event)
-{
- return event->pmu->type == amd_nb_pmu.type;
-}
+struct amd_uncore_pmu {
+ char name[UNCORE_NAME_LEN];
+ int num_counters;
+ int rdpmc_base;
+ u32 msr_base;
+ cpumask_t active_mask;
+ struct pmu pmu;
+ struct amd_uncore_ctx * __percpu *ctx;
+ int (*id)(unsigned int cpu);
+};

-static bool is_llc_event(struct perf_event *event)
-{
- return event->pmu->type == amd_llc_pmu.type;
-}
+static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX];
+static int num_pmus __read_mostly;

-static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
- if (is_nb_event(event) && amd_uncore_nb)
- return *per_cpu_ptr(amd_uncore_nb, event->cpu);
- else if (is_llc_event(event) && amd_uncore_llc)
- return *per_cpu_ptr(amd_uncore_llc, event->cpu);
-
- return NULL;
+ return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

static void amd_uncore_read(struct perf_event *event)
@@ -118,7 +103,7 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
hwc->state |= PERF_HES_STOPPED;

if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- amd_uncore_read(event);
+ event->pmu->read(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
@@ -126,15 +111,16 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
static int amd_uncore_add(struct perf_event *event, int flags)
{
int i;
- struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;

/* are we already assigned? */
- if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+ if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
goto out;

- for (i = 0; i < uncore->num_counters; i++) {
- if (uncore->events[i] == event) {
+ for (i = 0; i < pmu->num_counters; i++) {
+ if (ctx->events[i] == event) {
hwc->idx = i;
goto out;
}
@@ -142,8 +128,8 @@ static int amd_uncore_add(struct perf_event *event, int flags)

/* if not, take the first available counter */
hwc->idx = -1;
- for (i = 0; i < uncore->num_counters; i++) {
- if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+ for (i = 0; i < pmu->num_counters; i++) {
+ if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
hwc->idx = i;
break;
}
@@ -153,23 +139,13 @@ static int amd_uncore_add(struct perf_event *event, int flags)
if (hwc->idx == -1)
return -EBUSY;

- hwc->config_base = uncore->msr_base + (2 * hwc->idx);
- hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
- hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+ hwc->config_base = pmu->msr_base + (2 * hwc->idx);
+ hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
+ hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

- /*
- * The first four DF counters are accessible via RDPMC index 6 to 9
- * followed by the L3 counters from index 10 to 15. For processors
- * with more than four DF counters, the DF RDPMC assignments become
- * discontiguous as the additional counters are accessible starting
- * from index 16.
- */
- if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
- hwc->event_base_rdpmc += NUM_COUNTERS_L3;
-
if (flags & PERF_EF_START)
- amd_uncore_start(event, PERF_EF_RELOAD);
+ event->pmu->start(event, PERF_EF_RELOAD);

return 0;
}
@@ -177,55 +153,36 @@ static int amd_uncore_add(struct perf_event *event, int flags)
static void amd_uncore_del(struct perf_event *event, int flags)
{
int i;
- struct amd_uncore *uncore = event_to_amd_uncore(event);
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;

- amd_uncore_stop(event, PERF_EF_UPDATE);
+ event->pmu->stop(event, PERF_EF_UPDATE);

- for (i = 0; i < uncore->num_counters; i++) {
- if (cmpxchg(&uncore->events[i], event, NULL) == event)
+ for (i = 0; i < pmu->num_counters; i++) {
+ if (cmpxchg(&ctx->events[i], event, NULL) == event)
break;
}

hwc->idx = -1;
}

-/*
- * Return a full thread and slice mask unless user
- * has provided them
- */
-static u64 l3_thread_slice_mask(u64 config)
-{
- if (boot_cpu_data.x86 <= 0x18)
- return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
- ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
-
- /*
- * If the user doesn't specify a threadmask, they're not trying to
- * count core 0, so we enable all cores & threads.
- * We'll also assume that they want to count slice 0 if they specify
- * a threadmask and leave sliceid and enallslices unpopulated.
- */
- if (!(config & AMD64_L3_F19H_THREAD_MASK))
- return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
- AMD64_L3_EN_ALL_CORES;
-
- return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
- AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
- AMD64_L3_COREID_MASK);
-}
-
static int amd_uncore_event_init(struct perf_event *event)
{
- struct amd_uncore *uncore;
+ struct amd_uncore_pmu *pmu;
+ struct amd_uncore_ctx *ctx;
struct hw_perf_event *hwc = &event->hw;
- u64 event_mask = AMD64_RAW_EVENT_MASK_NB;

if (event->attr.type != event->pmu->type)
return -ENOENT;

- if (pmu_version >= 2 && is_nb_event(event))
- event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ pmu = event_to_amd_uncore_pmu(event);
+ ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
+ if (!ctx)
+ return -ENODEV;

/*
* NB and Last level cache counters (MSRs) are shared across all cores
@@ -235,28 +192,14 @@ static int amd_uncore_event_init(struct perf_event *event)
* out. So we do not support sampling and per-thread events via
* CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- hwc->config = event->attr.config & event_mask;
+ hwc->config = event->attr.config;
hwc->idx = -1;

- if (event->cpu < 0)
- return -EINVAL;
-
- /*
- * SliceMask and ThreadMask need to be set for certain L3 events.
- * For other events, the two fields do not affect the count.
- */
- if (l3_mask && is_llc_event(event))
- hwc->config |= l3_thread_slice_mask(event->attr.config);
-
- uncore = event_to_amd_uncore(event);
- if (!uncore)
- return -ENODEV;
-
/*
* since request can come in to any of the shared cores, we will remap
* to a single common cpu.
*/
- event->cpu = uncore->cpu;
+ event->cpu = ctx->cpu;

return 0;
}
@@ -278,17 +221,10 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- cpumask_t *active_mask;
- struct pmu *pmu = dev_get_drvdata(dev);
-
- if (pmu->type == amd_nb_pmu.type)
- active_mask = &amd_nb_active_mask;
- else if (pmu->type == amd_llc_pmu.type)
- active_mask = &amd_llc_active_mask;
- else
- return 0;
+ struct pmu *ptr = dev_get_drvdata(dev);
+ struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

- return cpumap_print_to_pagebuf(true, buf, active_mask);
+ return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

@@ -396,113 +332,57 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = {
NULL,
};

-static struct pmu amd_nb_pmu = {
- .task_ctx_nr = perf_invalid_context,
- .attr_groups = amd_uncore_df_attr_groups,
- .name = "amd_nb",
- .event_init = amd_uncore_event_init,
- .add = amd_uncore_add,
- .del = amd_uncore_del,
- .start = amd_uncore_start,
- .stop = amd_uncore_stop,
- .read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
- .module = THIS_MODULE,
-};
-
-static struct pmu amd_llc_pmu = {
- .task_ctx_nr = perf_invalid_context,
- .attr_groups = amd_uncore_l3_attr_groups,
- .attr_update = amd_uncore_l3_attr_update,
- .name = "amd_l2",
- .event_init = amd_uncore_event_init,
- .add = amd_uncore_add,
- .del = amd_uncore_del,
- .start = amd_uncore_start,
- .stop = amd_uncore_stop,
- .read = amd_uncore_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
- .module = THIS_MODULE,
-};
-
-static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
-{
- return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
- cpu_to_node(cpu));
-}
-
-static inline struct perf_event **
-amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
-{
- return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
- cpu_to_node(cpu));
-}
-
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
- struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
-
- if (amd_uncore_nb) {
- *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
- uncore_nb = amd_uncore_alloc(cpu);
- if (!uncore_nb)
- goto fail;
- uncore_nb->cpu = cpu;
- uncore_nb->num_counters = num_counters_nb;
- uncore_nb->rdpmc_base = RDPMC_BASE_NB;
- uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
- uncore_nb->active_mask = &amd_nb_active_mask;
- uncore_nb->pmu = &amd_nb_pmu;
- uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
- if (!uncore_nb->events)
+ struct amd_uncore_pmu *pmu;
+ struct amd_uncore_ctx *ctx;
+ int node = cpu_to_node(cpu), i;
+
+ for (i = 0; i < num_pmus; i++) {
+ pmu = &pmus[i];
+ *per_cpu_ptr(pmu->ctx, cpu) = NULL;
+ ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL,
+ node);
+ if (!ctx)
goto fail;
- uncore_nb->id = -1;
- *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
- }

- if (amd_uncore_llc) {
- *per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
- uncore_llc = amd_uncore_alloc(cpu);
- if (!uncore_llc)
- goto fail;
- uncore_llc->cpu = cpu;
- uncore_llc->num_counters = num_counters_llc;
- uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
- uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
- uncore_llc->active_mask = &amd_llc_active_mask;
- uncore_llc->pmu = &amd_llc_pmu;
- uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
- if (!uncore_llc->events)
+ ctx->cpu = cpu;
+ ctx->events = kzalloc_node(sizeof(struct perf_event *) *
+ pmu->num_counters, GFP_KERNEL,
+ node);
+ if (!ctx->events)
goto fail;
- uncore_llc->id = -1;
- *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
+
+ ctx->id = -1;
+ *per_cpu_ptr(pmu->ctx, cpu) = ctx;
}

return 0;

fail:
- if (uncore_nb) {
- kfree(uncore_nb->events);
- kfree(uncore_nb);
- }
+ /* Rollback */
+ for (; i >= 0; i--) {
+ pmu = &pmus[i];
+ ctx = *per_cpu_ptr(pmu->ctx, cpu);
+ if (!ctx)
+ continue;

- if (uncore_llc) {
- kfree(uncore_llc->events);
- kfree(uncore_llc);
+ kfree(ctx->events);
+ kfree(ctx);
}

return -ENOMEM;
}

-static struct amd_uncore *
-amd_uncore_find_online_sibling(struct amd_uncore *this,
- struct amd_uncore * __percpu *uncores)
+static struct amd_uncore_ctx *
+amd_uncore_find_online_sibling(struct amd_uncore_ctx *this,
+ struct amd_uncore_pmu *pmu)
{
unsigned int cpu;
- struct amd_uncore *that;
+ struct amd_uncore_ctx *that;

for_each_online_cpu(cpu) {
- that = *per_cpu_ptr(uncores, cpu);
+ that = *per_cpu_ptr(pmu->ctx, cpu);

if (!that)
continue;
@@ -523,24 +403,16 @@ amd_uncore_find_online_sibling(struct amd_uncore *this,

static int amd_uncore_cpu_starting(unsigned int cpu)
{
- unsigned int eax, ebx, ecx, edx;
- struct amd_uncore *uncore;
-
- if (amd_uncore_nb) {
- uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
- cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
- uncore->id = ecx & 0xff;
-
- uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
- *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
- }
-
- if (amd_uncore_llc) {
- uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- uncore->id = get_llc_id(cpu);
+ struct amd_uncore_pmu *pmu;
+ struct amd_uncore_ctx *ctx;
+ int i;

- uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
- *per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
+ for (i = 0; i < num_pmus; i++) {
+ pmu = &pmus[i];
+ ctx = *per_cpu_ptr(pmu->ctx, cpu);
+ ctx->id = pmu->id(cpu);
+ ctx = amd_uncore_find_online_sibling(ctx, pmu);
+ *per_cpu_ptr(pmu->ctx, cpu) = ctx;
}

return 0;
@@ -548,195 +420,359 @@ static int amd_uncore_cpu_starting(unsigned int cpu)

static void uncore_clean_online(void)
{
- struct amd_uncore *uncore;
+ struct amd_uncore_ctx *ctx;
struct hlist_node *n;

- hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
- hlist_del(&uncore->node);
- kfree(uncore->events);
- kfree(uncore);
+ hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) {
+ hlist_del(&ctx->node);
+ kfree(ctx->events);
+ kfree(ctx);
}
}

-static void uncore_online(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
+static int amd_uncore_cpu_online(unsigned int cpu)
{
- struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+ struct amd_uncore_pmu *pmu;
+ struct amd_uncore_ctx *ctx;
+ int i;

uncore_clean_online();

- if (cpu == uncore->cpu)
- cpumask_set_cpu(cpu, uncore->active_mask);
+ for (i = 0; i < num_pmus; i++) {
+ pmu = &pmus[i];
+ ctx = *per_cpu_ptr(pmu->ctx, cpu);
+ if (cpu == ctx->cpu)
+ cpumask_set_cpu(cpu, &pmu->active_mask);
+ }
+
+ return 0;
}

-static int amd_uncore_cpu_online(unsigned int cpu)
+static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
- if (amd_uncore_nb)
- uncore_online(cpu, amd_uncore_nb);
+ struct amd_uncore_ctx *this, *that;
+ struct amd_uncore_pmu *pmu;
+ int i, j;
+
+ for (i = 0; i < num_pmus; i++) {
+ pmu = &pmus[i];
+ this = *per_cpu_ptr(pmu->ctx, cpu);
+
+ /* this cpu is going down, migrate to a shared sibling if possible */
+ for_each_online_cpu(j) {
+ that = *per_cpu_ptr(pmu->ctx, j);
+
+ if (cpu == j)
+ continue;
+
+ if (this == that) {
+ perf_pmu_migrate_context(&pmu->pmu, cpu, j);
+ cpumask_clear_cpu(cpu, &pmu->active_mask);
+ cpumask_set_cpu(j, &pmu->active_mask);
+ that->cpu = j;
+ break;
+ }
+ }
+ }

- if (amd_uncore_llc)
- uncore_online(cpu, amd_uncore_llc);
+ return 0;
+}
+
+static int amd_uncore_cpu_dead(unsigned int cpu)
+{
+ struct amd_uncore_ctx *ctx;
+ struct amd_uncore_pmu *pmu;
+ int i;
+
+ for (i = 0; i < num_pmus; i++) {
+ pmu = &pmus[i];
+ ctx = *per_cpu_ptr(pmu->ctx, cpu);
+ if (cpu == ctx->cpu)
+ cpumask_clear_cpu(cpu, &pmu->active_mask);
+
+ if (!--ctx->refcnt) {
+ kfree(ctx->events);
+ kfree(ctx);
+ }
+
+ *per_cpu_ptr(pmu->ctx, cpu) = NULL;
+ }

return 0;
}

-static void uncore_down_prepare(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
+static int amd_uncore_df_id(unsigned int cpu)
{
- unsigned int i;
- struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+ unsigned int eax, ebx, ecx, edx;

- if (this->cpu != cpu)
- return;
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);

- /* this cpu is going down, migrate to a shared sibling if possible */
- for_each_online_cpu(i) {
- struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+ return ecx & 0xff;
+}

- if (cpu == i)
- continue;
+static int amd_uncore_df_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int ret = amd_uncore_event_init(event);

- if (this == that) {
- perf_pmu_migrate_context(this->pmu, cpu, i);
- cpumask_clear_cpu(cpu, that->active_mask);
- cpumask_set_cpu(i, that->active_mask);
- that->cpu = i;
- break;
- }
- }
+ if (ret || pmu_version < 2)
+ return ret;
+
+ hwc->config = event->attr.config &
+ (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
+ AMD64_RAW_EVENT_MASK_NB);
+
+ return 0;
}

-static int amd_uncore_cpu_down_prepare(unsigned int cpu)
+static int amd_uncore_df_add(struct perf_event *event, int flags)
{
- if (amd_uncore_nb)
- uncore_down_prepare(cpu, amd_uncore_nb);
+ int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (ret)
+ return ret;
+
+ /*
+ * The first four DF counters are accessible via RDPMC index 6 to 9
+ * followed by the L3 counters from index 10 to 15. For processors
+ * with more than four DF counters, the DF RDPMC assignments become
+ * discontiguous as the additional counters are accessible starting
+ * from index 16.
+ */
+ if (hwc->idx >= NUM_COUNTERS_NB)
+ hwc->event_base_rdpmc += NUM_COUNTERS_L3;

- if (amd_uncore_llc)
- uncore_down_prepare(cpu, amd_uncore_llc);
+ /* Delayed start after rdpmc base update */
+ if (flags & PERF_EF_START)
+ amd_uncore_start(event, PERF_EF_RELOAD);

return 0;
}

-static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
+static int amd_uncore_df_init(void)
{
- struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+ struct attribute **df_attr = amd_uncore_df_format_attr;
+ struct amd_uncore_pmu *pmu = &pmus[num_pmus];
+ union cpuid_0x80000022_ebx ebx;
+ int ret;

- if (cpu == uncore->cpu)
- cpumask_clear_cpu(cpu, uncore->active_mask);
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
+ return 0;

- if (!--uncore->refcnt) {
- kfree(uncore->events);
- kfree(uncore);
+ /*
+ * For Family 17h and above, the Northbridge counters are repurposed
+ * as Data Fabric counters. The PMUs are exported based on family as
+ * either NB or DF.
+ */
+ strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
+ sizeof(pmu->name));
+
+ pmu->num_counters = NUM_COUNTERS_NB;
+ pmu->msr_base = MSR_F15H_NB_PERF_CTL;
+ pmu->rdpmc_base = RDPMC_BASE_NB;
+ pmu->id = amd_uncore_df_id;
+
+ if (pmu_version >= 2) {
+ *df_attr++ = &format_attr_event14v2.attr;
+ *df_attr++ = &format_attr_umask12.attr;
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+ pmu->num_counters = ebx.split.num_df_pmc;
+ } else if (boot_cpu_data.x86 >= 0x17) {
+ *df_attr = &format_attr_event14.attr;
+ }
+
+ pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+ if (!pmu->ctx)
+ return -ENOMEM;
+
+ pmu->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = amd_uncore_df_attr_groups,
+ .name = pmu->name,
+ .event_init = amd_uncore_df_event_init,
+ .add = amd_uncore_df_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
+ };
+
+ ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1);
+ if (ret) {
+ free_percpu(pmu->ctx);
+ pmu->ctx = NULL;
+ return ret;
}

- *per_cpu_ptr(uncores, cpu) = NULL;
+ pr_info("%d %s %s counters detected\n", pmu->num_counters,
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+ pmu->pmu.name);
+
+ num_pmus++;
+
+ return 0;
}

-static int amd_uncore_cpu_dead(unsigned int cpu)
+static int amd_uncore_l3_id(unsigned int cpu)
{
- if (amd_uncore_nb)
- uncore_dead(cpu, amd_uncore_nb);
+ return get_llc_id(cpu);
+}
+
+static int amd_uncore_l3_event_init(struct perf_event *event)
+{
+ int ret = amd_uncore_event_init(event);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config = event->attr.config;
+ u64 mask;
+
+ hwc->config = config & AMD64_RAW_EVENT_MASK_NB;
+
+ /*
+ * SliceMask and ThreadMask need to be set for certain L3 events.
+ * For other events, the two fields do not affect the count.
+ */
+ if (ret || boot_cpu_data.x86 < 0x17)
+ return ret;
+
+ mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
+ AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
+ AMD64_L3_COREID_MASK);
+
+ if (boot_cpu_data.x86 <= 0x18)
+ mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
+ ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
+
+ /*
+ * If the user doesn't specify a ThreadMask, they're not trying to
+ * count core 0, so we enable all cores & threads.
+ * We'll also assume that they want to count slice 0 if they specify
+ * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
+ */
+ else if (!(config & AMD64_L3_F19H_THREAD_MASK))
+ mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
+ AMD64_L3_EN_ALL_CORES;

- if (amd_uncore_llc)
- uncore_dead(cpu, amd_uncore_llc);
+ hwc->config |= mask;

return 0;
}

-static int __init amd_uncore_init(void)
+static int amd_uncore_l3_init(void)
{
- struct attribute **df_attr = amd_uncore_df_format_attr;
struct attribute **l3_attr = amd_uncore_l3_format_attr;
- union cpuid_0x80000022_ebx ebx;
- int ret = -ENODEV;
+ struct amd_uncore_pmu *pmu = &pmus[num_pmus];
+ int ret;

- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
- return -ENODEV;
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
+ return 0;

- if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
- return -ENODEV;
+ /*
+ * For Family 17h and above, L3 cache counters are available instead
+ * of L2 cache counters. The PMUs are exported based on family as
+ * either L2 or L3.
+ */
+ strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
+ sizeof(pmu->name));

- if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
- pmu_version = 2;
+ pmu->num_counters = NUM_COUNTERS_L2;
+ pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
+ pmu->rdpmc_base = RDPMC_BASE_LLC;
+ pmu->id = amd_uncore_l3_id;

- num_counters_nb = NUM_COUNTERS_NB;
- num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
- /*
- * For F17h and above, the Northbridge counters are
- * repurposed as Data Fabric counters. Also, L3
- * counters are supported too. The PMUs are exported
- * based on family as either L2 or L3 and NB or DF.
- */
- num_counters_llc = NUM_COUNTERS_L3;
- amd_nb_pmu.name = "amd_df";
- amd_llc_pmu.name = "amd_l3";
- l3_mask = true;
+ *l3_attr++ = &format_attr_event8.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
+ *l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
+ &format_attr_threadmask2.attr :
+ &format_attr_threadmask8.attr;
+ pmu->num_counters = NUM_COUNTERS_L3;
}

- if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
- if (pmu_version >= 2) {
- *df_attr++ = &format_attr_event14v2.attr;
- *df_attr++ = &format_attr_umask12.attr;
- } else if (boot_cpu_data.x86 >= 0x17) {
- *df_attr = &format_attr_event14.attr;
- }
+ pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+ if (!pmu->ctx)
+ return -ENOMEM;
+
+ pmu->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = amd_uncore_l3_attr_groups,
+ .attr_update = amd_uncore_l3_attr_update,
+ .name = pmu->name,
+ .event_init = amd_uncore_l3_event_init,
+ .add = amd_uncore_add,
+ .del = amd_uncore_del,
+ .start = amd_uncore_start,
+ .stop = amd_uncore_stop,
+ .read = amd_uncore_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
+ };
+
+ ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1);
+ if (ret) {
+ free_percpu(pmu->ctx);
+ pmu->ctx = NULL;
+ return ret;
+ }

- amd_uncore_nb = alloc_percpu(struct amd_uncore *);
- if (!amd_uncore_nb) {
- ret = -ENOMEM;
- goto fail_nb;
- }
- ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
- if (ret)
- goto fail_nb;
+ pr_info("%d %s %s counters detected\n", pmu->num_counters,
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+ pmu->pmu.name);

- if (pmu_version >= 2) {
- ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
- num_counters_nb = ebx.split.num_df_pmc;
- }
+ num_pmus++;

- pr_info("%d %s %s counters detected\n", num_counters_nb,
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
- amd_nb_pmu.name);
+ return 0;
+}

- ret = 0;
- }
+static void uncore_free(void)
+{
+ struct amd_uncore_pmu *pmu;
+ int i;

- if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
- if (boot_cpu_data.x86 >= 0x19) {
- *l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask8.attr;
- *l3_attr++ = &format_attr_threadmask2.attr;
- } else if (boot_cpu_data.x86 >= 0x17) {
- *l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask8.attr;
- *l3_attr++ = &format_attr_threadmask8.attr;
- }
+ for (i = 0; i < num_pmus; i++) {
+ pmu = &pmus[i];
+ if (!pmu->ctx)
+ continue;

- amd_uncore_llc = alloc_percpu(struct amd_uncore *);
- if (!amd_uncore_llc) {
- ret = -ENOMEM;
- goto fail_llc;
- }
- ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
- if (ret)
- goto fail_llc;
-
- pr_info("%d %s %s counters detected\n", num_counters_llc,
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
- amd_llc_pmu.name);
- ret = 0;
+ perf_pmu_unregister(&pmu->pmu);
+ free_percpu(pmu->ctx);
+ pmu->ctx = NULL;
}

+ num_pmus = 0;
+}
+
+static int __init amd_uncore_init(void)
+{
+ int ret;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ return -ENODEV;
+
+ if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
+ return -ENODEV;
+
+ if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
+ pmu_version = 2;
+
+ ret = amd_uncore_df_init();
+ if (ret)
+ goto fail;
+
+ ret = amd_uncore_l3_init();
+ if (ret)
+ goto fail;
+
/*
* Install callbacks. Core will call them for each online cpu.
*/
if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
"perf/x86/amd/uncore:prepare",
amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
- goto fail_llc;
+ goto fail;

if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
"perf/x86/amd/uncore:starting",
@@ -753,12 +789,8 @@ static int __init amd_uncore_init(void)
cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
-fail_llc:
- if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
- perf_pmu_unregister(&amd_nb_pmu);
- free_percpu(amd_uncore_llc);
-fail_nb:
- free_percpu(amd_uncore_nb);
+fail:
+ uncore_free();

return ret;
}
@@ -768,18 +800,7 @@ static void __exit amd_uncore_exit(void)
cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
-
- if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
- perf_pmu_unregister(&amd_llc_pmu);
- free_percpu(amd_uncore_llc);
- amd_uncore_llc = NULL;
- }
-
- if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
- perf_pmu_unregister(&amd_nb_pmu);
- free_percpu(amd_uncore_nb);
- amd_uncore_nb = NULL;
- }
+ uncore_free();
}

module_init(amd_uncore_init);
--
2.34.1