Re: [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support

From: Anju T Sudhakar
Date: Wed Jun 07 2017 - 01:26:55 EST


Hi Thomas,

Thank you for reviewing the patch.


On Tuesday 06 June 2017 02:09 PM, Thomas Gleixner wrote:
On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
+/*
+ * nest_imc_stop : Does OPAL call to stop nest engine.
+ */
+static void nest_imc_stop(int *cpu_opal_rc)
+{
+ int rc;
+
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
+ if (rc)
+ cpu_opal_rc[smp_processor_id()] = 1;
What's wrong with just assigning the result directly?


Yes. We can assign it directly. Will do this.


+/* nest_imc_start: Does the OPAL call to enable nest counters. */
+static void nest_imc_start(int *cpu_opal_rc)
+{
+ int rc;
+
+ rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST);
+ if (rc)
+ cpu_opal_rc[smp_processor_id()] = 1;
+}
+
+/*
+ * nest_imc_control: Function to start and stop nest imc engine.
+ * The function is primarily called from event init
+ * and event destroy.
As I don't see other call sites, what's the secondary usage?


This function is called from event init and event destroy only.
Will fix the comment here.


+ */
+static int nest_imc_control(int operation)
+{
+ int *cpus_opal_rc, cpu;
+
+ /*
+ * Memory for OPAL call return value.
+ */
+ cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
+ if (!cpus_opal_rc)
+ return -ENOMEM;
+ switch (operation) {
+ case IMC_COUNTER_ENABLE:
+ /* Initialize Nest PMUs in each node using designated cpus */
+ on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_start,
+ cpus_opal_rc, 1);
That's one indentation level too deep.

My bad. Will fix this.

+ break;
+ case IMC_COUNTER_DISABLE:
+ /* Disable the counters */
+ on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_stop,
+ cpus_opal_rc, 1);
+ break;
+ default:
+ kfree(cpus_opal_rc);
+ return -EINVAL;
+
+ }
+
+ /* Check return value array for any OPAL call failure */
+ for_each_cpu(cpu, &nest_imc_cpumask) {
+ if (cpus_opal_rc[cpu]) {
+ kfree(cpus_opal_rc);
+ return -ENODEV;
+ }
+ }
+ kfree(cpus_opal_rc);
So you have THREE places where you do kfree(cpus_opal_rc). Sigh.

You can spare that whole alloc/free dance by simply having

static struct cpumask nest_imc_result;

mutex_lock(&imc_nest_reserve);
memset(&nest_imc_result, 0, sizeof(nest_imc_result));

switch (op) {
case IMC_COUNTER_ENABLE:
on_each_cpu_mask(&nest_imc_cpumask, nest_imc_start, NULL, 1);
break;
....
}

res = cpumask_empty(&nest_imc_result) ? 0 : -ENODEV;
mutex_unlock(&imc_nest_reserve);
return res;

And in the start/stop functions:

if (opal_imc_counters_xxx(OPAL_IMC_COUNTERS_NEST))
cpumask_set_cpu(smp_processor_id(), &nest_imc_result);

Ok.

+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+ struct imc_pmu **pn = per_nest_pmu_arr;
+ int i;
+
+ if (old_cpu < 0 || new_cpu < 0)
+ return;
+
+ for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+ perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+
+}
+
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+ int nid, target = -1;
+ const struct cpumask *l_cpumask;
+
+ /*
+ * Check in the designated list for this cpu. Dont bother
+ * if not one of them.
+ */
+ if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+ return 0;
+
+ /*
+ * Now that this cpu is one of the designated,
+ * find a next cpu a) which is online and b) in same chip.
+ */
+ nid = cpu_to_node(cpu);
+ l_cpumask = cpumask_of_node(nid);
+ target = cpumask_next(cpu, l_cpumask);
So if the outgoing CPU is the highest numbered CPU on the node, then
cpumask_next() will not find a CPU, while there still might be other lower
numbered CPUs online in the node.

target = cpumask_any_but(l_cpumask, cpu);

is what you want.

In the previous revisions we designated the smallest cpu in the mask. And then we rewrote the
code to pick the next available cpu. But this is a nice catch. :-) Thanks!
I will fix this.

+
+ /*
+ * Update the cpumask with the target cpu and
+ * migrate the context if needed
+ */
+ if (target >= 0 && target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &nest_imc_cpumask);
+ nest_change_cpu_context(cpu, target);
+ } else {
+ target = -1;
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
+ nest_change_cpu_context(cpu, target);
Why are you calling nest_change_cpu_context()? You already know that it
will return immediately due to target < 0.

Yes right. Will remove the function call here.

+ }
+ return 0;
+}
+
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+ const struct cpumask *l_cpumask;
+ static struct cpumask tmp_mask;
+ int res;
+
+ /* Get the cpumask of this node */
+ l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+ /*
+ * If this is not the first online CPU on this node, then
+ * just return.
+ */
+ if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
+ return 0;
+
+ /*
+ * If this is the first online cpu on this node
+ * disable the nest counters by making an OPAL call.
+ */
+ res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
+ if (res)
+ return res;
+
+ /* Make this CPU the designated target for counter collection */
+ cpumask_set_cpu(cpu, &nest_imc_cpumask);
+ nest_change_cpu_context(-1, cpu);
Ditto

static int nest_imc_event_init(struct perf_event *event)
{
- int chip_id;
+ int chip_id, rc;
u32 config = event->attr.config;
struct imc_mem_info *pcni;
struct imc_pmu *pmu;
@@ -80,6 +277,20 @@ static int nest_imc_event_init(struct perf_event *event)
* "chip_id" and add "config" to it".
*/
event->hw.event_base = pcni->vbase[config/PAGE_SIZE] + (config & ~PAGE_MASK);
+ /*
+ * Nest pmu units are enabled only when it is used.
+ * See if this is triggered for the first time.
+ * If yes, take the mutex lock and enable the nest counters.
+ * If not, just increment the count in nest_events.
+ */
+ if (atomic_inc_return(&nest_events) == 1) {
+ mutex_lock(&imc_nest_reserve);
+ rc = nest_imc_control(IMC_COUNTER_ENABLE);
+ mutex_unlock(&imc_nest_reserve);
+ if (rc)
+ pr_err("IMC: Unable to start the counters\n");
So if that fails, nest_events will stay incremented. Is that on purpose?

Nice catch. Will return here and decrement the count.


Thanks and Regards,

Anju
+ }
+ event->destroy = nest_imc_counters_release;
return 0;
And this happily returns success even if the enable call failed ....

Thanks,

tglx