[PATCH] perf cs-etm: Add support for coresight trace for any range of CPUs

From: Ganapatrao Kulkarni
Date: Wed Apr 19 2023 - 13:21:29 EST


The current implementation supports CoreSight trace for a range of
CPUs only if the first CPU is CPU0.

Enable CoreSight trace for any range of CPUs by also decoding the
first CPU id from the header. That first CPU id is then used instead
of CPU0 across the decoder functions.

Signed-off-by: Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx>
---
.../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 +-
.../perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +-
tools/perf/util/cs-etm.c | 62 ++++++++++++-------
3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 82a27ab90c8b..41ab299b643b 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -724,7 +724,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
}

struct cs_etm_decoder *
-cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
+cs_etm_decoder__new(int first_decoder, int decoders, struct cs_etm_decoder_params *d_params,
struct cs_etm_trace_params t_params[])
{
struct cs_etm_decoder *decoder;
@@ -769,7 +769,7 @@ cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
/* init raw frame logging if required */
cs_etm_decoder__init_raw_frame_logging(d_params, decoder);

- for (i = 0; i < decoders; i++) {
+ for (i = first_decoder; i < decoders; i++) {
ret = cs_etm_decoder__create_etm_decoder(d_params,
&t_params[i],
decoder);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 92a855fbe5b8..b06193fc75b4 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -90,7 +90,8 @@ int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
size_t len, size_t *consumed);

struct cs_etm_decoder *
-cs_etm_decoder__new(int num_cpu,
+cs_etm_decoder__new(int first_decoder,
+ int decoders,
struct cs_etm_decoder_params *d_params,
struct cs_etm_trace_params t_params[]);

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 94e2d02009eb..2619513ae088 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -55,6 +55,8 @@ struct cs_etm_auxtrace {
u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

int num_cpu;
+ int first_cpu;
+ int last_cpu;
u64 latest_kernel_timestamp;
u32 auxtrace_type;
u64 branches_sample_type;
@@ -638,14 +640,13 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
}

static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm,
- int decoders)
+ struct cs_etm_auxtrace *etm)
{
int i;
u32 etmidr;
u64 architecture;

- for (i = 0; i < decoders; i++) {
+ for (i = etm->first_cpu; i < etm->last_cpu; i++) {
architecture = etm->metadata[i][CS_ETM_MAGIC];

switch (architecture) {
@@ -817,7 +818,7 @@ static void cs_etm__free(struct perf_session *session)
/* Then the RB tree itself */
intlist__delete(traceid_list);

- for (i = 0; i < aux->num_cpu; i++)
+ for (i = aux->first_cpu; i < aux->last_cpu; i++)
zfree(&aux->metadata[i]);

thread__zput(aux->unknown_thread);
@@ -921,7 +922,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
* Each queue can only contain data from one CPU when unformatted, so only one decoder is
* needed.
*/
- int decoders = formatted ? etm->num_cpu : 1;
+ int first_decoder = formatted ? etm->first_cpu : 0;
+ int decoders = first_decoder + (formatted ? etm->num_cpu : 1);

etmq = zalloc(sizeof(*etmq));
if (!etmq)
@@ -937,7 +939,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
if (!t_params)
goto out_free;

- if (cs_etm__init_trace_params(t_params, etm, decoders))
+ if (cs_etm__init_trace_params(t_params, etm))
goto out_free;

/* Set decoder parameters to decode trace packets */
@@ -947,8 +949,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
formatted))
goto out_free;

- etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
- t_params);
+ etmq->decoder = cs_etm_decoder__new(first_decoder, decoders, &d_params, t_params);

if (!etmq->decoder)
goto out_free;
@@ -2959,11 +2960,11 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
* Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
* timestamps).
*/
-static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
+static bool cs_etm__has_virtual_ts(u64 **metadata, struct cs_etm_auxtrace *etm)
{
int j;

- for (j = 0; j < num_cpu; j++) {
+ for (j = etm->first_cpu; j < etm->last_cpu; j++) {
switch (metadata[j][CS_ETM_MAGIC]) {
case __perf_cs_etmv4_magic:
if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
@@ -2982,13 +2983,14 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
}

/* map trace ids to correct metadata block, from information in metadata */
-static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm)
{
u64 cs_etm_magic;
+ u64 **metadata = etm->metadata;
u8 trace_chan_id;
int i, err;

- for (i = 0; i < num_cpu; i++) {
+ for (i = etm->first_cpu; i < etm->last_cpu; i++) {
cs_etm_magic = metadata[i][CS_ETM_MAGIC];
switch (cs_etm_magic) {
case __perf_cs_etmv3_magic:
@@ -3015,12 +3017,13 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
* If we found AUX_HW_ID packets, then set any metadata marked as unused to the
* unused value to reduce the number of unneeded decoders created.
*/
-static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__clear_unused_trace_ids_metadata(struct cs_etm_auxtrace *etm)
{
u64 cs_etm_magic;
+ u64 **metadata = etm->metadata;
int i;

- for (i = 0; i < num_cpu; i++) {
+ for (i = etm->first_cpu; i < etm->last_cpu; i++) {
cs_etm_magic = metadata[i][CS_ETM_MAGIC];
switch (cs_etm_magic) {
case __perf_cs_etmv3_magic:
@@ -3049,7 +3052,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
int event_header_size = sizeof(struct perf_event_header);
int total_size = auxtrace_info->header.size;
int priv_size = 0;
- int num_cpu;
+ int num_cpu, first_cpu = 0, last_cpu;
int err = 0;
int aux_hw_id_found;
int i, j;
@@ -3068,22 +3071,31 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
/* First the global part */
ptr = (u64 *) auxtrace_info->priv;
num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
- metadata = zalloc(sizeof(*metadata) * num_cpu);
+
+ /* Start parsing after the common part of the header */
+ i = CS_HEADER_VERSION_MAX;
+
+ /* Get CPU id of first event */
+ first_cpu = ptr[i + CS_ETM_CPU];
+ last_cpu = first_cpu + num_cpu;
+
+ if (first_cpu > cpu__max_cpu().cpu ||
+ last_cpu > cpu__max_cpu().cpu)
+ return -EINVAL;
+
+ metadata = zalloc(sizeof(*metadata) * last_cpu);
if (!metadata) {
err = -ENOMEM;
goto err_free_traceid_list;
}

- /* Start parsing after the common part of the header */
- i = CS_HEADER_VERSION_MAX;
-
/*
* The metadata is stored in the auxtrace_info section and encodes
* the configuration of the ARM embedded trace macrocell which is
* required by the trace decoder to properly decode the trace due
* to its highly compressed nature.
*/
- for (j = 0; j < num_cpu; j++) {
+ for (j = first_cpu; j < last_cpu; j++) {
if (ptr[i] == __perf_cs_etmv3_magic) {
metadata[j] =
cs_etm__create_meta_blk(ptr, &i,
@@ -3145,6 +3157,8 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
etm->machine = &session->machines.host;

etm->num_cpu = num_cpu;
+ etm->first_cpu = first_cpu;
+ etm->last_cpu = last_cpu;
etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
etm->metadata = metadata;
@@ -3152,7 +3166,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

/* Use virtual timestamps if all ETMs report ts_source = 1 */
- etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
+ etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, etm);

if (!etm->has_virtual_ts)
ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
@@ -3232,10 +3246,10 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,

/* if HW ID found then clear any unused metadata ID values */
if (aux_hw_id_found)
- err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
+ err = cs_etm__clear_unused_trace_ids_metadata(etm);
/* otherwise, this is a file with metadata values only, map from metadata */
else
- err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
+ err = cs_etm__map_trace_ids_metadata(etm);

if (err)
goto err_delete_thread;
@@ -3256,7 +3270,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
zfree(&etm);
err_free_metadata:
/* No need to check @metadata[j], free(NULL) is supported */
- for (j = 0; j < num_cpu; j++)
+ for (j = first_cpu; j < last_cpu; j++)
zfree(&metadata[j]);
zfree(&metadata);
err_free_traceid_list:
--
2.39.2