Re: [RFC PATCH v4 03/15] perf pmu-events: Add functions in jevent.py to parse counter and event info for hardware aware grouping

From: Ian Rogers
Date: Sun Mar 24 2024 - 00:49:50 EST


On Thu, Feb 8, 2024 at 7:14 PM <weilin.wang@xxxxxxxxx> wrote:
>
> From: Weilin Wang <weilin.wang@xxxxxxxxx>
>
> These functions are added to parse event counter restrictions and counter
> availability info from json files so that the metric grouping method could
> do grouping based on the counter restriction of events and the counters
> that are available on the system.
>
> Signed-off-by: Weilin Wang <weilin.wang@xxxxxxxxx>
> ---
> tools/perf/pmu-events/jevents.py | 185 +++++++++++++++++++++++++++--
> tools/perf/pmu-events/pmu-events.h | 34 +++++-
> 2 files changed, 207 insertions(+), 12 deletions(-)
>
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index 2c7e5d61ce92..bc91b7efa49a 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -23,6 +23,8 @@ _metric_tables = []
> _sys_metric_tables = []
> # Mapping between sys event table names and sys metric table names.
> _sys_event_table_to_metric_table_mapping = {}
> +# List of regular PMU counter layout tables.
> +_pmu_layouts_tables = []
> # Map from an event name to an architecture standard
> # JsonEvent. Architecture standard events are in json files in the top
> # f'{_args.starting_dir}/{_args.arch}' directory.
> @@ -31,6 +33,10 @@ _arch_std_events = {}
> _pending_events = []
> # Name of events table to be written out
> _pending_events_tblname = None
> +# PMU counter layout to write out when the layout table is closed
> +_pending_pmu_counts = []
> +# Name of PMU counter layout table to be written out
> +_pending_pmu_counts_tblname = None
> # Metrics to write out when the table is closed
> _pending_metrics = []
> # Name of metrics table to be written out
> @@ -47,10 +53,17 @@ _json_event_attributes = [
> 'event',
> # Short things in alphabetical order.
> 'compat', 'deprecated', 'perpkg', 'unit',
> + # The list of counter(s) this event could use
> + 'counters',
> # Longer things (the last won't be iterated over during decompress).
> 'long_desc'
> ]
>
> +# Attributes that are in pmu_unit_layout.
> +_json_layout_attributes = [
> + 'pmu', 'desc', 'num_counters', 'num_fixed_counters'
> +]
> +
> # Attributes that are in pmu_metric rather than pmu_event.
> _json_metric_attributes = [
> 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
> @@ -58,7 +71,9 @@ _json_metric_attributes = [
> 'default_metricgroup_name', 'aggr_mode', 'event_grouping'
> ]
> # Attributes that are bools or enum int values, encoded as '0', '1',...
> -_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
> +_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg',
> + 'num_counters', 'num_fixed_counters'
> +]
>
> def removesuffix(s: str, suffix: str) -> str:
> """Remove the suffix from a string
> @@ -334,6 +349,12 @@ class JsonEvent:
> if 'Errata' in jd:
> extra_desc += ' Spec update: ' + jd['Errata']
> self.pmu = unit_to_pmu(jd.get('Unit'))
> + # The list of counter(s) the event could be collected with
> + self.counters = jd.get('Counter')
> + # Number of generic counter
> + self.num_counters = jd.get('NumCounters')
> + # Number of fixed counter
> + self.num_fixed_counters = jd.get('NumFixedCounters')

I'm not a fan of counters vs fixed_counters, perhaps in the python:

self.counters['generic'].list = jd.get('Counter')
self.counters['generic'].num = jd.get('NumCounters')
self.counters['fixed'].num = jd.get('NumFixedCounters')

Would make things a bit more intention revealing. Fwiw, the json name
is worse as Counter is really for a plural of things. I don't think we
can update this in the Intel perfmon events, but maybe we can do it in
the create_perf_json.py converter script and its generated json.

This is pretty trivial as clean up so I'd prefer it before my
Reviewed-by unless there's something I'm overlooking.

Thanks,
Ian

> filter = jd.get('Filter')
> self.unit = jd.get('ScaleUnit')
> self.perpkg = jd.get('PerPkg')
> @@ -405,8 +426,16 @@ class JsonEvent:
> s += f'\t{attr} = {value},\n'
> return s + '}'
>
> - def build_c_string(self, metric: bool) -> str:
> + def build_c_string(self, metric: bool, layout: bool) -> str:
> s = ''
> + if layout:
> + for attr in _json_layout_attributes:
> + x = getattr(self, attr)
> + if attr in _json_enum_attributes:
> + s += x if x else '0'
> + else:
> + s += f'{x}\\000' if x else '\\000'
> + return s
> for attr in _json_metric_attributes if metric else _json_event_attributes:
> x = getattr(self, attr)
> if metric and x and attr == 'metric_expr':
> @@ -421,10 +450,10 @@ class JsonEvent:
> s += f'{x}\\000' if x else '\\000'
> return s
>
> - def to_c_string(self, metric: bool) -> str:
> + def to_c_string(self, metric: bool, layout: bool) -> str:
> """Representation of the event as a C struct initializer."""
>
> - s = self.build_c_string(metric)
> + s = self.build_c_string(metric, layout)
> return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
>
>
> @@ -461,6 +490,8 @@ def preprocess_arch_std_files(archpath: str) -> None:
> _arch_std_events[event.name.lower()] = event
> if event.metric_name:
> _arch_std_events[event.metric_name.lower()] = event
> + if event.num_counters:
> + _arch_std_events[event.pmu.lower()] = event
>
>
> def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
> @@ -470,6 +501,8 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
> _pending_events.append(e)
> if e.metric_name:
> _pending_metrics.append(e)
> + if e.num_counters:
> + _pending_pmu_counts.append(e)
>
>
> def print_pending_events() -> None:
> @@ -510,7 +543,7 @@ def print_pending_events() -> None:
> last_pmu = event.pmu
> pmus.add((event.pmu, pmu_name))
>
> - _args.output_file.write(event.to_c_string(metric=False))
> + _args.output_file.write(event.to_c_string(metric=False, layout=False))
> _pending_events = []
>
> _args.output_file.write(f"""
> @@ -565,7 +598,7 @@ def print_pending_metrics() -> None:
> last_pmu = metric.pmu
> pmus.add((metric.pmu, pmu_name))
>
> - _args.output_file.write(metric.to_c_string(metric=True))
> + _args.output_file.write(metric.to_c_string(metric=True, layout=False))
> _pending_metrics = []
>
> _args.output_file.write(f"""
> @@ -583,6 +616,35 @@ const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
> """)
> _args.output_file.write('};\n\n')
>
> +def print_pending_pmu_counter_layout_table() -> None:
> + '''Print counter layout data from counter.json file to counter layout table in
> + c-string'''
> +
> + def pmu_counts_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
> + def fix_none(s: Optional[str]) -> str:
> + if s is None:
> + return ''
> + return s
> +
> + return (j.desc is not None, fix_none(j.pmu), fix_none(j.num_counters))
> +
> + global _pending_pmu_counts
> + if not _pending_pmu_counts:
> + return
> +
> + global _pending_pmu_counts_tblname
> + global pmu_layouts_tables
> + _pmu_layouts_tables.append(_pending_pmu_counts_tblname)
> +
> + _args.output_file.write(
> + f'static const struct compact_pmu_event {_pending_pmu_counts_tblname}[] = {{\n')
> +
> + for pmu_layout in sorted(_pending_pmu_counts, key=pmu_counts_cmp_key):
> + _args.output_file.write(pmu_layout.to_c_string(metric=False, layout=True))
> + _pending_pmu_counts = []
> +
> + _args.output_file.write('};\n\n')
> +
> def get_topic(topic: str) -> str:
> if topic.endswith('metrics.json'):
> return 'metrics'
> @@ -619,10 +681,12 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
> pmu_name = f"{event.pmu}\\000"
> if event.name:
> _bcs.add(pmu_name, metric=False)
> - _bcs.add(event.build_c_string(metric=False), metric=False)
> + _bcs.add(event.build_c_string(metric=False, layout=False), metric=False)
> if event.metric_name:
> _bcs.add(pmu_name, metric=True)
> - _bcs.add(event.build_c_string(metric=True), metric=True)
> + _bcs.add(event.build_c_string(metric=True, layout=False), metric=True)
> + if event.num_counters:
> + _bcs.add(event.build_c_string(metric=False, layout=True), metric=False)
>
> def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
> """Process a JSON file during the main walk."""
> @@ -636,11 +700,14 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
> if item.is_dir() and is_leaf_dir(item.path):
> print_pending_events()
> print_pending_metrics()
> + print_pending_pmu_counter_layout_table()
>
> global _pending_events_tblname
> _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
> global _pending_metrics_tblname
> _pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)
> + global _pending_pmu_counts_tblname
> + _pending_pmu_counts_tblname = file_name_to_table_name('pmu_layouts_', parents, item.name)
>
> if item.name == 'sys':
> _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
> @@ -674,6 +741,12 @@ struct pmu_metrics_table {
> uint32_t num_pmus;
> };
>
> +/* Struct used to make the PMU counter layout table implementation opaque to callers. */
> +struct pmu_layouts_table {
> + const struct compact_pmu_event *entries;
> + size_t length;
> +};
> +
> /*
> * Map a CPU to its table of PMU events. The CPU is identified by the
> * cpuid field, which is an arch-specific identifier for the CPU.
> @@ -687,6 +760,7 @@ struct pmu_events_map {
> const char *cpuid;
> struct pmu_events_table event_table;
> struct pmu_metrics_table metric_table;
> + struct pmu_layouts_table layout_table;
> };
>
> /*
> @@ -731,6 +805,12 @@ const struct pmu_events_map pmu_events_map[] = {
> metric_size = '0'
> if event_size == '0' and metric_size == '0':
> continue
> + layout_tblname = file_name_to_table_name('pmu_layouts_', [], row[2].replace('/', '_'))
> + if layout_tblname in _pmu_layouts_tables:
> + layout_size = f'ARRAY_SIZE({layout_tblname})'
> + else:
> + layout_tblname = 'NULL'
> + layout_size = '0'
> cpuid = row[0].replace('\\', '\\\\')
> _args.output_file.write(f"""{{
> \t.arch = "{arch}",
> @@ -742,6 +822,10 @@ const struct pmu_events_map pmu_events_map[] = {
> \t.metric_table = {{
> \t\t.pmus = {metric_tblname},
> \t\t.num_pmus = {metric_size}
> +\t}},
> +\t.layout_table = {{
> +\t\t.entries = {layout_tblname},
> +\t\t.length = {layout_size}
> \t}}
> }},
> """)
> @@ -752,6 +836,7 @@ const struct pmu_events_map pmu_events_map[] = {
> \t.cpuid = 0,
> \t.event_table = { 0, 0 },
> \t.metric_table = { 0, 0 },
> +\t.layout_table = { 0, 0 },
> }
> };
> """)
> @@ -840,6 +925,24 @@ static void decompress_metric(int offset, struct pmu_metric *pm)
> _args.output_file.write('\twhile (*p++);')
> _args.output_file.write("""}
>
> +static void decompress_layout(int offset, struct pmu_layout *pm)
> +{
> +\tconst char *p = &big_c_string[offset];
> +""")
> + for attr in _json_layout_attributes:
> + _args.output_file.write(f'\n\tpm->{attr} = ')
> + if attr in _json_enum_attributes:
> + _args.output_file.write("*p - '0';\n")
> + else:
> + _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
> + if attr == _json_layout_attributes[-1]:
> + continue
> + if attr in _json_enum_attributes:
> + _args.output_file.write('\tp++;')
> + else:
> + _args.output_file.write('\twhile (*p++);')
> + _args.output_file.write("""}
> +
> static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
> const struct pmu_table_entry *pmu,
> pmu_event_iter_fn fn,
> @@ -995,6 +1098,21 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
> return 0;
> }
>
> +int pmu_layouts_table__for_each_layout(const struct pmu_layouts_table *table,
> + pmu_layout_iter_fn fn,
> + void *data) {
> + for (size_t i = 0; i < table->length; i++) {
> + struct pmu_layout pm;
> + int ret;
> +
> + decompress_layout(table->entries[i].offset, &pm);
> + ret = fn(&pm, data);
> + if (ret)
> + return ret;
> + }
> + return 0;
> +}
> +
> static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
> {
> static struct {
> @@ -1090,6 +1208,33 @@ const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pm
> return NULL;
> }
>
> +const struct pmu_layouts_table *perf_pmu__find_layouts_table(struct perf_pmu *pmu)
> +{
> + const struct pmu_layouts_table *table = NULL;
> + char *cpuid = perf_pmu__getcpuid(pmu);
> + int i;
> +
> + /* on some platforms which uses cpus map, cpuid can be NULL for
> + * PMUs other than CORE PMUs.
> + */
> + if (!cpuid)
> + return NULL;
> +
> + i = 0;
> + for (;;) {
> + const struct pmu_events_map *map = &pmu_events_map[i++];
> + if (!map->arch)
> + break;
> +
> + if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> + table = &map->layout_table;
> + break;
> + }
> + }
> + free(cpuid);
> + return table;
> +}
> +
> const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
> {
> for (const struct pmu_events_map *tables = &pmu_events_map[0];
> @@ -1111,6 +1256,16 @@ const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const
> }
> return NULL;
> }
> +const struct pmu_layouts_table *find_core_layouts_table(const char *arch, const char *cpuid)
> +{
> + for (const struct pmu_events_map *tables = &pmu_events_map[0];
> + tables->arch;
> + tables++) {
> + if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
> + return &tables->layout_table;
> + }
> + return NULL;
> +}
>
> int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
> {
> @@ -1139,6 +1294,19 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
> return 0;
> }
>
> +int pmu_for_each_core_layout(pmu_layout_iter_fn fn, void *data)
> +{
> + for (const struct pmu_events_map *tables = &pmu_events_map[0];
> + tables->arch;
> + tables++) {
> + int ret = pmu_layouts_table__for_each_layout(&tables->layout_table, fn, data);
> +
> + if (ret)
> + return ret;
> + }
> + return 0;
> +}
> +
> const struct pmu_events_table *find_sys_events_table(const char *name)
> {
> for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
> @@ -1295,6 +1463,7 @@ struct pmu_table_entry {
> ftw(arch_path, [], process_one_file)
> print_pending_events()
> print_pending_metrics()
> + print_pending_pmu_counter_layout_table()
>
> print_mapping_table(archs)
> print_system_mapping_table()
> diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
> index f5aa96f1685c..e245e4738970 100644
> --- a/tools/perf/pmu-events/pmu-events.h
> +++ b/tools/perf/pmu-events/pmu-events.h
> @@ -45,6 +45,11 @@ struct pmu_event {
> const char *desc;
> const char *topic;
> const char *long_desc;
> + /**
> + * The list of counter(s) the event could be collected on.
> + * eg., "0,1,2,3,4,5,6,7".
> + */
> + const char *counters;
> const char *pmu;
> const char *unit;
> bool perpkg;
> @@ -67,8 +72,20 @@ struct pmu_metric {
> enum metric_event_groups event_grouping;
> };
>
> +struct pmu_layout {
> + const char *pmu;
> + const char *desc;
> + /** Total number of generic counters*/
> + int num_counters;
> + /**
> + * Total number of fixed counters.
> + * Set to zero if no fixed counter on the unit.*/
> + int num_fixed_counters;
> +};
> +
> struct pmu_events_table;
> struct pmu_metrics_table;
> +struct pmu_layouts_table;
>
> typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe,
> const struct pmu_events_table *table,
> @@ -78,15 +95,21 @@ typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm,
> const struct pmu_metrics_table *table,
> void *data);
>
> +typedef int (*pmu_layout_iter_fn)(const struct pmu_layout *pm,
> + void *data);
> +
> int pmu_events_table__for_each_event(const struct pmu_events_table *table,
> struct perf_pmu *pmu,
> pmu_event_iter_fn fn,
> void *data);
> int pmu_events_table__find_event(const struct pmu_events_table *table,
> - struct perf_pmu *pmu,
> - const char *name,
> - pmu_event_iter_fn fn,
> - void *data);
> + struct perf_pmu *pmu,
> + const char *name,
> + pmu_event_iter_fn fn,
> + void *data);
> +int pmu_layouts_table__for_each_layout(const struct pmu_layouts_table *table,
> + pmu_layout_iter_fn fn,
> + void *data);
> size_t pmu_events_table__num_events(const struct pmu_events_table *table,
> struct perf_pmu *pmu);
>
> @@ -95,10 +118,13 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pm
>
> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu);
> const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu);
> +const struct pmu_layouts_table *perf_pmu__find_layouts_table(struct perf_pmu *pmu);
> const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid);
> const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid);
> +const struct pmu_layouts_table *find_core_layouts_table(const char *arch, const char *cpuid);
> int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data);
> int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data);
> +int pmu_for_each_core_layout(pmu_layout_iter_fn fn, void *data);
>
> const struct pmu_events_table *find_sys_events_table(const char *name);
> const struct pmu_metrics_table *find_sys_metrics_table(const char *name);
> --
> 2.42.0
>