[PATCH v1 25/25] perf jevents: Sort strings in the big C string to reduce faults

From: Ian Rogers
Date: Wed Aug 23 2023 - 04:22:35 EST


Sort the strings within the big C string first by whether they were
added for a metric and then by the order in which they were added.
This groups related strings together and reduces minor faults by
approximately 10 in 1740, about 0.57%.

Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/pmu-events/jevents.py | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 01335a452e70..e5bce57f5688 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -113,13 +113,24 @@ class BigCString:
strings: Set[str]
big_string: Sequence[str]
offsets: Dict[str, int]
+ insert_number: int
+ insert_point: Dict[str, int]
+ metrics: Set[str]

def __init__(self):
self.strings = set()
+ self.insert_number = 0;
+ self.insert_point = {}
+ self.metrics = set()

- def add(self, s: str) -> None:
+ def add(self, s: str, metric: bool) -> None:
"""Called to add to the big string."""
- self.strings.add(s)
+ if s not in self.strings:
+ self.strings.add(s)
+ self.insert_point[s] = self.insert_number
+ self.insert_number += 1
+ if metric:
+ self.metrics.add(s)

def compute(self) -> None:
"""Called once all strings are added to compute the string and offsets."""
@@ -160,8 +171,11 @@ class BigCString:
self.big_string = []
self.offsets = {}

+ def string_cmp_key(s: str) -> Tuple[bool, int, str]:
+ return (s in self.metrics, self.insert_point[s], s)
+
# Emit all strings that aren't folded in a sorted manner.
- for s in sorted(self.strings):
+ for s in sorted(self.strings, key=string_cmp_key):
if s not in folded_strings:
self.offsets[s] = big_string_offset
self.big_string.append(f'/* offset={big_string_offset} */ "')
@@ -574,19 +588,20 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
assert len(mgroup) > 1, parents
description = f"{metricgroup_descriptions[mgroup]}\\000"
mgroup = f"{mgroup}\\000"
- _bcs.add(mgroup)
- _bcs.add(description)
+ _bcs.add(mgroup, metric=True)
+ _bcs.add(description, metric=True)
_metricgroups[mgroup] = description
return

topic = get_topic(item.name)
for event in read_json_events(item.path, topic):
pmu_name = f"{event.pmu}\\000"
- _bcs.add(pmu_name)
if event.name:
- _bcs.add(event.build_c_string(metric=False))
+ _bcs.add(pmu_name, metric=False)
+ _bcs.add(event.build_c_string(metric=False), metric=False)
if event.metric_name:
- _bcs.add(event.build_c_string(metric=True))
+ _bcs.add(pmu_name, metric=True)
+ _bcs.add(event.build_c_string(metric=True), metric=True)

def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
"""Process a JSON file during the main walk."""
--
2.42.0.rc1.204.g551eb34607-goog