[PATCH 3/4] perf record: event synthesization multithreading support

From: kan . liang
Date: Fri Oct 13 2017 - 10:10:06 EST


From: Kan Liang <Kan.liang@xxxxxxxxx>

The process function process_synthesized_event writes the process
result to perf.data, which is not multithreading friendly.

Realloc buffer for each thread to temporarily keep the processing
result. Write them to the perf.data at the end of event synthesization.
The new method doesn't impact the final result, because
- The order of the synthesized event is not important.
- The number of synthesized event is limited. Usually, it only needs
hundreds of Kilobyte to store all the synthesized event.
It's unlikly failed because of lack of memory.

The threads number hard code to online CPU number. The following patch
will introduce an option to set it.

The multithreading synthesize is only available for per cpu monitoring.

Signed-off-by: Kan Liang <Kan.liang@xxxxxxxxx>
---
tools/perf/builtin-record.c | 86 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 81 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4ede9bf..1978021 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -62,6 +62,11 @@ struct switch_output {
bool set;
};

+struct synthesize_buf {
+ void *entry;
+ size_t size;
+};
+
struct record {
struct perf_tool tool;
struct record_opts opts;
@@ -80,6 +85,7 @@ struct record {
bool timestamp_filename;
struct switch_output switch_output;
unsigned long long samples;
+ struct synthesize_buf *buf;
};

static volatile int auxtrace_record__snapshot_started;
@@ -120,14 +126,35 @@ static int record__write(struct record *rec, void *bf, size_t size)
return 0;
}

+static int buf__write(struct record *rec, int idx, void *bf, size_t size)
+{
+ size_t target_size = rec->buf[idx].size;
+ void *target;
+
+ target = realloc(rec->buf[idx].entry, target_size + size);
+ if (target == NULL)
+ return -ENOMEM;
+
+ memcpy(target + target_size, bf, size);
+
+ rec->buf[idx].entry = target;
+ rec->buf[idx].size += size;
+
+ return 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused,
- struct thread_info *thread __maybe_unused)
+ struct thread_info *thread)
{
struct record *rec = container_of(tool, struct record, tool);
- return record__write(rec, event, event->header.size);
+
+ if (!perf_singlethreaded && thread)
+ return buf__write(rec, thread->idx, event, event->header.size);
+ else
+ return record__write(rec, event, event->header.size);
}

static int record__pushfn(void *to, void *bf, size_t size)
@@ -690,6 +717,48 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
return NULL;
}

+static int record__multithread_synthesize(struct record *rec,
+ struct machine *machine,
+ struct perf_tool *tool,
+ struct record_opts *opts)
+{
+ int i, err, nr_thread = sysconf(_SC_NPROCESSORS_ONLN);
+
+ if (nr_thread > 1) {
+ perf_set_multithreaded();
+
+ rec->buf = calloc(nr_thread, sizeof(struct synthesize_buf));
+ if (rec->buf == NULL) {
+ pr_err("Could not do multithread synthesize\n");
+ nr_thread = 1;
+ perf_set_singlethreaded();
+ }
+ }
+
+ err = __machine__synthesize_threads(machine, tool, &opts->target,
+ rec->evlist->threads,
+ process_synthesized_event,
+ opts->sample_address,
+ opts->proc_map_timeout, nr_thread);
+ if (err < 0)
+ goto free;
+
+ if (nr_thread > 1) {
+ for (i = 0; i < nr_thread; i++)
+ record__write(rec, rec->buf[i].entry, rec->buf[i].size);
+ }
+
+free:
+ if (nr_thread > 1) {
+ for (i = 0; i < nr_thread; i++)
+ free(rec->buf[i].entry);
+ free(rec->buf);
+ perf_set_singlethreaded();
+ }
+
+ return err;
+}
+
static int record__synthesize(struct record *rec, bool tail)
{
struct perf_session *session = rec->session;
@@ -766,9 +835,16 @@ static int record__synthesize(struct record *rec, bool tail)
perf_event__synthesize_guest_os, tool);
}

- err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
- process_synthesized_event, opts->sample_address,
- opts->proc_map_timeout, 1);
+ /* multithreading synthesize is only available for cpu monitoring */
+ if (target__has_cpu(&opts->target))
+ err = record__multithread_synthesize(rec, machine, tool, opts);
+ else
+ err = __machine__synthesize_threads(machine, tool,
+ &opts->target,
+ rec->evlist->threads,
+ process_synthesized_event,
+ opts->sample_address,
+ opts->proc_map_timeout, 1);
out:
return err;
}
--
2.7.4