Re: [PATCH V4 14/24] perf tools: Add Intel PT decoder

From: Adrian Hunter
Date: Thu May 21 2015 - 09:00:40 EST


On 11/05/15 16:22, Arnaldo Carvalho de Melo wrote:
> Em Thu, Apr 30, 2015 at 05:37:37PM +0300, Adrian Hunter escreveu:
>> Add support for decoding an Intel Processor Trace.
>
> Thanks for the function comments in kerneldoc style, we need more of that!
>
> Some issues below:
>
> - Arnaldo
>
>> Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>
>> ---
>> tools/perf/util/intel-pt-decoder/Build | 2 +-
>> .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 1738 ++++++++++++++++++++
>> .../perf/util/intel-pt-decoder/intel-pt-decoder.h | 89 +
>> 3 files changed, 1828 insertions(+), 1 deletion(-)
>> create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
>> create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
>>
>> diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
>> index 587321a..fa12eac 100644
>> --- a/tools/perf/util/intel-pt-decoder/Build
>> +++ b/tools/perf/util/intel-pt-decoder/Build
>> @@ -1,4 +1,4 @@
>> -libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o
>> +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
>>
>> inat_tables_script = ../../arch/x86/tools/gen-insn-attr-x86.awk
>> inat_tables_maps = ../../arch/x86/lib/x86-opcode-map.txt
>> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
>> new file mode 100644
>> index 0000000..435b61b
>> --- /dev/null
>> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
>> @@ -0,0 +1,1738 @@
>> +/*
>> + * intel_pt_decoder.c: Intel Processor Trace support
>> + * Copyright (c) 2013-2014, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>> + * more details.
>> + *
>> + */
>> +
>> +#ifndef _GNU_SOURCE
>> +#define _GNU_SOURCE
>> +#endif
>> +#include <stdlib.h>
>> +#include <stdbool.h>
>> +#include <string.h>
>> +#include <errno.h>
>> +#include <stdint.h>
>> +#include <inttypes.h>
>> +
>> +#include "intel-pt-insn-decoder.h"
>> +#include "intel-pt-pkt-decoder.h"
>> +#include "intel-pt-decoder.h"
>> +#include "intel-pt-log.h"
>> +
>> +#define INTEL_PT_BLK_SIZE 1024
>> +
>> +#define BIT63 (((uint64_t)1 << 63))
>> +
>> +#define INTEL_PT_RETURN 1
>> +
>> +struct intel_pt_blk {
>> + struct intel_pt_blk *prev;
>> + uint64_t ip[INTEL_PT_BLK_SIZE];
>> +};
>> +
>> +struct intel_pt_stack {
>> + struct intel_pt_blk *blk;
>> + struct intel_pt_blk *spare;
>> + int pos;
>> +};
>> +
>> +enum intel_pt_pkt_state {
>> + INTEL_PT_STATE_NO_PSB,
>> + INTEL_PT_STATE_NO_IP,
>> + INTEL_PT_STATE_ERR_RESYNC,
>> + INTEL_PT_STATE_IN_SYNC,
>> + INTEL_PT_STATE_TNT,
>> + INTEL_PT_STATE_TIP,
>> + INTEL_PT_STATE_TIP_PGD,
>> + INTEL_PT_STATE_FUP,
>> + INTEL_PT_STATE_FUP_NO_TIP,
>> +};
>> +
>> +#ifdef INTEL_PT_STRICT
>> +#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
>> +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
>> +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
>> +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
>> +#else
>> +#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
>> +#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
>> +#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
>> +#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
>> +#endif
>> +
>> +struct intel_pt_decoder {
>> + int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
>> + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
>> + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
>> + uint64_t max_insn_cnt, void *data);
>> + void *data;
>> + struct intel_pt_state state;
>> + const unsigned char *buf;
>> + size_t len;
>> + bool return_compression;
>> + bool pge;
>> + uint64_t pos;
>> + uint64_t last_ip;
>> + uint64_t ip;
>> + uint64_t cr3;
>> + uint64_t timestamp;
>> + uint64_t tsc_timestamp;
>> + uint64_t ref_timestamp;
>> + uint64_t ret_addr;
>> + struct intel_pt_stack stack;
>> + enum intel_pt_pkt_state pkt_state;
>> + struct intel_pt_pkt packet;
>> + struct intel_pt_pkt tnt;
>> + int pkt_step;
>> + int pkt_len;
>> + unsigned int cbr;
>> + int exec_mode;
>> + unsigned int insn_bytes;
>> + uint64_t sign_bit;
>> + uint64_t sign_bits;
>> + uint64_t period;
>> + enum intel_pt_period_type period_type;
>> + uint64_t period_insn_cnt;
>> + uint64_t period_mask;
>> + uint64_t period_ticks;
>> + uint64_t last_masked_timestamp;
>> + bool continuous_period;
>> + bool overflow;
>> + bool set_fup_tx_flags;
>> + unsigned int fup_tx_flags;
>> + unsigned int tx_flags;
>> + uint64_t timestamp_insn_cnt;
>> + const unsigned char *next_buf;
>> + size_t next_len;
>> + unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
>> +};
>> +
>> +static uint64_t intel_pt_lower_power_of_2(uint64_t x)
>> +{
>> + int i;
>> +
>> + for (i = 0; x != 1; i++)
>> + x >>= 1;
>> +
>> + return x << i;
>> +}
>
> We have in tools/perf/ the same function used in the kernel:
>
> /**
> * rounddown_pow_of_two - round the given value down to nearest power of
> * two
> * @n - parameter
> *
> * round the given value down to the nearest power of two
> * - the result is undefined when n == 0
> * - this can be used to initialise global variables from constant data
> */
> #define rounddown_pow_of_two(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (1UL << ilog2(n))) : \
> __rounddown_pow_of_two(n) \
> )

That supports 'unsigned long', but the type here is 64-bit (uint64_t), so it
would not work where 'unsigned long' is only 32 bits.

>
>> +
>> +static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
>> +{
>> + if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
>> + uint64_t period;
>> +
>> + period = intel_pt_lower_power_of_2(decoder->period);
>> + decoder->period_mask = ~(period - 1);
>> + decoder->period_ticks = period;
>> + }
>> +}
>> +
>> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
>> +{
>> + struct intel_pt_decoder *decoder;
>> +
>> + if (!params->get_trace || !params->walk_insn)
>> + return NULL;
>> +
>> + decoder = malloc(sizeof(struct intel_pt_decoder));
>> + if (!decoder)
>> + return NULL;
>> +
>> + memset(decoder, 0, sizeof(struct intel_pt_decoder));
>
>
> We have either zalloc or calloc for the above sequence of allocating +
> zeroing.

OK

>
>> +
>> + decoder->get_trace = params->get_trace;
>> + decoder->walk_insn = params->walk_insn;
>> + decoder->data = params->data;
>> + decoder->return_compression = params->return_compression;
>
> I am not strict about this, but it's common in reviews to ask for
> aligning the =.

OK

>
>> +
>> + decoder->sign_bit = (uint64_t)1 << 47;
>> + decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
>> +
>> + decoder->period = params->period;
>> + decoder->period_type = params->period_type;
>> +
>> + intel_pt_setup_period(decoder);
>> +
>> + return decoder;
>> +}
>> +
>> +static void intel_pt_pop_blk(struct intel_pt_stack *stack)
>> +{
>> + struct intel_pt_blk *blk;
>> +
>> + blk = stack->blk;
>
>
> Also not strict about this, but this makes the function shorter:
>
> + struct intel_pt_blk *blk = stack->blk;

OK

>
>> + stack->blk = blk->prev;
>> + if (!stack->spare)
>> + stack->spare = blk;
>> + else
>> + free(blk);
>> +}
>> +
>> +static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
>> +{
>> + if (!stack->pos) {
>> + if (!stack->blk)
>> + return 0;
>> + intel_pt_pop_blk(stack);
>> + if (!stack->blk)
>> + return 0;
>> + stack->pos = INTEL_PT_BLK_SIZE;
>> + }
>> + return stack->blk->ip[--stack->pos];
>> +}
>> +
>> +static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
>> +{
>> + struct intel_pt_blk *blk;
>> +
>> + if (stack->spare) {
>> + blk = stack->spare;
>> + stack->spare = NULL;
>> + } else {
>> + blk = malloc(sizeof(struct intel_pt_blk));
>> + if (!blk)
>> + return -ENOMEM;
>> + }
>> +
>> + blk->prev = stack->blk;
>> + stack->blk = blk;
>> + stack->pos = 0;
>> + return 0;
>> +}
>> +
>> +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
>> +{
>> + int err;
>> +
>> + if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
>> + err = intel_pt_alloc_blk(stack);
>> + if (err)
>> + return err;
>> + }
>> +
>> + stack->blk->ip[stack->pos++] = ip;
>> + return 0;
>> +}
>
>
> None of those routines are "intel_pt" specific at all, right?

I have used intel_pt as a namespace for all functions.

>
>> +
>> +static void intel_pt_clear_stack(struct intel_pt_stack *stack)
>> +{
>> + while (stack->blk)
>> + intel_pt_pop_blk(stack);
>> + stack->pos = 0;
>> +}
>> +
>> +static void intel_pt_free_stack(struct intel_pt_stack *stack)
>> +{
>> + intel_pt_clear_stack(stack);
>> + free(stack->blk);
>> + free(stack->spare);
>> +}
>
> zfree was introduced to zero out these variables, i.e.:
>
> zfree(&stack->blk);
> zfree(&stack->spare);

OK

>
>> +
>> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
>> +{
>> + intel_pt_free_stack(&decoder->stack);
>> + free(decoder);
>> +}
>> +
>> +const char *intel_pt_error_message(int code)
>> +{
>> + switch (code) {
>> + case ENOMEM:
>> + return "Memory allocation failed";
>> + case ENOSYS:
>> + return "Internal error";
>> + case EBADMSG:
>> + return "Bad packet";
>> + case ENODATA:
>> + return "No more data";
>> + case EILSEQ:
>> + return "Failed to get instruction";
>> + case ENOENT:
>> + return "Trace doesn't match instruction";
>> + case EOVERFLOW:
>> + return "Overflow packet";
>> + case ESHUTDOWN:
>> + return "Trace stop packet";
>> + default:
>> + return "Unknown error!";
>> + }
>
>
> The above idiom uses intel_pt__strerror(int err) {} elsewhere, i.e. a
> way to map an errno to a string is called "strerror", see for instance:
>
> [acme@zoo linux]$ grep __strerror tools/perf/*.c
> tools/perf/builtin-kvm.c: target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
> tools/perf/builtin-record.c: target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
> tools/perf/builtin-record.c: target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
> tools/perf/builtin-top.c: dso__strerror_load(al.map->dso, serr, sizeof(serr));
> tools/perf/builtin-top.c: target__strerror(target, status, errbuf, BUFSIZ);
> tools/perf/builtin-top.c: target__strerror(target, status, errbuf, BUFSIZ);
> tools/perf/builtin-trace.c: debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
> tools/perf/builtin-trace.c: debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
> tools/perf/builtin-trace.c: perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
> tools/perf/builtin-trace.c: perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
> tools/perf/builtin-trace.c: target__strerror(&trace.opts.target, err, bf, sizeof(bf));
> tools/perf/builtin-trace.c: target__strerror(&trace.opts.target, err, bf, sizeof(bf));
> [acme@zoo linux]$
>
> Also, we try to be consistent in separating the class name (intel_pt) from the
> method (strerror).

OK

>
>> +}
>> +
>> +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
>> + const struct intel_pt_pkt *packet,
>> + uint64_t last_ip)
>> +{
>> + uint64_t ip;
>> +
>> + switch (packet->count) {
>> + case 2:
>> + ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
>> + packet->payload;
>> + break;
>> + case 4:
>> + ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
>> + packet->payload;
>> + break;
>> + case 6:
>> + ip = packet->payload;
>> + break;
>> + default:
>> + return 0;
>> + }
>> +
>> + if (ip & decoder->sign_bit)
>> + return ip | decoder->sign_bits;
>> +
>> + return ip;
>> +}
>> +
>> +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
>> +{
>> + decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
>> + decoder->last_ip);
>> +}
>> +
>> +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
>> +{
>> + intel_pt_set_last_ip(decoder);
>> + decoder->ip = decoder->last_ip;
>> +}
>> +
>> +static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
>> +{
>> + intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
>> + decoder->buf);
>> +}
>> +
>> +static int intel_pt_bug(struct intel_pt_decoder *decoder)
>> +{
>> + intel_pt_log("ERROR: Internal error\n");
>> + decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
>> + return -ENOSYS;
>> +}
>> +
>> +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
>> +{
>> + decoder->tx_flags = 0;
>> +}
>> +
>> +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
>> +{
>> + decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
>> +}
>> +
>> +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
>> +{
>> + intel_pt_clear_tx_flags(decoder);
>> + decoder->pkt_len = 1;
>> + decoder->pkt_step = 1;
>> + intel_pt_decoder_log_packet(decoder);
>> + if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
>> + intel_pt_log("ERROR: Bad packet\n");
>
>
> And all this intel_pt_log(), do we really need a separate logging facility?
> What is wrong with using pr_err(), pr_warning(), pr_debug(), as tools/ try to
> use (we need to improve that more, but still), and the kernel as well?

The log contains all decoded packets and all decoded instructions, so it is
a log not just error messages. And consequently it is very big - always many
megabytes. Tracing 'ls' gives about 32M log file.

>
>> + decoder->pkt_state = INTEL_PT_STATE_ERR1;
>> + }
>> + return -EBADMSG;
>> +}
>> +
>> +static int intel_pt_get_data(struct intel_pt_decoder *decoder)
>> +{
>> + struct intel_pt_buffer buffer = { .buf = 0, };
>> + int ret;
>> +
>> + decoder->pkt_step = 0;
>> +
>> + intel_pt_log("Getting more data\n");
>> + ret = decoder->get_trace(&buffer, decoder->data);
>> + if (ret)
>> + return ret;
>> + decoder->buf = buffer.buf;
>> + decoder->len = buffer.len;
>> + if (!decoder->len) {
>> + intel_pt_log("No more data\n");
>> + return -ENODATA;
>> + }
>> + if (!buffer.consecutive) {
>> + decoder->ip = 0;
>> + decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
>> + decoder->ref_timestamp = buffer.ref_timestamp;
>> + decoder->timestamp = 0;
>> + decoder->state.trace_nr = buffer.trace_nr;
>> + intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
>> + decoder->ref_timestamp);
>> + return -ENOLINK;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
>> +{
>> + if (!decoder->next_buf)
>> + return intel_pt_get_data(decoder);
>> +
>> + decoder->buf = decoder->next_buf;
>> + decoder->len = decoder->next_len;
>> + decoder->next_buf = 0;
>> + decoder->next_len = 0;
>> + return 0;
>> +}
>> +
>> +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
>> +{
>> + unsigned char *buf = decoder->temp_buf;
>> + size_t old_len, len, n;
>> + int ret;
>> +
>> + old_len = decoder->len;
>> + len = decoder->len;
>> + memcpy(buf, decoder->buf, len);
>> +
>> + ret = intel_pt_get_data(decoder);
>> + if (ret) {
>> + decoder->pos += old_len;
>> + return ret < 0 ? ret : -EINVAL;
>> + }
>> +
>> + n = INTEL_PT_PKT_MAX_SZ - len;
>> + if (n > decoder->len)
>> + n = decoder->len;
>> + memcpy(buf + len, decoder->buf, n);
>> + len += n;
>> +
>> + ret = intel_pt_get_packet(buf, len, &decoder->packet);
>> + if (ret < (int)old_len) {
>> + decoder->next_buf = decoder->buf;
>> + decoder->next_len = decoder->len;
>> + decoder->buf = buf;
>> + decoder->len = old_len;
>> + return intel_pt_bad_packet(decoder);
>> + }
>> +
>> + decoder->next_buf = decoder->buf + (ret - old_len);
>> + decoder->next_len = decoder->len - (ret - old_len);
>> +
>> + decoder->buf = buf;
>> + decoder->len = ret;
>> +
>> + return ret;
>> +}
>> +
>> +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
>> +{
>> + int ret;
>> +
>> + do {
>> + decoder->pos += decoder->pkt_step;
>> + decoder->buf += decoder->pkt_step;
>> + decoder->len -= decoder->pkt_step;
>> +
>> + if (!decoder->len) {
>> + ret = intel_pt_get_next_data(decoder);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + ret = intel_pt_get_packet(decoder->buf, decoder->len,
>> + &decoder->packet);
>> + if (ret == INTEL_PT_NEED_MORE_BYTES &&
>> + decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
>> + ret = intel_pt_get_split_packet(decoder);
>> + if (ret < 0)
>> + return ret;
>> + }
>> + if (ret <= 0)
>> + return intel_pt_bad_packet(decoder);
>> +
>> + decoder->pkt_len = ret;
>> + decoder->pkt_step = ret;
>> + intel_pt_decoder_log_packet(decoder);
>> + } while (decoder->packet.type == INTEL_PT_PAD);
>> +
>> + return 0;
>> +}
>> +
>> +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
>> +{
>> + uint64_t timestamp, masked_timestamp;
>> +
>> + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
>> + masked_timestamp = timestamp & decoder->period_mask;
>> + if (decoder->continuous_period) {
>> + if (masked_timestamp != decoder->last_masked_timestamp)
>> + return 1;
>> + } else {
>> + timestamp += 1;
>> + masked_timestamp = timestamp & decoder->period_mask;
>> + if (masked_timestamp != decoder->last_masked_timestamp) {
>> + decoder->last_masked_timestamp = masked_timestamp;
>> + decoder->continuous_period = true;
>> + }
>> + }
>> + return decoder->period_ticks - (timestamp - masked_timestamp);
>> +}
>> +
>> +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
>> +{
>> + switch (decoder->period_type) {
>> + case INTEL_PT_PERIOD_INSTRUCTIONS:
>> + return decoder->period - decoder->period_insn_cnt;
>> + case INTEL_PT_PERIOD_TICKS:
>> + return intel_pt_next_period(decoder);
>> + case INTEL_PT_PERIOD_NONE:
>> + default:
>> + return 0;
>> + }
>> +}
>> +
>> +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
>> +{
>> + uint64_t timestamp, masked_timestamp;
>> +
>> + switch (decoder->period_type) {
>> + case INTEL_PT_PERIOD_INSTRUCTIONS:
>> + decoder->period_insn_cnt = 0;
>> + break;
>> + case INTEL_PT_PERIOD_TICKS:
>> + timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
>> + masked_timestamp = timestamp & decoder->period_mask;
>> + decoder->last_masked_timestamp = masked_timestamp;
>> + break;
>> + case INTEL_PT_PERIOD_NONE:
>> + default:
>> + break;
>> + }
>> +
>> + decoder->state.type |= INTEL_PT_INSTRUCTION;
>> +}
>> +
>> +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
>> + struct intel_pt_insn *intel_pt_insn, uint64_t ip)
>> +{
>> + uint64_t max_insn_cnt, insn_cnt = 0;
>> + int err;
>> +
>> + max_insn_cnt = intel_pt_next_sample(decoder);
>> +
>> + err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
>> + max_insn_cnt, decoder->data);
>> +
>> + decoder->timestamp_insn_cnt += insn_cnt;
>> + decoder->period_insn_cnt += insn_cnt;
>> +
>> + if (err) {
>> + decoder->pkt_state = INTEL_PT_STATE_ERR2;
>> + intel_pt_log_at("ERROR: Failed to get instruction",
>> + decoder->ip);
>> + if (err == -ENOENT)
>> + return -ENOLINK;
>> + return -EILSEQ;
>> + }
>> +
>> + if (ip && decoder->ip == ip) {
>> + err = -EAGAIN;
>> + goto out;
>> + }
>> +
>> + if (max_insn_cnt && insn_cnt >= max_insn_cnt)
>> + intel_pt_sample_insn(decoder);
>> +
>> + if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
>> + decoder->state.type = INTEL_PT_INSTRUCTION;
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->state.to_ip = 0;
>> + decoder->ip += intel_pt_insn->length;
>> + err = INTEL_PT_RETURN;
>> + goto out;
>> + }
>> +
>> + if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
>> + /* Zero-length calls are excluded */
>> + if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
>> + intel_pt_insn->rel) {
>> + err = intel_pt_push(&decoder->stack, decoder->ip +
>> + intel_pt_insn->length);
>> + if (err)
>> + goto out;
>> + }
>> + } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
>> + decoder->ret_addr = intel_pt_pop(&decoder->stack);
>> + }
>> +
>> + if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->ip += intel_pt_insn->length +
>> + intel_pt_insn->rel;
>> + decoder->state.to_ip = decoder->ip;
>> + err = INTEL_PT_RETURN;
>> + }
>> +out:
>> + decoder->state.insn_op = intel_pt_insn->op;
>> + decoder->state.insn_len = intel_pt_insn->length;
>> +
>> + if (decoder->tx_flags & INTEL_PT_IN_TX)
>> + decoder->state.flags |= INTEL_PT_IN_TX;
>> +
>> + return err;
>> +}
>> +
>> +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
>> +{
>> + struct intel_pt_insn intel_pt_insn;
>> + uint64_t ip;
>> + int err;
>> +
>> + ip = decoder->last_ip;
>> +
>> + while (1) {
>> + err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
>> + if (err == INTEL_PT_RETURN)
>> + return 0;
>> + if (err == -EAGAIN) {
>> + if (decoder->set_fup_tx_flags) {
>> + decoder->set_fup_tx_flags = false;
>> + decoder->tx_flags = decoder->fup_tx_flags;
>> + decoder->state.type = INTEL_PT_TRANSACTION;
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->state.to_ip = 0;
>> + decoder->state.flags = decoder->fup_tx_flags;
>> + return 0;
>> + }
>> + return err;
>> + }
>> + decoder->set_fup_tx_flags = false;
>> + if (err)
>> + return err;
>> +
>> + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
>> + intel_pt_log_at("ERROR: Unexpected indirect branch",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
>> + return -ENOENT;
>> + }
>> +
>> + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
>> + intel_pt_log_at("ERROR: Unexpected conditional branch",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
>> + return -ENOENT;
>> + }
>> +
>> + intel_pt_bug(decoder);
>> + }
>> +}
>> +
>> +static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
>> +{
>> + struct intel_pt_insn intel_pt_insn;
>> + int err;
>> +
>> + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
>> + if (err == INTEL_PT_RETURN)
>> + return 0;
>> + if (err)
>> + return err;
>> +
>> + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
>> + if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
>> + decoder->pge = false;
>> + decoder->continuous_period = false;
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->state.to_ip = 0;
>> + if (decoder->packet.count != 0)
>> + decoder->ip = decoder->last_ip;
>> + } else {
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + decoder->state.from_ip = decoder->ip;
>> + if (decoder->packet.count == 0) {
>> + decoder->state.to_ip = 0;
>> + } else {
>> + decoder->state.to_ip = decoder->last_ip;
>> + decoder->ip = decoder->last_ip;
>> + }
>> + }
>> + return 0;
>> + }
>> +
>> + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
>> + intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
>> + return -ENOENT;
>> + }
>> +
>> + return intel_pt_bug(decoder);
>> +}
>> +
>> +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
>> +{
>> + struct intel_pt_insn intel_pt_insn;
>> + int err;
>> +
>> + while (1) {
>> + err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
>> + if (err == INTEL_PT_RETURN)
>> + return 0;
>> + if (err)
>> + return err;
>> +
>> + if (intel_pt_insn.op == INTEL_PT_OP_RET) {
>> + if (!decoder->return_compression) {
>> + intel_pt_log_at("ERROR: RET when expecting conditional branch",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
>> + return -ENOENT;
>> + }
>> + if (!decoder->ret_addr) {
>> + intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
>> + return -ENOENT;
>> + }
>> + if (!(decoder->tnt.payload & BIT63)) {
>> + intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
>> + return -ENOENT;
>> + }
>> + decoder->tnt.count -= 1;
>> + if (!decoder->tnt.count)
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + decoder->tnt.payload <<= 1;
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->ip = decoder->ret_addr;
>> + decoder->state.to_ip = decoder->ip;
>> + return 0;
>> + }
>> +
>> + if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
>> + /* Handle deferred TIPs */
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> + if (decoder->packet.type != INTEL_PT_TIP ||
>> + decoder->packet.count == 0) {
>> + intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
>> + decoder->ip);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
>> + decoder->pkt_step = 0;
>> + return -ENOENT;
>> + }
>> + intel_pt_set_last_ip(decoder);
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->state.to_ip = decoder->last_ip;
>> + decoder->ip = decoder->last_ip;
>> + return 0;
>> + }
>> +
>> + if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
>> + decoder->tnt.count -= 1;
>> + if (!decoder->tnt.count)
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + if (decoder->tnt.payload & BIT63) {
>> + decoder->tnt.payload <<= 1;
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->ip += intel_pt_insn.length +
>> + intel_pt_insn.rel;
>> + decoder->state.to_ip = decoder->ip;
>> + return 0;
>> + }
>> + /* Instruction sample for a non-taken branch */
>> + if (decoder->state.type & INTEL_PT_INSTRUCTION) {
>> + decoder->tnt.payload <<= 1;
>> + decoder->state.type = INTEL_PT_INSTRUCTION;
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->state.to_ip = 0;
>> + decoder->ip += intel_pt_insn.length;
>> + return 0;
>> + }
>> + decoder->ip += intel_pt_insn.length;
>> + if (!decoder->tnt.count)
>> + return -EAGAIN;
>> + decoder->tnt.payload <<= 1;
>> + continue;
>> + }
>> +
>> + return intel_pt_bug(decoder);
>> + }
>> +}
>> +
>> +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
>> +{
>> + unsigned int fup_tx_flags;
>> + int err;
>> +
>> + fup_tx_flags = decoder->packet.payload &
>> + (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> + if (decoder->packet.type == INTEL_PT_FUP) {
>> + decoder->fup_tx_flags = fup_tx_flags;
>> + decoder->set_fup_tx_flags = true;
>> + if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
>> + *no_tip = true;
>> + } else {
>> + intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
>> + decoder->pos);
>> + intel_pt_update_in_tx(decoder);
>> + }
>> + return 0;
>> +}
>> +
>> +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
>> +{
>> + uint64_t timestamp;
>> +
>> + if (decoder->ref_timestamp) {
>> + timestamp = decoder->packet.payload |
>> + (decoder->ref_timestamp & (0xffULL << 56));
>> + if (timestamp < decoder->ref_timestamp) {
>> + if (decoder->ref_timestamp - timestamp > (1ULL << 55))
>> + timestamp += (1ULL << 56);
>> + } else {
>> + if (timestamp - decoder->ref_timestamp > (1ULL << 55))
>> + timestamp -= (1ULL << 56);
>> + }
>> + decoder->tsc_timestamp = timestamp;
>> + decoder->timestamp = timestamp;
>> + decoder->ref_timestamp = 0;
>> + decoder->timestamp_insn_cnt = 0;
>> + } else if (decoder->timestamp) {
>> + timestamp = decoder->packet.payload |
>> + (decoder->timestamp & (0xffULL << 56));
>> + if (timestamp < decoder->timestamp &&
>> + decoder->timestamp - timestamp < 0x100) {
>> + intel_pt_log_to("ERROR: Suppressing backwards timestamp",
>> + timestamp);
>> + timestamp = decoder->timestamp;
>> + }
>> + while (timestamp < decoder->timestamp) {
>> + intel_pt_log_to("Wraparound timestamp", timestamp);
>> + timestamp += (1ULL << 56);
>> + }
>> + decoder->tsc_timestamp = timestamp;
>> + decoder->timestamp = timestamp;
>> + decoder->timestamp_insn_cnt = 0;
>> + }
>> +
>> + intel_pt_log_to("Setting timestamp", decoder->timestamp);
>> +}
>> +
>> +static int intel_pt_overflow(struct intel_pt_decoder *decoder)
>> +{
>> + intel_pt_log("ERROR: Buffer overflow\n");
>> + intel_pt_clear_tx_flags(decoder);
>> + decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
>> + decoder->overflow = true;
>> + return -EOVERFLOW;
>> +}
>> +
>> +/* Walk PSB+ packets when already in sync. */
>> +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + while (1) {
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> +
>> + switch (decoder->packet.type) {
>> + case INTEL_PT_PSBEND:
>> + return 0;
>> +
>> + case INTEL_PT_TIP_PGD:
>> + case INTEL_PT_TIP_PGE:
>> + case INTEL_PT_TIP:
>> + case INTEL_PT_TNT:
>> + case INTEL_PT_BAD:
>> + case INTEL_PT_PSB:
>> + intel_pt_log("ERROR: Unexpected packet\n");
>> + return -EAGAIN;
>> +
>> + case INTEL_PT_OVF:
>> + return intel_pt_overflow(decoder);
>> +
>> + case INTEL_PT_TSC:
>> + intel_pt_calc_tsc_timestamp(decoder);
>> + break;
>> +
>> + case INTEL_PT_CBR:
>> + decoder->cbr = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_EXEC:
>> + decoder->exec_mode = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_PIP:
>> + decoder->cr3 = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_FUP:
>> + decoder->pge = true;
>> + break;
>> +
>> + case INTEL_PT_MODE_TSX:
>> + intel_pt_update_in_tx(decoder);
>> + break;
>> +
>> + case INTEL_PT_PAD:
>> + default:
>> + break;
>> + }
>> + }
>> +}
>> +
>> +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
>> + decoder->tx_flags = 0;
>> + decoder->state.flags &= ~INTEL_PT_IN_TX;
>> + decoder->state.flags |= INTEL_PT_ABORT_TX;
>> + } else {
>> + decoder->state.flags |= INTEL_PT_ASYNC;
>> + }
>> +
>> + while (1) {
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> +
>> + switch (decoder->packet.type) {
>> + case INTEL_PT_TNT:
>> + case INTEL_PT_FUP:
>> + case INTEL_PT_PSB:
>> + case INTEL_PT_TSC:
>> + case INTEL_PT_CBR:
>> + case INTEL_PT_MODE_TSX:
>> + case INTEL_PT_BAD:
>> + case INTEL_PT_PSBEND:
>> + intel_pt_log("ERROR: Missing TIP after FUP\n");
>> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
>> + return -ENOENT;
>> +
>> + case INTEL_PT_OVF:
>> + return intel_pt_overflow(decoder);
>> +
>> + case INTEL_PT_TIP_PGD:
>> + decoder->state.from_ip = decoder->ip;
>> + decoder->state.to_ip = 0;
>> + if (decoder->packet.count != 0) {
>> + intel_pt_set_ip(decoder);
>> + intel_pt_log("Omitting PGD ip " x64_fmt "\n",
>> + decoder->ip);
>> + }
>> + decoder->pge = false;
>> + decoder->continuous_period = false;
>> + return 0;
>> +
>> + case INTEL_PT_TIP_PGE:
>> + decoder->pge = true;
>> + intel_pt_log("Omitting PGE ip " x64_fmt "\n",
>> + decoder->ip);
>> + decoder->state.from_ip = 0;
>> + if (decoder->packet.count == 0) {
>> + decoder->state.to_ip = 0;
>> + } else {
>> + intel_pt_set_ip(decoder);
>> + decoder->state.to_ip = decoder->ip;
>> + }
>> + return 0;
>> +
>> + case INTEL_PT_TIP:
>> + decoder->state.from_ip = decoder->ip;
>> + if (decoder->packet.count == 0) {
>> + decoder->state.to_ip = 0;
>> + } else {
>> + intel_pt_set_ip(decoder);
>> + decoder->state.to_ip = decoder->ip;
>> + }
>> + return 0;
>> +
>> + case INTEL_PT_PIP:
>> + decoder->cr3 = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_EXEC:
>> + decoder->exec_mode = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_PAD:
>> + break;
>> +
>> + default:
>> + return intel_pt_bug(decoder);
>> + }
>> + }
>> +}
>> +
>> +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
>> +{
>> + bool no_tip = false;
>> + int err;
>> +
>> + while (1) {
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> +next:
>> + switch (decoder->packet.type) {
>> + case INTEL_PT_TNT:
>> + if (!decoder->packet.count)
>> + break;
>> + decoder->tnt = decoder->packet;
>> + decoder->pkt_state = INTEL_PT_STATE_TNT;
>> + err = intel_pt_walk_tnt(decoder);
>> + if (err == -EAGAIN)
>> + break;
>> + return err;
>> +
>> + case INTEL_PT_TIP_PGD:
>> + if (decoder->packet.count != 0)
>> + intel_pt_set_last_ip(decoder);
>> + decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
>> + return intel_pt_walk_tip(decoder);
>> +
>> + case INTEL_PT_TIP_PGE: {
>> + decoder->pge = true;
>> + if (decoder->packet.count == 0) {
>> + intel_pt_log_at("Skipping zero TIP.PGE",
>> + decoder->pos);
>> + break;
>> + }
>> + intel_pt_set_ip(decoder);
>> + decoder->state.from_ip = 0;
>> + decoder->state.to_ip = decoder->ip;
>> + return 0;
>> + }
>> +
>> + case INTEL_PT_OVF:
>> + return intel_pt_overflow(decoder);
>> +
>> + case INTEL_PT_TIP:
>> + if (decoder->packet.count != 0)
>> + intel_pt_set_last_ip(decoder);
>> + decoder->pkt_state = INTEL_PT_STATE_TIP;
>> + return intel_pt_walk_tip(decoder);
>> +
>> + case INTEL_PT_FUP:
>> + if (decoder->packet.count == 0) {
>> + intel_pt_log_at("Skipping zero FUP",
>> + decoder->pos);
>> + no_tip = false;
>> + break;
>> + }
>> + intel_pt_set_last_ip(decoder);
>> + err = intel_pt_walk_fup(decoder);
>> + if (err != -EAGAIN) {
>> + if (err)
>> + return err;
>> + if (no_tip)
>> + decoder->pkt_state =
>> + INTEL_PT_STATE_FUP_NO_TIP;
>> + else
>> + decoder->pkt_state = INTEL_PT_STATE_FUP;
>> + return 0;
>> + }
>> + if (no_tip) {
>> + no_tip = false;
>> + break;
>> + }
>> + return intel_pt_walk_fup_tip(decoder);
>> +
>> + case INTEL_PT_PSB:
>> + intel_pt_clear_stack(&decoder->stack);
>> + err = intel_pt_walk_psbend(decoder);
>> + if (err == -EAGAIN)
>> + goto next;
>> + if (err)
>> + return err;
>> + break;
>> +
>> + case INTEL_PT_PIP:
>> + decoder->cr3 = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_TSC:
>> + intel_pt_calc_tsc_timestamp(decoder);
>> + break;
>> +
>> + case INTEL_PT_CBR:
>> + decoder->cbr = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_EXEC:
>> + decoder->exec_mode = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_TSX:
>> + /* MODE_TSX need not be followed by FUP */
>> + if (!decoder->pge) {
>> + intel_pt_update_in_tx(decoder);
>> + break;
>> + }
>> + err = intel_pt_mode_tsx(decoder, &no_tip);
>> + if (err)
>> + return err;
>> + goto next;
>> +
>> + case INTEL_PT_BAD: /* Does not happen */
>> + return intel_pt_bug(decoder);
>> +
>> + case INTEL_PT_PSBEND:
>> + case INTEL_PT_PAD:
>> + break;
>> +
>> + default:
>> + return intel_pt_bug(decoder);
>> + }
>> + }
>> +}
>> +
>> +/* Walk PSB+ packets to get in sync. */
>> +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + while (1) {
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> +
>> + switch (decoder->packet.type) {
>> + case INTEL_PT_TIP_PGD:
>> + decoder->continuous_period = false;
>> + case INTEL_PT_TIP_PGE:
>> + case INTEL_PT_TIP:
>> + intel_pt_log("ERROR: Unexpected packet\n");
>> + return -ENOENT;
>> +
>> + case INTEL_PT_FUP:
>> + decoder->pge = true;
>> + if (decoder->last_ip || decoder->packet.count == 6 ||
>> + decoder->packet.count == 0) {
>> + uint64_t current_ip = decoder->ip;
>> +
>> + intel_pt_set_ip(decoder);
>> + if (current_ip)
>> + intel_pt_log_to("Setting IP",
>> + decoder->ip);
>> + }
>> + break;
>> +
>> + case INTEL_PT_TSC:
>> + intel_pt_calc_tsc_timestamp(decoder);
>> + break;
>> +
>> + case INTEL_PT_CBR:
>> + decoder->cbr = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_PIP:
>> + decoder->cr3 = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_EXEC:
>> + decoder->exec_mode = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_TSX:
>> + intel_pt_update_in_tx(decoder);
>> + break;
>> +
>> + case INTEL_PT_TNT:
>> + intel_pt_log("ERROR: Unexpected packet\n");
>> + if (decoder->ip)
>> + decoder->pkt_state = INTEL_PT_STATE_ERR4;
>> + else
>> + decoder->pkt_state = INTEL_PT_STATE_ERR3;
>> + return -ENOENT;
>> +
>> + case INTEL_PT_BAD: /* Does not happen */
>> + return intel_pt_bug(decoder);
>> +
>> + case INTEL_PT_OVF:
>> + return intel_pt_overflow(decoder);
>> +
>> + case INTEL_PT_PSBEND:
>> + return 0;
>> +
>> + case INTEL_PT_PSB:
>> + case INTEL_PT_PAD:
>> + default:
>> + break;
>> + }
>> + }
>> +}
>> +
>> +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + while (1) {
>> + err = intel_pt_get_next_packet(decoder);
>> + if (err)
>> + return err;
>> +
>> + switch (decoder->packet.type) {
>> + case INTEL_PT_TIP_PGD:
>> + decoder->continuous_period = false;
>> + case INTEL_PT_TIP_PGE:
>> + case INTEL_PT_TIP:
>> + decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
>> + if (decoder->last_ip || decoder->packet.count == 6 ||
>> + decoder->packet.count == 0)
>> + intel_pt_set_ip(decoder);
>> + if (decoder->ip)
>> + return 0;
>> + break;
>> +
>> + case INTEL_PT_FUP:
>> + if (decoder->overflow) {
>> + if (decoder->last_ip ||
>> + decoder->packet.count == 6 ||
>> + decoder->packet.count == 0)
>> + intel_pt_set_ip(decoder);
>> + if (decoder->ip)
>> + return 0;
>> + }
>> + if (decoder->packet.count)
>> + intel_pt_set_last_ip(decoder);
>> + break;
>> +
>> + case INTEL_PT_TSC:
>> + intel_pt_calc_tsc_timestamp(decoder);
>> + break;
>> +
>> + case INTEL_PT_CBR:
>> + decoder->cbr = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_PIP:
>> + decoder->cr3 = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_EXEC:
>> + decoder->exec_mode = decoder->packet.payload;
>> + break;
>> +
>> + case INTEL_PT_MODE_TSX:
>> + intel_pt_update_in_tx(decoder);
>> + break;
>> +
>> + case INTEL_PT_OVF:
>> + return intel_pt_overflow(decoder);
>> +
>> + case INTEL_PT_BAD: /* Does not happen */
>> + return intel_pt_bug(decoder);
>> +
>> + case INTEL_PT_PSB:
>> + err = intel_pt_walk_psb(decoder);
>> + if (err)
>> + return err;
>> + if (decoder->ip) {
>> + /* Do not have a sample */
>> + decoder->state.type = 0;
>> + return 0;
>> + }
>> + break;
>> +
>> + case INTEL_PT_TNT:
>> + case INTEL_PT_PSBEND:
>> + case INTEL_PT_PAD:
>> + default:
>> + break;
>> + }
>> + }
>> +}
>> +
>> +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + intel_pt_log("Scanning for full IP\n");
>> + err = intel_pt_walk_to_ip(decoder);
>> + if (err)
>> + return err;
>> +
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + decoder->overflow = false;
>> +
>> + decoder->state.from_ip = 0;
>> + decoder->state.to_ip = decoder->ip;
>> + intel_pt_log_to("Setting IP", decoder->ip);
>> +
>> + return 0;
>> +}
>> +
>> +static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
>> +{
>> + const unsigned char *end = decoder->buf + decoder->len;
>> + size_t i;
>> +
>> + for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
>> + if (i > decoder->len)
>> + continue;
>> + if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
>> + return i;
>> + }
>> + return 0;
>> +}
>> +
>> +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
>> +{
>> + size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
>> + const char *psb = INTEL_PT_PSB_STR;
>> +
>> + if (rest_psb > decoder->len ||
>> + memcmp(decoder->buf, psb + part_psb, rest_psb))
>> + return 0;
>> +
>> + return rest_psb;
>> +}
>> +
>> +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
>> + int part_psb)
>> +{
>> + int rest_psb, ret;
>> +
>> + decoder->pos += decoder->len;
>> + decoder->len = 0;
>> +
>> + ret = intel_pt_get_next_data(decoder);
>> + if (ret)
>> + return ret;
>> +
>> + rest_psb = intel_pt_rest_psb(decoder, part_psb);
>> + if (!rest_psb)
>> + return 0;
>> +
>> + decoder->pos -= part_psb;
>> + decoder->next_buf = decoder->buf + rest_psb;
>> + decoder->next_len = decoder->len - rest_psb;
>> + memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
>> + decoder->buf = decoder->temp_buf;
>> + decoder->len = INTEL_PT_PSB_LEN;
>> +
>> + return 0;
>> +}
>> +
>> +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
>> +{
>> + unsigned char *next;
>> + int ret;
>> +
>> + intel_pt_log("Scanning for PSB\n");
>> + while (1) {
>> + if (!decoder->len) {
>> + ret = intel_pt_get_next_data(decoder);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
>> + INTEL_PT_PSB_LEN);
>> + if (!next) {
>> + int part_psb;
>> +
>> + part_psb = intel_pt_part_psb(decoder);
>> + if (part_psb) {
>> + ret = intel_pt_get_split_psb(decoder, part_psb);
>> + if (ret)
>> + return ret;
>> + } else {
>> + decoder->pos += decoder->len;
>> + decoder->len = 0;
>> + }
>> + continue;
>> + }
>> +
>> + decoder->pkt_step = next - decoder->buf;
>> + return intel_pt_get_next_packet(decoder);
>> + }
>> +}
>> +
>> +static int intel_pt_sync(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + decoder->pge = false;
>> + decoder->continuous_period = false;
>> + decoder->last_ip = 0;
>> + decoder->ip = 0;
>> + intel_pt_clear_stack(&decoder->stack);
>> +
>> + err = intel_pt_scan_for_psb(decoder);
>> + if (err)
>> + return err;
>> +
>> + decoder->pkt_state = INTEL_PT_STATE_NO_IP;
>> +
>> + err = intel_pt_walk_psb(decoder);
>> + if (err)
>> + return err;
>> +
>> + if (decoder->ip) {
>> + decoder->state.type = 0; /* Do not have a sample */
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + } else {
>> + return intel_pt_sync_ip(decoder);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
>> +{
>> + int err;
>> +
>> + do {
>> + decoder->state.type = INTEL_PT_BRANCH;
>> + decoder->state.flags = 0;
>> +
>> + switch (decoder->pkt_state) {
>> + case INTEL_PT_STATE_NO_PSB:
>> + err = intel_pt_sync(decoder);
>> + break;
>> + case INTEL_PT_STATE_NO_IP:
>> + decoder->last_ip = 0;
>> + /* Fall through */
>> + case INTEL_PT_STATE_ERR_RESYNC:
>> + err = intel_pt_sync_ip(decoder);
>> + break;
>> + case INTEL_PT_STATE_IN_SYNC:
>> + err = intel_pt_walk_trace(decoder);
>> + break;
>> + case INTEL_PT_STATE_TNT:
>> + err = intel_pt_walk_tnt(decoder);
>> + if (err == -EAGAIN)
>> + err = intel_pt_walk_trace(decoder);
>> + break;
>> + case INTEL_PT_STATE_TIP:
>> + case INTEL_PT_STATE_TIP_PGD:
>> + err = intel_pt_walk_tip(decoder);
>> + break;
>> + case INTEL_PT_STATE_FUP:
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + err = intel_pt_walk_fup(decoder);
>> + if (err == -EAGAIN)
>> + err = intel_pt_walk_fup_tip(decoder);
>> + else if (!err)
>> + decoder->pkt_state = INTEL_PT_STATE_FUP;
>> + break;
>> + case INTEL_PT_STATE_FUP_NO_TIP:
>> + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
>> + err = intel_pt_walk_fup(decoder);
>> + if (err == -EAGAIN)
>> + err = intel_pt_walk_trace(decoder);
>> + break;
>> + default:
>> + err = intel_pt_bug(decoder);
>> + break;
>> + }
>> + } while (err == -ENOLINK);
>> +
>> + decoder->state.err = err;
>> + decoder->state.timestamp = decoder->timestamp;
>> + decoder->state.est_timestamp = decoder->timestamp +
>> + (decoder->timestamp_insn_cnt << 1);
>> + decoder->state.cr3 = decoder->cr3;
>> +
>> + if (err)
>> + decoder->state.from_ip = decoder->ip;
>> +
>> + return &decoder->state;
>> +}
>> +
>> +static bool intel_pt_at_psb(unsigned char *buf, size_t len)
>> +{
>> + if (len < INTEL_PT_PSB_LEN)
>> + return false;
>> + return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR,
>> + INTEL_PT_PSB_LEN);
>> +}
>> +
>> +/**
>> + * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
>> + * @buf: pointer to buffer pointer
>> + * @len: size of buffer
>> + *
>> + * Updates the buffer pointer to point to the start of the next PSB packet if
>> + * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
>> + * @len is adjusted accordingly.
>> + *
>> + * Return: %true if a PSB packet is found, %false otherwise.
>> + */
>> +static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
>> +{
>> + unsigned char *next;
>> +
>> + next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
>> + if (next) {
>> + *len -= next - *buf;
>> + *buf = next;
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +/**
>> + * intel_pt_step_psb - move buffer pointer to the start of the following PSB
>> + * packet.
>> + * @buf: pointer to buffer pointer
>> + * @len: size of buffer
>> + *
>> + * Updates the buffer pointer to point to the start of the following PSB packet
>> + * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
>> + * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
>> + *
>> + * Return: %true if a PSB packet is found, %false otherwise.
>> + */
>> +static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
>> +{
>> + unsigned char *next;
>> +
>> + if (!*len)
>> + return false;
>> +
>> + next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
>> + if (next) {
>> + *len -= next - *buf;
>> + *buf = next;
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +/**
>> + * intel_pt_last_psb - find the last PSB packet in a buffer.
>> + * @buf: buffer
>> + * @len: size of buffer
>> + *
>> + * This function finds the last PSB in a buffer.
>> + *
>> + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
>> + */
>> +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
>> +{
>> + const char *n = INTEL_PT_PSB_STR;
>> + unsigned char *p;
>> + size_t k;
>> +
>> + if (len < INTEL_PT_PSB_LEN)
>> + return NULL;
>> +
>> + k = len - INTEL_PT_PSB_LEN + 1;
>> + while (1) {
>> + p = memrchr(buf, n[0], k);
>> + if (!p)
>> + return NULL;
>> + if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
>> + return p;
>> + k = p - buf;
>> + if (!k)
>> + return NULL;
>> + }
>> +}
>> +
>> +/**
>> + * intel_pt_next_tsc - find and return next TSC.
>> + * @buf: buffer
>> + * @len: size of buffer
>> + * @tsc: TSC value returned
>> + *
>> + * Find a TSC packet in @buf and return the TSC value. This function assumes
>> + * that @buf starts at a PSB and that the PSB+ will contain a TSC, so it stops
>> + * searching if a PSBEND packet is found.
>> + *
>> + * Return: %true if TSC is found, %false otherwise.
>> + */
>> +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
>> +{
>> + struct intel_pt_pkt packet;
>> + int ret;
>> +
>> + while (len) {
>> + ret = intel_pt_get_packet(buf, len, &packet);
>> + if (ret <= 0)
>> + return false;
>> + if (packet.type == INTEL_PT_TSC) {
>> + *tsc = packet.payload;
>> + return true;
>> + }
>> + if (packet.type == INTEL_PT_PSBEND)
>> + return false;
>> + buf += ret;
>> + len -= ret;
>> + }
>> + return false;
>> +}
>> +
>> +/**
>> + * intel_pt_tsc_cmp - compare 7-byte TSCs.
>> + * @tsc1: first TSC to compare
>> + * @tsc2: second TSC to compare
>> + *
>> + * This function compares 7-byte TSC values, allowing for the possibility that
>> + * TSC wrapped around. Generally it is not possible to know whether TSC has
>> + * wrapped around, so this function assumes that the absolute difference is
>> + * less than half the maximum difference.
>> + *
>> + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
>> + * after @tsc2.
>> + */
>> +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
>> +{
>> + const uint64_t halfway = (1ULL << 55);
>> +
>> + if (tsc1 == tsc2)
>> + return 0;
>> +
>> + if (tsc1 < tsc2) {
>> + if (tsc2 - tsc1 < halfway)
>> + return -1;
>> + else
>> + return 1;
>> + } else {
>> + if (tsc1 - tsc2 < halfway)
>> + return 1;
>> + else
>> + return -1;
>> + }
>> +}
>> +
>> +/**
>> + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
>> + * using TSC.
>> + * @buf_a: first buffer
>> + * @len_a: size of first buffer
>> + * @buf_b: second buffer
>> + * @len_b: size of second buffer
>> + *
>> + * If the trace contains TSC we can look at the last TSC of @buf_a and the
>> + * first TSC of @buf_b in order to determine if the buffers overlap, and then
>> + * walk forward in @buf_b until a later TSC is found. A precondition is that
>> + * @buf_a and @buf_b are positioned at a PSB.
>> + *
>> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
>> + * @buf_b + @len_b if there is no non-overlapped data.
>> + */
>> +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
>> + size_t len_a,
>> + unsigned char *buf_b,
>> + size_t len_b)
>> +{
>> + uint64_t tsc_a, tsc_b;
>> + unsigned char *p;
>> + size_t len;
>> +
>> + p = intel_pt_last_psb(buf_a, len_a);
>> + if (!p)
>> + return buf_b; /* No PSB in buf_a => no overlap */
>> +
>> + len = len_a - (p - buf_a);
>> + if (!intel_pt_next_tsc(p, len, &tsc_a)) {
>> + /* The last PSB+ in buf_a is incomplete, so go back one more */
>> + len_a -= len;
>> + p = intel_pt_last_psb(buf_a, len_a);
>> + if (!p)
>> + return buf_b; /* No full PSB+ => assume no overlap */
>> + len = len_a - (p - buf_a);
>> + if (!intel_pt_next_tsc(p, len, &tsc_a))
>> + return buf_b; /* No TSC in buf_a => assume no overlap */
>> + }
>> +
>> + while (1) {
>> + /* Ignore PSB+ with no TSC */
>> + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
>> + intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
>> + return buf_b; /* tsc_a < tsc_b => no overlap */
>> +
>> + if (!intel_pt_step_psb(&buf_b, &len_b))
>> + return buf_b + len_b; /* No PSB in buf_b => no data */
>> + }
>> +}
>> +
>> +/**
>> + * intel_pt_find_overlap - determine start of non-overlapped trace data.
>> + * @buf_a: first buffer
>> + * @len_a: size of first buffer
>> + * @buf_b: second buffer
>> + * @len_b: size of second buffer
>> + * @have_tsc: can use TSC packets to detect overlap
>> + *
>> + * When trace samples or snapshots are recorded there is the possibility that
>> + * the data overlaps. Note that, for the purposes of decoding, data is only
>> + * useful if it begins with a PSB packet.
>> + *
>> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
>> + * @buf_b + @len_b if there is no non-overlapped data.
>> + */
>> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
>> + unsigned char *buf_b, size_t len_b,
>> + bool have_tsc)
>> +{
>> + unsigned char *found;
>> +
>> + /* Buffer 'b' must start at PSB so throw away everything before that */
>> + if (!intel_pt_next_psb(&buf_b, &len_b))
>> + return buf_b + len_b; /* No PSB */
>> +
>> + if (!intel_pt_next_psb(&buf_a, &len_a))
>> + return buf_b; /* No overlap */
>> +
>> + if (have_tsc) {
>> + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
>> + if (found)
>> + return found;
>> + }
>> +
>> + /*
>> + * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
>> + * we can ignore the first part of buffer 'a'.
>> + */
>> + while (len_b < len_a) {
>> + if (!intel_pt_step_psb(&buf_a, &len_a))
>> + return buf_b; /* No overlap */
>> + }
>> +
>> + /* Now len_b >= len_a */
>> + if (len_b > len_a) {
>> + /* The leftover buffer 'b' must start at a PSB */
>> + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
>> + if (!intel_pt_step_psb(&buf_a, &len_a))
>> + return buf_b; /* No overlap */
>> + }
>> + }
>> +
>> + while (1) {
>> + /* Potential overlap so check the bytes */
>> + found = memmem(buf_a, len_a, buf_b, len_a);
>> + if (found)
>> + return buf_b + len_a;
>> +
>> + /* Try again at next PSB in buffer 'a' */
>> + if (!intel_pt_step_psb(&buf_a, &len_a))
>> + return buf_b; /* No overlap */
>> +
>> + /* The leftover buffer 'b' must start at a PSB */
>> + while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
>> + if (!intel_pt_step_psb(&buf_a, &len_a))
>> + return buf_b; /* No overlap */
>> + }
>> + }
>> +}
>> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
>> new file mode 100644
>> index 0000000..e55615a
>> --- /dev/null
>> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
>> @@ -0,0 +1,89 @@
>> +/*
>> + * intel_pt_decoder.h: Intel Processor Trace support
>> + * Copyright (c) 2013-2014, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>> + * more details.
>> + *
>> + */
>> +
>> +#ifndef INCLUDE__INTEL_PT_DECODER_H__
>> +#define INCLUDE__INTEL_PT_DECODER_H__
>> +
>> +#include <stdint.h>
>> +#include <stddef.h>
>> +#include <stdbool.h>
>> +
>> +#include "intel-pt-insn-decoder.h"
>> +
>> +#define INTEL_PT_IN_TX (1 << 0)
>> +#define INTEL_PT_ABORT_TX (1 << 1)
>> +#define INTEL_PT_ASYNC (1 << 2)
>> +
>> +enum intel_pt_sample_type {
>> + INTEL_PT_BRANCH = 1 << 0,
>> + INTEL_PT_INSTRUCTION = 1 << 1,
>> + INTEL_PT_TRANSACTION = 1 << 2,
>> +};
>> +
>> +enum intel_pt_period_type {
>> + INTEL_PT_PERIOD_NONE,
>> + INTEL_PT_PERIOD_INSTRUCTIONS,
>> + INTEL_PT_PERIOD_TICKS,
>> +};
>> +
>> +struct intel_pt_state {
>> + enum intel_pt_sample_type type;
>> + int err;
>> + uint64_t from_ip;
>> + uint64_t to_ip;
>> + uint64_t cr3;
>> + uint64_t timestamp;
>> + uint64_t est_timestamp;
>> + uint64_t trace_nr;
>> + uint32_t flags;
>> + enum intel_pt_insn_op insn_op;
>> + int insn_len;
>> +};
>> +
>> +struct intel_pt_insn;
>> +
>> +struct intel_pt_buffer {
>> + const unsigned char *buf;
>> + size_t len;
>> + bool consecutive;
>> + uint64_t ref_timestamp;
>> + uint64_t trace_nr;
>> +};
>> +
>> +struct intel_pt_params {
>> + int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
>> + int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
>> + uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
>> + uint64_t max_insn_cnt, void *data);
>> + void *data;
>> + bool return_compression;
>> + uint64_t period;
>> + enum intel_pt_period_type period_type;
>> +};
>> +
>> +struct intel_pt_decoder;
>> +
>> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
>> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
>> +
>> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
>> +
>> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
>> + unsigned char *buf_b, size_t len_b,
>> + bool have_tsc);
>> +
>> +const char *intel_pt_error_message(int code);
>> +
>> +#endif
>> --
>> 1.9.1
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/