Re: [PATCH v2 3/8] KVM: selftests: Test Intel PMU architectural events on gp counters

From: Sean Christopherson
Date: Wed Jun 28 2023 - 16:44:05 EST


On Tue, May 30, 2023, Jinrong Liang wrote:
> +/* Guest payload for any performance counter counting */
> +#define NUM_BRANCHES 10
> +
> +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
> + void *guest_code)
> +{
> + struct kvm_vm *vm;
> +
> + vm = vm_create_with_one_vcpu(vcpu, guest_code);
> + vm_init_descriptor_tables(vm);
> + vcpu_init_descriptor_tables(*vcpu);
> +
> + return vm;
> +}
> +
> +static uint64_t run_vcpu(struct kvm_vcpu *vcpu, uint64_t *ucall_arg)
> +{
> + struct ucall uc;
> +
> + vcpu_run(vcpu);
> + switch (get_ucall(vcpu, &uc)) {
> + case UCALL_SYNC:
> + *ucall_arg = uc.args[1];
> + break;
> + case UCALL_DONE:
> + break;
> + default:
> + TEST_ASSERT(false, "Unexpected exit: %s",
> + exit_reason_str(vcpu->run->exit_reason));

Use TEST_FAIL() here instead of TEST_ASSERT(false, ...).

> + }
> + return uc.cmd;
> +}
> +
> +static void intel_guest_run_arch_event(uint8_t version, uint8_t max_gp_num,

Unless I'm mistaken, this isn't specific to arch events. And with a bit of
massaging, it doesn't need to be Intel-specific. Typically we try to avoid
speculatively creating infrastructure, but in this case we *know* AMD has vPMU
support, and we *know* from KVM-Unit-Tests that accounting for the differences
between MSRs on Intel vs. AMD is doable, so we should write code with an eye
toward supporting both AMD and Intel.

And then we can avoid having to prefix so many functions with "intel", e.g. this
can be something like

static void guest_measure_loop()

or whatever.

> + uint32_t ctr_base_msr, uint64_t evt_code)
> +{
> + uint32_t global_msr = MSR_CORE_PERF_GLOBAL_CTRL;
> + unsigned int i;
> +
> + for (i = 0; i < max_gp_num; i++) {
> + wrmsr(ctr_base_msr + i, 0);
> + wrmsr(MSR_P6_EVNTSEL0 + i, EVENTSEL_OS | EVENTSEL_EN | evt_code);
> + if (version > 1)
> + wrmsr(global_msr, BIT_ULL(i));
> +
> + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> +
> + if (version > 1)
> + wrmsr(global_msr, 0);
> +
> + GUEST_SYNC(_rdpmc(i));
> + }
> +
> + GUEST_DONE();
> +}
> +
> +static void test_arch_events_cpuid(struct kvm_vcpu *vcpu, uint8_t evt_vector,

"vector" is confusing, as "vector" usually refers to a vector number, e.g. for
IRQs and exceptions. This is the _length_ of a so-called vector. I vote to ignore
the SDM's use of "vector" in this case and instead call it something like
arch_events_bitmap_size. And then arch_events_unavailable_mask?

> + uint8_t unavl_mask, uint8_t idx)
> +{
> + struct kvm_cpuid_entry2 *entry;
> + uint32_t ctr_msr = MSR_IA32_PERFCTR0;
> + bool is_supported;
> + uint64_t counter_val = 0;
> +
> + entry = vcpu_get_cpuid_entry(vcpu, 0xa);
> + entry->eax = (entry->eax & ~EVT_LEN_MASK) |
> + (evt_vector << EVT_LEN_OFS_BIT);

EVT_LEN_OFS_BIT can be a KVM_X86_CPU_PROPERTY. And please also add a helper to set
properties, the whole point of the FEATURE and PROPERTY frameworks is to avoid
open coding CPUID manipulations. E.g.

static inline void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
uint32_t value)
{
...
}

> + entry->ebx = (entry->ebx & ~EVENTS_MASK) | unavl_mask;
> + vcpu_set_cpuid(vcpu);
> +
> + if (vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
> + ctr_msr = MSR_IA32_PMC0;

This can be done in the guest, no?

> +
> + /* Arch event x is supported if EBX[x]=0 && EAX[31:24]>x */
> + is_supported = !(entry->ebx & BIT_ULL(idx)) &&
> + (((entry->eax & EVT_LEN_MASK) >> EVT_LEN_OFS_BIT) > idx);

Please add a helper for this.

> +
> + vcpu_args_set(vcpu, 4, X86_INTEL_PMU_VERSION, X86_INTEL_MAX_GP_CTR_NUM,
> + ctr_msr, arch_events[idx]);
> +
> + while (run_vcpu(vcpu, &counter_val) != UCALL_DONE)
> + TEST_ASSERT(is_supported == !!counter_val,
> + "Unavailable arch event is counting.");
> +}
> +
> +static void intel_check_arch_event_is_unavl(uint8_t idx)
> +{
> + uint8_t eax_evt_vec, ebx_unavl_mask, i, j;
> + struct kvm_vcpu *vcpu;
> + struct kvm_vm *vm;
> +
> + /*
> + * A brute force iteration of all combinations of values is likely to
> + * exhaust the limit of the single-threaded thread fd nums, so it's
> + * tested here by iterating through all valid values on a single bit.
> + */
> + for (i = 0; i < ARRAY_SIZE(arch_events); i++) {
> + eax_evt_vec = BIT_ULL(i);
> + for (j = 0; j < ARRAY_SIZE(arch_events); j++) {
> + ebx_unavl_mask = BIT_ULL(j);
> + vm = pmu_vm_create_with_one_vcpu(&vcpu,
> + intel_guest_run_arch_event);
> + test_arch_events_cpuid(vcpu, eax_evt_vec,
> + ebx_unavl_mask, idx);
> +
> + kvm_vm_free(vm);
> + }
> + }
> +}
> +
> +static void intel_test_arch_events(void)
> +{
> + uint8_t idx;
> +
> + for (idx = 0; idx < ARRAY_SIZE(arch_events); idx++) {
> + /*
> + * Given the stability of performance event recurrence,
> + * only these arch events are currently being tested:
> + *
> + * - Core cycle event (idx = 0)
> + * - Instruction retired event (idx = 1)
> + * - Reference cycles event (idx = 2)
> + * - Branch instruction retired event (idx = 5)
> + *
> + * Note that reference cycles is one event that actually cannot
> + * be successfully virtualized.
> + */
> + if (idx > 2 && idx != 5)

As requested in a previous patch, use enums; then the need to document the magic
numbers goes away.

> + continue;
> +
> + intel_check_arch_event_is_unavl(idx);
> + }
> +}
> +
> +static void intel_test_pmu_cpuid(void)
> +{
> + intel_test_arch_events();

Either put the Intel-specific TEST_REQUIRE()s in here, or open code the calls.
Adding a helper and then splitting code across the helper and its sole caller is
unnecessary.

> +}
> +
> +int main(int argc, char *argv[])
> +{
> + TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> +
> + if (host_cpu_is_intel) {

Presumably AMD will be supported at some point, but until then, this needs to be

TEST_REQUIRE(host_cpu_is_intel);

> + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
> + TEST_REQUIRE(X86_INTEL_PMU_VERSION > 0);
> + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
> +
> + intel_test_pmu_cpuid();
> + }
> +
> + return 0;
> +}
> --
> 2.31.1
>