Re: [PATCH v3 03/11] KVM: selftests: Test Intel PMU architectural events on gp counters

From: Sean Christopherson
Date: Thu Aug 17 2023 - 18:55:28 EST


On Mon, Aug 14, 2023, Jinrong Liang wrote:
> Add test case for AMD Guest PerfMonV2. Also test Intel
> MSR_CORE_PERF_GLOBAL_STATUS and MSR_CORE_PERF_GLOBAL_OVF_CTRL.
>
> Signed-off-by: Jinrong Liang <cloudliang@xxxxxxxxxxx>
> ---
> .../kvm/x86_64/pmu_basic_functionality_test.c | 48 ++++++++++++++++++-
> 1 file changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c b/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c
> index cb2a7ad5c504..02bd1fe3900b 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c
> @@ -58,7 +58,9 @@ static uint64_t run_vcpu(struct kvm_vcpu *vcpu, uint64_t *ucall_arg)
>
> static void guest_measure_loop(uint64_t event_code)
> {
> + uint64_t global_ovf_ctrl_msr, global_status_msr, global_ctrl_msr;
> uint8_t nr_gp_counters, pmu_version = 1;
> + uint8_t gp_counter_bit_width = 48;
> uint64_t event_sel_msr;
> uint32_t counter_msr;
> unsigned int i;
> @@ -68,6 +70,12 @@ static void guest_measure_loop(uint64_t event_code)
> pmu_version = this_cpu_property(X86_PROPERTY_PMU_VERSION);
> event_sel_msr = MSR_P6_EVNTSEL0;
>
> + if (pmu_version > 1) {
> + global_ovf_ctrl_msr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
> + global_status_msr = MSR_CORE_PERF_GLOBAL_STATUS;
> + global_ctrl_msr = MSR_CORE_PERF_GLOBAL_CTRL;
> + }
> +
> if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
> counter_msr = MSR_IA32_PMC0;
> else
> @@ -76,6 +84,17 @@ static void guest_measure_loop(uint64_t event_code)
> nr_gp_counters = AMD64_NR_COUNTERS;
> event_sel_msr = MSR_K7_EVNTSEL0;
> counter_msr = MSR_K7_PERFCTR0;
> +
> + if (this_cpu_has(X86_FEATURE_AMD_PMU_EXT_CORE) &&
> + this_cpu_has(X86_FEATURE_AMD_PERFMON_V2)) {
> + nr_gp_counters = this_cpu_property(X86_PROPERTY_AMD_PMU_NR_CORE_COUNTERS);
> + global_ovf_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR;
> + global_status_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS;
> + global_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_CTL;
> + event_sel_msr = MSR_F15H_PERF_CTL0;
> + counter_msr = MSR_F15H_PERF_CTR0;
> + pmu_version = 2;
> + }

Please use an if-else when the two things are completely exclusive, i.e. don't
set "defaults" and then override them.

> }
>
> for (i = 0; i < nr_gp_counters; i++) {
> @@ -84,14 +103,39 @@ static void guest_measure_loop(uint64_t event_code)
> ARCH_PERFMON_EVENTSEL_ENABLE | event_code);
>
> if (pmu_version > 1) {
> - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
> + wrmsr(global_ctrl_msr, BIT_ULL(i));
> __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> + wrmsr(global_ctrl_msr, 0);
> GUEST_SYNC(_rdpmc(i));
> } else {
> __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> GUEST_SYNC(_rdpmc(i));
> }

This is extremely difficult to follow. I think the same thing to do is to split
this up into helpers, e.g. send pmu_version > 1 into one path, and pmu_version <= 1
into an entirely different path.

E.g. something like this?

static void guest_measure_loop(uint64_t event_code)
{
uint64_t global_ovf_ctrl_msr, global_status_msr, global_ctrl_msr;
uint8_t nr_gp_counters, pmu_version;
uint8_t gp_counter_bit_width;
uint64_t event_sel_msr;
uint32_t counter_msr;
unsigned int i;

if (host_cpu_is_intel)
pmu_version = this_cpu_property(X86_PROPERTY_PMU_VERSION);
else if (this_cpu_has(X86_FEATURE_PERFCTR_CORE) &&
this_cpu_has(X86_FEATURE_PERFMON_V2)) {
pmu_version = 2;
} else {
pmu_version = 1;
}

if (pmu_version <= 1) {
guest_measure_pmu_legacy(...);
return;
}

if (host_cpu_is_intel) {
nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
global_ovf_ctrl_msr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
global_status_msr = MSR_CORE_PERF_GLOBAL_STATUS;
global_ctrl_msr = MSR_CORE_PERF_GLOBAL_CTRL;
gp_counter_bit_width = this_cpu_property(X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH);

if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
counter_msr = MSR_IA32_PMC0;
else
counter_msr = MSR_IA32_PERFCTR0;
} else {
nr_gp_counters = this_cpu_property(X86_PROPERTY_AMD_PMU_NR_CORE_COUNTERS);
global_ovf_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR;
global_status_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS;
global_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_CTL;
event_sel_msr = MSR_F15H_PERF_CTL0;
counter_msr = MSR_F15H_PERF_CTR0;
gp_counter_bit_width = 48;
}

for (i = 0; i < nr_gp_counters; i++) {
wrmsr(counter_msr + i, 0);
wrmsr(event_sel_msr + i, ARCH_PERFMON_EVENTSEL_OS |
ARCH_PERFMON_EVENTSEL_ENABLE | event_code);

wrmsr(global_ctrl_msr, BIT_ULL(i));
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
wrmsr(global_ctrl_msr, 0);
counter = _rdpmc(i);
GUEST_ASSERT_EQ(this_pmu_has(...), !!counter);

if ( _rdpmc(i)) {
wrmsr(global_ctrl_msr, 0);
wrmsr(counter_msr + i, 0);
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
GUEST_ASSERT(!_rdpmc(i));

wrmsr(global_ctrl_msr, BIT_ULL(i));
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
GUEST_ASSERT(_rdpmc(i));

wrmsr(global_ctrl_msr, 0);
wrmsr(counter_msr + i, (1ULL << gp_counter_bit_width) - 2);
wrmsr(global_ctrl_msr, BIT_ULL(i));
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
GUEST_ASSERT(rdmsr(global_status_msr) & BIT_ULL(i));

wrmsr(global_ctrl_msr, 0);
wrmsr(global_ovf_ctrl_msr, BIT_ULL(i));
GUEST_ASSERT(!(rdmsr(global_status_msr) & BIT_ULL(i)));
}
}