Re: [PATCH v8 04/13] KVM: arm64: PMU: Set PMCR_EL0.N for vCPU based on the associated PMU

From: Marc Zyngier
Date: Mon Oct 23 2023 - 07:51:12 EST


On Fri, 20 Oct 2023 22:40:44 +0100,
Raghavendra Rao Ananta <rananta@xxxxxxxxxx> wrote:
>
> The number of PMU event counters is indicated in PMCR_EL0.N.
> For a vCPU with PMUv3 configured, the value is set to the same
> value as that of the current PE on every vCPU reset. Unless the
> vCPU is pinned to PEs that have the PMU associated with the guest
> from the initial vCPU reset, the value might differ from the PMU's
> PMCR_EL0.N on heterogeneous PMU systems.
>
> Fix this by setting the vCPU's PMCR_EL0.N to the PMU's PMCR_EL0.N
> value. Track the PMCR_EL0.N per guest, as only one PMU can be set
> for the guest (PMCR_EL0.N must be the same for all vCPUs of the
> guest), and it is convenient for updating the value.
>
> To achieve this, the patch introduces a helper,
> kvm_arm_pmu_get_max_counters(), that reads the maximum number of
> counters from the arm_pmu associated with the VM. Make the function
> global, as upcoming patches will need to know the value when
> setting the guest's PMCR.N from userspace.
>
> KVM does not yet support userspace modifying PMCR_EL0.N.
> The following patch will add support for that.
>
> Signed-off-by: Reiji Watanabe <reijiw@xxxxxxxxxx>
> Signed-off-by: Raghavendra Rao Ananta <rananta@xxxxxxxxxx>
> ---
> arch/arm64/include/asm/kvm_host.h | 3 +++
> arch/arm64/kvm/pmu-emul.c | 26 +++++++++++++++++++++++++-
> arch/arm64/kvm/sys_regs.c | 28 ++++++++++++++--------------
> include/kvm/arm_pmu.h | 6 ++++++
> 4 files changed, 48 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 846a7706e925c..5653d3553e3ee 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -290,6 +290,9 @@ struct kvm_arch {
>
> cpumask_var_t supported_cpus;
>
> + /* PMCR_EL0.N value for the guest */
> + u8 pmcr_n;
> +
> /* Hypercall features firmware registers' descriptor */
> struct kvm_smccc_features smccc_feat;
> struct maple_tree smccc_filter;
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index 097bf7122130d..9e24581206c24 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -690,6 +690,9 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
> if (!entry)
> goto out_unlock;
>
> + WARN_ON((pmu->num_events <= 0) ||
> + (pmu->num_events > ARMV8_PMU_MAX_COUNTERS));
> +

So if we find a PMU that is completely bonkers (we *know* we cannot
make use of it), we still pick it? What is the point?

Honestly, I don't think this warning adds any value, and it doesn't
seem to be required for this patch anyway.
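
If anything, I'd rather we refused to pick such a PMU altogether. A
completely untested sketch (assuming the check can live before the
tracking entry is allocated, so nothing needs freeing on the way out):

	/* Don't advertise a PMU whose counter count makes no sense. */
	if (pmu->num_events <= 0 || pmu->num_events > ARMV8_PMU_MAX_COUNTERS)
		goto out_unlock;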

> entry->arm_pmu = pmu;
> list_add_tail(&entry->entry, &arm_pmus);
>
> @@ -873,11 +876,29 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
> return true;
> }
>
> +/**
> + * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters.
> + * @kvm: The kvm pointer
> + */
> +int kvm_arm_pmu_get_max_counters(struct kvm *kvm)
> +{
> + struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;
> +
> + lockdep_assert_held(&kvm->arch.config_lock);
> +
> + /*
> + * The arm_pmu->num_events considers the cycle counter as well.
> + * Ignore that and return only the general-purpose counters.
> + */
> + return arm_pmu->num_events - 1;

How is that going to work when the PMU supports a fixed instruction
counter, as is the case with FEAT_PMUv3_ICNTR? The kernel doesn't
support it yet, but this will eventually be the case, and this little
game will break.
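
Purely as an illustration of where this should be heading (note that
'fixed_counters' is a made-up bitmap that arm_pmu doesn't carry
today), the helper would have to subtract whatever fixed counters the
PMU actually implements rather than assuming there is exactly one:

	/*
	 * Hypothetical: subtract all implemented fixed counters (the
	 * cycle counter, and eventually the FEAT_PMUv3_ICNTR
	 * instruction counter) instead of hard-coding a single one.
	 */
	return arm_pmu->num_events - hweight64(arm_pmu->fixed_counters);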

> +}
> +
> static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> {
> lockdep_assert_held(&kvm->arch.config_lock);
>
> kvm->arch.arm_pmu = arm_pmu;
> + kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);

Can you make the return type of kvm_arm_pmu_get_max_counters()
homogeneous with that of pmcr_n?
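
i.e. something along these lines (untested), given that pmcr_n is a u8:

	u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
	{
		struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;

		lockdep_assert_held(&kvm->arch.config_lock);

		/* Only the general-purpose counters, not the cycle counter. */
		return arm_pmu->num_events - 1;
	}

The !CONFIG_HW_PERF_EVENTS stub would then need to return something
other than -ENODEV, of course.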

> }
>
> /**
> @@ -1091,5 +1112,8 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
> */
> u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
> {
> - return __vcpu_sys_reg(vcpu, PMCR_EL0);
> + u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0) &
> + ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
> +
> + return pmcr | ((u64)vcpu->kvm->arch.pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
> }
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index a31cecb3d29fb..faf97878dfbbb 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -721,12 +721,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
> {
> u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);
>
> - /* No PMU available, any PMU reg may UNDEF... */
> - if (!kvm_arm_support_pmu_v3())
> - return 0;
> -
> - n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
> - n &= ARMV8_PMU_PMCR_N_MASK;
> + n = vcpu->kvm->arch.pmcr_n;
> if (n)
> mask |= GENMASK(n - 1, 0);
>
> @@ -762,17 +757,15 @@ static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>
> static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
> {
> - u64 pmcr;
> + u64 pmcr = 0;
>
> - /* No PMU available, PMCR_EL0 may UNDEF... */
> - if (!kvm_arm_support_pmu_v3())
> - return 0;
> -
> - /* Only preserve PMCR_EL0.N, and reset the rest to 0 */
> - pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
> if (!kvm_supports_32bit_el0())
> pmcr |= ARMV8_PMU_PMCR_LC;
>
> + /*
> + * The value of PMCR.N field is included when the
> + * vCPU register is read via kvm_vcpu_read_pmcr().
> + */
> __vcpu_sys_reg(vcpu, r->reg) = pmcr;
>
> return __vcpu_sys_reg(vcpu, r->reg);
> @@ -1103,6 +1096,13 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> return true;
> }
>
> +static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> + u64 *val)
> +{
> + *val = kvm_vcpu_read_pmcr(vcpu);
> + return 0;
> +}
> +
> /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
> #define DBG_BCR_BVR_WCR_WVR_EL1(n) \
> { SYS_DESC(SYS_DBGBVRn_EL1(n)), \
> @@ -2235,7 +2235,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> { SYS_DESC(SYS_SVCR), undef_access },
>
> { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
> - .reset = reset_pmcr, .reg = PMCR_EL0 },
> + .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },

So since you don't provide a set_user() callback, userspace can still
write anything it wants. Should we take this opportunity to sanitise
things a bit?
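
Something along these lines (untested, and whether to reject or
silently clamp the value is a policy decision) would at least keep N
within what the backing PMU can deliver:

	static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
			    u64 val)
	{
		struct kvm *kvm = vcpu->kvm;
		u8 new_n = (val >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
		int ret = 0;

		mutex_lock(&kvm->arch.config_lock);
		/* Refuse an N the backing PMU cannot provide. */
		if (new_n > kvm_arm_pmu_get_max_counters(kvm))
			ret = -EINVAL;
		else
			kvm->arch.pmcr_n = new_n;
		mutex_unlock(&kvm->arch.config_lock);

		if (!ret)
			__vcpu_sys_reg(vcpu, r->reg) = val;

		return ret;
	}

plus wiring it up as .set_user for PMCR_EL0.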

> { PMU_SYS_REG(PMCNTENSET_EL0),
> .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
> { PMU_SYS_REG(PMCNTENCLR_EL0),
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index cd980d78b86b5..2e90f38090e6d 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -102,6 +102,7 @@ void kvm_vcpu_pmu_resync_el0(void);
>
> u8 kvm_arm_pmu_get_pmuver_limit(void);
> int kvm_arm_set_default_pmu(struct kvm *kvm);
> +int kvm_arm_pmu_get_max_counters(struct kvm *kvm);
>
> u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu);
> #else
> @@ -181,6 +182,11 @@ static inline int kvm_arm_set_default_pmu(struct kvm *kvm)
> return -ENODEV;
> }
>
> +static inline int kvm_arm_pmu_get_max_counters(struct kvm *kvm)
> +{
> + return -ENODEV;
> +}
> +
> static inline u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
> {
> return 0;

Thanks,

M.

--
Without deviation from the norm, progress is not possible.