Re: [PATCH v4 03/14] arm64/fpsimd: Support FEAT_FPMR

From: Marc Zyngier
Date: Fri Feb 23 2024 - 06:07:14 EST


On Mon, 22 Jan 2024 16:28:06 +0000,
Mark Brown <broonie@xxxxxxxxxx> wrote:
>
> FEAT_FPMR defines a new EL0-accessible register, FPMR, used to configure
> the FP8-related features added to the architecture at the same time.
> Detect support for this register and context switch it for EL0 when
> present.
>
> Due to the sharing of responsibility for saving floating point state
> between the host kernel and KVM, FP8 support is not yet implemented in
> KVM; a stub similar to that used for SVCR is provided for FPMR in order
> to avoid bisection issues. To make it easier to share host state with
> the hypervisor we store FPMR as a hardened usercopy field in uw (along
> with some padding).
>
> Signed-off-by: Mark Brown <broonie@xxxxxxxxxx>
> ---
>  arch/arm64/include/asm/cpufeature.h |  5 +++++
>  arch/arm64/include/asm/fpsimd.h     |  2 ++
>  arch/arm64/include/asm/kvm_host.h   |  1 +
>  arch/arm64/include/asm/processor.h  |  4 ++++
>  arch/arm64/kernel/cpufeature.c      |  9 +++++++++
>  arch/arm64/kernel/fpsimd.c          | 13 +++++++++++++
>  arch/arm64/kvm/fpsimd.c             |  1 +
>  arch/arm64/tools/cpucaps            |  1 +
>  8 files changed, 36 insertions(+)
>
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 21c824edf8ce..34fcdbc65d7d 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -768,6 +768,11 @@ static __always_inline bool system_supports_tpidr2(void)
>  	return system_supports_sme();
>  }
>
> +static __always_inline bool system_supports_fpmr(void)
> +{
> +	return alternative_has_cap_unlikely(ARM64_HAS_FPMR);
> +}
> +
>  static __always_inline bool system_supports_cnp(void)
>  {
>  	return alternative_has_cap_unlikely(ARM64_HAS_CNP);
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 50e5f25d3024..6cf72b0d2c04 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -89,6 +89,7 @@ struct cpu_fp_state {
>  	void *sve_state;
>  	void *sme_state;
>  	u64 *svcr;
> +	unsigned long *fpmr;
>  	unsigned int sve_vl;
>  	unsigned int sme_vl;
>  	enum fp_type *fp_type;
> @@ -154,6 +155,7 @@ extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
>  extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
>  extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
>  extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
> +extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
>
>  extern u64 read_smcr_features(void);
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 21c57b812569..7993694a54af 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -543,6 +543,7 @@ struct kvm_vcpu_arch {
>  	enum fp_type fp_type;
>  	unsigned int sve_max_vl;
>  	u64 svcr;
> +	unsigned long fpmr;

As this directly represents a register, I'd rather you use a type that
unambiguously matches the size of that register (u64).
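
i.e. something along these lines (an untested sketch; the struct name is
invented here purely for illustration, only the field type matters):

	#include <linux/types.h>

	/*
	 * FPMR is architecturally a 64-bit register, so store it as one:
	 * u64 pins the field to the register width no matter what
	 * unsigned long happens to be for a given build.
	 */
	struct vcpu_fp_sketch {
		u64 svcr;
		u64 fpmr;	/* rather than: unsigned long fpmr; */
	};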

>
>  	/* Stage 2 paging state used by the hardware on next switch */
>  	struct kvm_s2_mmu *hw_mmu;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 5b0a04810b23..b453c66d3fae 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -155,6 +155,8 @@ struct thread_struct {
>  	struct {
>  		unsigned long	tp_value;	/* TLS register */
>  		unsigned long	tp2_value;
> +		unsigned long	fpmr;
> +		unsigned long	pad;
>  		struct user_fpsimd_state fpsimd_state;
>  	} uw;
>
> @@ -253,6 +255,8 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
>  	BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
>  		     sizeof_field(struct thread_struct, uw.tp_value) +
>  		     sizeof_field(struct thread_struct, uw.tp2_value) +
> +		     sizeof_field(struct thread_struct, uw.fpmr) +
> +		     sizeof_field(struct thread_struct, uw.pad) +
>  		     sizeof_field(struct thread_struct, uw.fpsimd_state));
>
>  	*offset = offsetof(struct thread_struct, uw);
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index eae59ec0f4b0..0263565f617a 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -272,6 +272,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
>  };
>
>  static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
> +	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
>  	ARM64_FTR_END,
>  };
>
> @@ -2767,6 +2768,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
>  		.matches = has_lpa2,
>  	},
> +	{
> +		.desc = "FPMR",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_HAS_FPMR,
> +		.matches = has_cpuid_feature,
> +		.cpu_enable = cpu_enable_fpmr,
> +		ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
> +	},
>  	{},
>  };
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index a5dc6f764195..8e24b5e5e192 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -359,6 +359,9 @@ static void task_fpsimd_load(void)
>  	WARN_ON(preemptible());
>  	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
>
> +	if (system_supports_fpmr())
> +		write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
> +
>  	if (system_supports_sve() || system_supports_sme()) {
>  		switch (current->thread.fp_type) {
>  		case FP_STATE_FPSIMD:
> @@ -446,6 +449,9 @@ static void fpsimd_save_user_state(void)
>  	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
>  		return;
>
> +	if (system_supports_fpmr())
> +		*(last->fpmr) = read_sysreg_s(SYS_FPMR);
> +
>  	/*
>  	 * If a task is in a syscall the ABI allows us to only
>  	 * preserve the state shared with FPSIMD so don't bother
> @@ -688,6 +694,12 @@ static void sve_to_fpsimd(struct task_struct *task)
>  	}
>  }
>
> +void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
> +{
> +	write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
> +		       SYS_SCTLR_EL1);
> +}
> +
>  #ifdef CONFIG_ARM64_SVE
>  /*
>   * Call __sve_free() directly only if you know task can't be scheduled
> @@ -1680,6 +1692,7 @@ static void fpsimd_bind_task_to_cpu(void)
>  	last->sve_vl = task_get_sve_vl(current);
>  	last->sme_vl = task_get_sme_vl(current);
>  	last->svcr = &current->thread.svcr;
> +	last->fpmr = &current->thread.uw.fpmr;
>  	last->fp_type = &current->thread.fp_type;
>  	last->to_save = FP_STATE_CURRENT;
>  	current->thread.fpsimd_cpu = smp_processor_id();
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 8c1d0d4853df..e3e611e30e91 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -153,6 +153,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  		fp_state.sve_vl = vcpu->arch.sve_max_vl;
>  		fp_state.sme_state = NULL;
>  		fp_state.svcr = &vcpu->arch.svcr;
> +		fp_state.fpmr = &vcpu->arch.fpmr;
>  		fp_state.fp_type = &vcpu->arch.fp_type;

Given the number of fields you keep track of, it would make a lot more
sense if these FP-related fields were in their own little structure
and tracked by a single pointer (I don't think there is a case where
we track them independently).
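
Something like this, say (untested; the type and its name are invented
here just to illustrate the shape):

	#include <linux/types.h>

	/*
	 * Hypothetical grouping -- not an existing kernel structure.
	 * enum fp_type is the type the arch code already uses for
	 * tracking the saved register state.
	 */
	struct fp_ext_state {
		u64 svcr;
		u64 fpmr;
		enum fp_type fp_type;
	};

struct cpu_fp_state and struct kvm_vcpu_arch could then carry a single
struct fp_ext_state (or a pointer to one) in place of the separate
svcr/fpmr/fp_type fields, and fpsimd_bind_state_to_cpu() callers would
only have one thing to wire up.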

Thanks,

M.

--
Without deviation from the norm, progress is not possible.