Re: [PATCH v7 06/26] x86/fpu/xstate: Create guest fpstate with guest specific config

From: Maxim Levitsky
Date: Thu Nov 30 2023 - 12:36:37 EST


On Fri, 2023-11-24 at 00:53 -0500, Yang Weijiang wrote:
> Use fpu_guest_cfg to calculate guest fpstate settings, open code for
> __fpstate_reset() to avoid using kernel FPU config.
>
> Below configuration steps are currently enforced to get guest fpstate:
> 1) Kernel sets up guest FPU settings in fpu__init_system_xstate().
> 2) User space sets vCPU thread group xstate permits via arch_prctl().
> 3) User space creates guest fpstate via __fpu_alloc_init_guest_fpstate()
> for vcpu thread.
> 4) User space enables guest dynamic xfeatures and re-allocate guest
> fpstate.
>
> By adding kernel dynamic xfeatures in above #1 and #2, guest xstate area
> size is expanded to hold (fpu_kernel_cfg.default_features | kernel dynamic
> xfeatures | user dynamic xfeatures), then host xsaves/xrstors can operate
> for all guest xfeatures.
>
> The user_* fields remain unchanged for compatibility with KVM uAPIs.
>
> Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
> ---
> arch/x86/kernel/fpu/core.c | 48 ++++++++++++++++++++++++++++--------
> arch/x86/kernel/fpu/xstate.c | 2 +-
> arch/x86/kernel/fpu/xstate.h | 1 +
> 3 files changed, 40 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
> index 516af626bf6a..985eaf8b55e0 100644
> --- a/arch/x86/kernel/fpu/core.c
> +++ b/arch/x86/kernel/fpu/core.c
> @@ -194,8 +194,6 @@ void fpu_reset_from_exception_fixup(void)
> }
>
> #if IS_ENABLED(CONFIG_KVM)
> -static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
> -
> static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
> {
> struct fpu_state_perm *fpuperm;
> @@ -216,25 +214,55 @@ static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
> gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
> }
>
> -bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
> +static struct fpstate *__fpu_alloc_init_guest_fpstate(struct fpu_guest *gfpu)
> {
> + bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
> + unsigned int gfpstate_size, size;
> struct fpstate *fpstate;
> - unsigned int size;
>
> - size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
> + /*
> + * fpu_guest_cfg.default_features includes all enabled xfeatures
> + * except the user dynamic xfeatures. If the user dynamic xfeatures
> + * are enabled, the guest fpstate will be re-allocated to hold all
> + * guest enabled xfeatures, so omit user dynamic xfeatures here.
> + */

This is a very good comment to have, although I don't think there is any way
to ensure that the whole thing is not utterly confusing.....


> + gfpstate_size = xstate_calculate_size(fpu_guest_cfg.default_features,
> + compacted);
> +
> + size = gfpstate_size + ALIGN(offsetof(struct fpstate, regs), 64);
> +
> fpstate = vzalloc(size);
> if (!fpstate)
> - return false;
> + return NULL;
> + /*
> + * Initialize sizes and feature masks, use fpu_user_cfg.*
> + * for user_* settings for compatibility of exiting uAPIs.
> + */
> + fpstate->size = gfpstate_size;
> + fpstate->xfeatures = fpu_guest_cfg.default_features;

> + fpstate->user_size = fpu_user_cfg.default_size;
> + fpstate->user_xfeatures = fpu_user_cfg.default_features;

The whole thing makes my head spin like the good old CD/DVD writers used to ....

So just to summarize this is what we have:


KERNEL FPU CONFIG

/*
all known and CPU supported user and supervisor features except
- "dynamic" kernel features" (CET_S)
- "independent" kernel features (XFEATURE_LBR)
*/
fpu_kernel_cfg.max_features;

/*
all known and CPU supported user and supervisor features except
- "dynamic" kernel features" (CET_S)
- "independent" kernel features (arch LBRs)
- "dynamic" userspace features (AMX state)
*/
fpu_kernel_cfg.default_features;


// size of compacted buffer with 'fpu_kernel_cfg.max_features'
fpu_kernel_cfg.max_size;


// size of compacted buffer with 'fpu_kernel_cfg.default_features'
fpu_kernel_cfg.default_size;


USER FPU CONFIG

/*
all known and CPU supported user features
*/
fpu_user_cfg.max_features;

/*
all known and CPU supported user features except
- "dynamic" userspace features (AMX state)
*/
fpu_user_cfg.default_features;

// size of non compacted buffer with 'fpu_user_cfg.max_features'
fpu_user_cfg.max_size;

// size of non compacted buffer with 'fpu_user_cfg.default_features'
fpu_user_cfg.default_size;


GUEST FPU CONFIG
/*
all known and CPU supported user and supervisor features except
- "independent" kernel features (XFEATURE_LBR)
*/
fpu_guest_cfg.max_features;

/*
all known and CPU supported user and supervisor features except
- "independent" kernel features (arch LBRs)
- "dynamic" userspace features (AMX state)
*/
fpu_guest_cfg.default_features;

// size of compacted buffer with 'fpu_guest_cfg.max_features'
fpu_guest_cfg.max_size;

// size of compacted buffer with 'fpu_guest_cfg.default_features'
fpu_guest_cfg.default_size;



---


So in essence, guest FPU config is guest kernel fpu config and that is why
'fpu_user_cfg.default_size' had to be used above.

How about that we have fpu_guest_kernel_config and fpu_guest_user_config instead
to make the whole horrible thing maybe even more complicated but at least a bit more orthogonal?

Best regards,
Maxim Levitsky





> + fpstate->xfd = 0;
>
> - /* Leave xfd to 0 (the reset value defined by spec) */
> - __fpstate_reset(fpstate, 0);
> fpstate_init_user(fpstate);
> fpstate->is_valloc = true;
> fpstate->is_guest = true;
>
> gfpu->fpstate = fpstate;
> - gfpu->xfeatures = fpu_user_cfg.default_features;
> - gfpu->perm = fpu_user_cfg.default_features;
> + gfpu->xfeatures = fpu_guest_cfg.default_features;
> + gfpu->perm = fpu_guest_cfg.default_features;
> +
> + return fpstate;
> +}
> +
> +bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
> +{
> + struct fpstate *fpstate;
> +
> + fpstate = __fpu_alloc_init_guest_fpstate(gfpu);
> +
> + if (!fpstate)
> + return false;
>
> /*
> * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index aa8f8595cd41..253944cb2298 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -559,7 +559,7 @@ static bool __init check_xstate_against_struct(int nr)
> return true;
> }
>
> -static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
> +unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
> {
> unsigned int topmost = fls64(xfeatures) - 1;
> unsigned int offset = xstate_offsets[topmost];
> diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
> index 3518fb26d06b..c032acb56306 100644
> --- a/arch/x86/kernel/fpu/xstate.h
> +++ b/arch/x86/kernel/fpu/xstate.h
> @@ -55,6 +55,7 @@ extern void fpu__init_cpu_xstate(void);
> extern void fpu__init_system_xstate(unsigned int legacy_size);
>
> extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
> +extern unsigned int xstate_calculate_size(u64 xfeatures, bool compacted);
>
> static inline u64 xfeatures_mask_supervisor(void)
> {