[PATCH v8 15/26] x86/fpu/xstate: Support both legacy and expanded signal XSTATE size

From: Chang S. Bae
Date: Sat Jul 17 2021 - 11:36:32 EST


Prepare to support two XSTATE sizes on the signal stack -- legacy and
expanded. Legacy programs have not requested access to AMX (or later
features), and the XSTATE on their signal stack can include up through
AVX-512.

Programs that request access to AMX (and/or later features) will have an
uncompressed XSTATE that includes those features. If such a program also
uses sigaltstack, it must ensure that its sigaltstack is large enough to
hold that full XSTATE format. (This is most easily done by using signal.h
from glibc 2.34 or later.)
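
Purely as an illustration (not part of this patch), such a program can size
its alternate stack from runtime values rather than a compile-time constant.
The sketch below is a userspace example under stated assumptions: it relies
on getauxval(AT_MINSIGSTKSZ) being provided by the kernel/libc and on
SIGSTKSZ from a glibc 2.34 or later signal.h; the helper names are made up
for the example:

    #include <signal.h>
    #include <stdlib.h>
    #include <sys/auxv.h>	/* getauxval(), AT_MINSIGSTKSZ */

    /* Illustrative helper: pick a stack size large enough for the
     * kernel's signal frame, including an expanded XSTATE area. */
    static size_t sig_stack_size(void)
    {
    	size_t sz  = SIGSTKSZ;			/* runtime value on glibc >= 2.34 */
    	size_t min = getauxval(AT_MINSIGSTKSZ);	/* 0 if the kernel lacks it */

    	return sz > min ? sz : min;
    }

    static int install_altstack(void)
    {
    	stack_t ss = { 0 };

    	ss.ss_size = sig_stack_size();
    	ss.ss_sp   = malloc(ss.ss_size);
    	if (!ss.ss_sp)
    		return -1;
    	return sigaltstack(&ss, NULL);
    }

Taking the larger of the two values keeps the example working on systems
where only one of the sources reflects the expanded XSTATE size.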

Introduce a new XSTATE size variable for the legacy stack and some helpers.

Signed-off-by: Chang S. Bae <chang.seok.bae@xxxxxxxxx>
Reviewed-by: Len Brown <len.brown@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
Changes from v6:
* Massage the code comments.

Changes from v5:
* Added as a new patch.
---
arch/x86/include/asm/fpu/internal.h | 23 +++++++++--
arch/x86/include/asm/fpu/xstate.h | 3 +-
arch/x86/kernel/fpu/init.c | 1 +
arch/x86/kernel/fpu/signal.c | 63 ++++++++++++++++++++---------
arch/x86/kernel/fpu/xstate.c | 25 +++++++++++-
5 files changed, 89 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index e3590cf55325..3b52cfb62ab5 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -337,15 +337,30 @@ static inline void os_xrstor(struct xregs_state *xstate, u64 mask)
*/
static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
{
+ u32 lmask, hmask;
+ u64 mask;
+ int err;
+
/*
* Include the features which are not xsaved/rstored by the kernel
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
- u64 mask = xfeatures_mask_uabi();
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
+ mask = xfeatures_mask_uabi();
+
+ /*
+ * Exclude dynamic user states for non-opt-in threads.
+ */
+ if (xfeatures_mask_user_dynamic) {
+ struct fpu *fpu = &current->thread.fpu;
+
+ mask &= fpu->dynamic_state_perm ?
+ fpu->state_mask :
+ ~xfeatures_mask_user_dynamic;
+ }
+
+ lmask = mask;
+ hmask = mask >> 32;

/*
* Clear the xsave header first, so that reserved fields are
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 89516c226dc6..eb53e162636b 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -139,7 +139,8 @@ extern void __init update_regset_xstate_info(unsigned int size,
enum xstate_config {
XSTATE_MIN_SIZE,
XSTATE_MAX_SIZE,
- XSTATE_USER_SIZE
+ XSTATE_USER_SIZE,
+ XSTATE_USER_MINSIG_SIZE,
};

extern unsigned int get_xstate_config(enum xstate_config cfg);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 3e4e14ca723b..acbd3da0e022 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -210,6 +210,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
set_xstate_config(XSTATE_MIN_SIZE, xstate_size);
set_xstate_config(XSTATE_MAX_SIZE, xstate_size);
set_xstate_config(XSTATE_USER_SIZE, xstate_size);
+ set_xstate_config(XSTATE_USER_MINSIG_SIZE, xstate_size);
}

/* Legacy code to initialize eager fpu mode. */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index f70f84d53442..78696b412b56 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -15,9 +15,26 @@
#include <asm/sigframe.h>
#include <asm/trace/fpu.h>

+/*
+ * Record the signal xstate size and feature bits. Dynamic user states
+ * are excluded. See fpu__init_prepare_fx_sw_frame(). Opt-in tasks
+ * adjust these values at run time.
+ */
static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init;
static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init;

+static unsigned int current_sig_xstate_size(void)
+{
+ return current->thread.fpu.dynamic_state_perm ?
+ get_xstate_config(XSTATE_USER_SIZE) :
+ get_xstate_config(XSTATE_USER_MINSIG_SIZE);
+}
+
+static inline int extend_sig_xstate_size(unsigned int size)
+{
+ return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size;
+}
+
/*
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
@@ -36,7 +53,7 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > get_xstate_config(XSTATE_USER_SIZE) ||
+ fx_sw->xstate_size > current_sig_xstate_size() ||
fx_sw->xstate_size > fx_sw->extended_size)
goto setfx;

@@ -94,20 +111,32 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)

static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
{
+ unsigned int current_xstate_size = current_sig_xstate_size();
struct xregs_state __user *x = buf;
- struct _fpx_sw_bytes *sw_bytes;
+ struct _fpx_sw_bytes sw_bytes;
u32 xfeatures;
int err;

- /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
- sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
- err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
+ /*
+ * Setup the bytes not touched by the [f]xsave and reserved for SW.
+ *
+ * Use the recorded values if they match the current task. Otherwise,
+ * adjust them.
+ */
+ sw_bytes = ia32_frame ? fx_sw_reserved_ia32 : fx_sw_reserved;
+ if (sw_bytes.xstate_size != current_xstate_size) {
+ unsigned int default_xstate_size = sw_bytes.xstate_size;
+
+ sw_bytes.xfeatures = xfeatures_mask_uabi();
+ sw_bytes.xstate_size = current_xstate_size;
+ sw_bytes.extended_size += (current_xstate_size - default_xstate_size);
+ }
+ err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes));

if (!use_xsave())
return err;

- err |= __put_user(FP_XSTATE_MAGIC2,
- (__u32 __user *)(buf + get_xstate_config(XSTATE_USER_SIZE)));
+ err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + current_xstate_size));

/*
* Read the xfeatures which we copied (directly from the cpu or
@@ -144,7 +173,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
else
err = fnsave_to_user_sigframe((struct fregs_state __user *) buf);

- if (unlikely(err) && __clear_user(buf, get_xstate_config(XSTATE_USER_SIZE)))
+ if (unlikely(err) && __clear_user(buf, current_sig_xstate_size()))
err = -EFAULT;
return err;
}
@@ -205,7 +234,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();

if (ret) {
- if (!fault_in_pages_writeable(buf_fx, get_xstate_config(XSTATE_USER_SIZE)))
+ if (!fault_in_pages_writeable(buf_fx, current_sig_xstate_size()))
goto retry;
return -EFAULT;
}
@@ -418,19 +447,13 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs_unlock();
return ret;
}
-static inline int xstate_sigframe_size(void)
-{
- int xstate_size = get_xstate_config(XSTATE_USER_SIZE);
-
- return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
-}

/*
* Restore FPU state from a sigframe:
*/
int fpu__restore_sig(void __user *buf, int ia32_frame)
{
- unsigned int size = xstate_sigframe_size();
+ unsigned int size = extend_sig_xstate_size(current_sig_xstate_size());
struct fpu *fpu = &current->thread.fpu;
void __user *buf_fx = buf;
bool ia32_fxstate = false;
@@ -477,7 +500,7 @@ unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
- unsigned long frame_size = xstate_sigframe_size();
+ unsigned long frame_size = extend_sig_xstate_size(current_sig_xstate_size());

*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
@@ -492,7 +515,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,

unsigned long fpu__get_fpstate_size(void)
{
- unsigned long ret = xstate_sigframe_size();
+ unsigned long ret = extend_sig_xstate_size(get_xstate_config(XSTATE_USER_SIZE));

/*
* This space is needed on (most) 32-bit kernels, or when a 32-bit
@@ -517,12 +540,12 @@ unsigned long fpu__get_fpstate_size(void)
*/
void fpu__init_prepare_fx_sw_frame(void)
{
- int xstate_size = get_xstate_config(XSTATE_USER_SIZE);
+ int xstate_size = get_xstate_config(XSTATE_USER_MINSIG_SIZE);
int ext_size = xstate_size + FP_XSTATE_MAGIC2_SIZE;

fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = ext_size;
- fx_sw_reserved.xfeatures = xfeatures_mask_uabi();
+ fx_sw_reserved.xfeatures = xfeatures_mask_uabi() & ~xfeatures_mask_user_dynamic;
fx_sw_reserved.xstate_size = xstate_size;

if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index e0fa5ec500bc..b9cdd1ff7777 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -94,10 +94,13 @@ static bool xstate_aligns[XFEATURE_MAX] __ro_after_init =
* contains all the enabled state components.
* @user_size: The size of user-space buffer for signal and
* ptrace frames, in the non-compacted format.
+ * @user_minsig_size: The non-compacted legacy xstate size for signal.
+ * Legacy programs do not request access to dynamic
+ * states.
*/
struct fpu_xstate_buffer_config {
unsigned int min_size, max_size;
- unsigned int user_size;
+ unsigned int user_size, user_minsig_size;
};

static struct fpu_xstate_buffer_config buffer_config __ro_after_init;
@@ -111,6 +114,8 @@ unsigned int get_xstate_config(enum xstate_config cfg)
return buffer_config.max_size;
case XSTATE_USER_SIZE:
return buffer_config.user_size;
+ case XSTATE_USER_MINSIG_SIZE:
+ return buffer_config.user_minsig_size;
default:
return 0;
}
@@ -128,6 +133,9 @@ void set_xstate_config(enum xstate_config cfg, unsigned int value)
break;
case XSTATE_USER_SIZE:
buffer_config.user_size = value;
+ break;
+ case XSTATE_USER_MINSIG_SIZE:
+ buffer_config.user_minsig_size = value;
}
}

@@ -859,6 +867,21 @@ static int __init init_xstate_size(void)
* User space is always in standard format.
*/
set_xstate_config(XSTATE_USER_SIZE, xsave_size);
+
+ /*
+ * The minimum signal xstate size is for non-opt-in user threads
+ * that do not access dynamic states.
+ */
+ if (xfeatures_mask_user_dynamic) {
+ int nr = fls64(xfeatures_mask_uabi() & ~xfeatures_mask_user_dynamic) - 1;
+ unsigned int size, offset, ecx, edx;
+
+ cpuid_count(XSTATE_CPUID, nr, &size, &offset, &ecx, &edx);
+ set_xstate_config(XSTATE_USER_MINSIG_SIZE, offset + size);
+ } else {
+ set_xstate_config(XSTATE_USER_MINSIG_SIZE, xsave_size);
+ }
+
return 0;
}

--
2.17.1