[PATCH Bugfix v2 2/4] x86/xsaves: Define and use user_xstate_size for xstate size in signal context

From: Fenghua Yu
Date: Wed Apr 22 2015 - 00:55:28 EST


From: Fenghua Yu <fenghua.yu@xxxxxxxxx>

If "xsaves" is enabled, kernel always uses compact format of xsave area.
But user space still uses standard format of xsave area. Thus, xstate size
in kernel's xsave area is smaller than xstate size in user's xsave area.
xstate in signal frame should be in standard format for user's signal
handler to access.

In no "xsaves" case, xsave area in both user space and kernel space are in
standard format. Therefore, user's and kernel's xstate sizes are equal.

In "xsaves" case, xsave area in user space is in standard format while
xsave area in kernel space is in compact format. Therefore, kernel's
xstate size is less than user's xstate size.

So here is the problem: currently kernel uses the kernel's xstate size
for xstate size in signal frame. This is not a problem in no "xsaves" case.
But it is an issue in "xsaves" case because kernel's xstate size is smaller
than user's xstate size. When setting up signal math frame in
alloc_ mathframe(), the fpstate is in standard format; but a smaller size
of fpstate buffer is allocated in signal frame for standard format
xstate. Then kernel saves only part of xstate registers into this smaller
user's fpstate buffer and user will see part of the xstate registers in
signal context. Similar issue happens after returning from signal handler:
kernel will only restore part of xstate registers from user's fpstate
buffer in signal frame.

This patch defines and uses user_xstate_size for xstate size in signal
frame. It's read from returned value in ebx from CPUID leaf 0x0D subleaf
0x0. This is maximum size required by enabled states in XCR0 and may be
different from ecx when states at the end of the xsave area are not
enabled. This value indicates the size required for XSAVE to save all
supported user states in legacy/standard format.

And in order to copy kernel's xsave area in compact format to user xsave
area in standard format, we use copy_to_user_xstate().

Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Reviewed-by: Dave Hansen <dave.hansen@xxxxxxxxx>
---
arch/x86/include/asm/fpu-internal.h | 3 +-
arch/x86/include/asm/processor.h | 1 +
arch/x86/include/asm/xsave.h | 1 -
arch/x86/kernel/xsave.c | 95 +++++++++++++++++++++++++++++++++----
4 files changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index da5e967..c00c769 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -496,7 +496,8 @@ extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);

static inline int xstate_sigframe_size(void)
{
- return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+ return use_xsave() ? user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+ user_xstate_size;
}

static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 23ba676..576ff8c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -483,6 +483,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */

extern unsigned int xstate_size;
+extern unsigned int user_xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;

diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c9a6d68..7799d18 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -44,7 +44,6 @@
#define REX_PREFIX
#endif

-extern unsigned int xstate_size;
extern u64 pcntxt_mask;
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern struct xsave_struct *init_xstate_buf;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 3c0a9d1..b8373a0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -29,6 +29,7 @@ static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes;
static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
static unsigned int xstate_features;
+unsigned int user_xstate_size;

/*
* If a processor implementation discern that a processor state component is
@@ -85,7 +86,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
*/
while (xstate_bv) {
if (xstate_bv & 0x1) {
- int offset = xstate_offsets[feature_bit];
+ int offset = xstate_comp_offsets[feature_bit];
int size = xstate_sizes[feature_bit];

memcpy(((void *) fx) + offset,
@@ -116,7 +117,7 @@ static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > xstate_size ||
+ fx_sw->xstate_size > user_xstate_size ||
fx_sw->xstate_size > fx_sw->extended_size)
return -1;

@@ -173,7 +174,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
if (!use_xsave())
return err;

- err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+ err |= __put_user(FP_XSTATE_MAGIC2,
+ (__u32 *)(buf + user_xstate_size));

/*
* Read the xstate_bv which we copied (directly from the cpu or
@@ -210,12 +212,85 @@ static inline int save_user_xstate(struct xsave_struct __user *buf)
else
err = fsave_user((struct i387_fsave_struct __user *) buf);

- if (unlikely(err) && __clear_user(buf, xstate_size))
+ if (unlikely(err) && __clear_user(buf, user_xstate_size))
err = -EFAULT;
return err;
}

/*
+ * Copy xsave area from kernel to user.
+ */
+static int copy_to_user_xstate(void __user *buf_fx, struct xsave_struct *xsave)
+{
+ int i;
+ u64 xcomp_bv;
+ struct xsave_struct __user *user_xsave;
+
+ /*
+ * If kernel's xsave area is in standard format, copy the xsave
+ * area directly from kernel to user buffer because both of the
+ * xsave areas are in the same format.
+ */
+ if (!cpu_has_xsaves) {
+ if (__copy_to_user(buf_fx, xsave, user_xstate_size))
+ return -1;
+ }
+
+ /*
+ * If kernel's xsave area is in compact format, copy xstates one
+ * by one from kernel to user buffer because user's xsave area
+ * is in standard format which is different from kernel.
+ *
+ * This is inefficient. It makes a bunch of smaller
+ * copy_to_user() calls when it might be possible to make fewer
+ * large ones. But, this code does not get called on "eagerfpu"
+ * (user_has_fpu() always true) systems which also happen to be
+ * the ones with lots of xsave state.
+ *
+ * First, clear buf_fx.
+ */
+ if (__clear_user(buf_fx, user_xstate_size))
+ return -1;
+
+ /*
+ * copy FP, SSE, and header to user.
+ */
+ if (__copy_to_user(buf_fx, xsave, FXSAVE_SIZE + XSAVE_HDR_SIZE))
+ return -1;
+
+ /*
+ * Clear xcomp_bv[63] in user's xsave area header to indicate
+ * buf_fx is in standard format.
+ */
+ xcomp_bv = xsave->xsave_hdr.xcomp_bv;
+ user_xsave = buf_fx;
+ xcomp_bv &= ~((u64)1 << 63);
+
+ if (__put_user(xcomp_bv, &user_xsave->xsave_hdr.xcomp_bv))
+ return -1;
+
+ /*
+ * Copy rest of xstates in compact format to user.
+ */
+ for (i = 2; i < xstate_features; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
+ int user_offset, kernel_offset;
+ int size;
+
+ user_offset = xstate_offsets[i];
+ kernel_offset = xstate_comp_offsets[i];
+ size = xstate_sizes[i];
+
+ if (__copy_to_user(buf_fx + user_offset,
+ xsave + kernel_offset, size))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
* Save the fpu, extended register state to the user signal frame.
*
* 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
@@ -261,7 +336,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
fpu_fxsave(&tsk->thread.fpu);
} else {
sanitize_i387_state(tsk);
- if (__copy_to_user(buf_fx, xsave, xstate_size))
+ if (copy_to_user_xstate(buf_fx, xsave))
return -1;
}

@@ -434,7 +509,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
static void prepare_fx_sw_frame(void)
{
int fsave_header_size = sizeof(struct i387_fsave_struct);
- int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+ int size = user_xstate_size + FP_XSTATE_MAGIC2_SIZE;

if (config_enabled(CONFIG_X86_32))
size += fsave_header_size;
@@ -442,7 +517,7 @@ static void prepare_fx_sw_frame(void)
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xstate_bv = pcntxt_mask;
- fx_sw_reserved.xstate_size = xstate_size;
+ fx_sw_reserved.xstate_size = user_xstate_size;

if (config_enabled(CONFIG_IA32_EMULATION)) {
fx_sw_reserved_ia32 = fx_sw_reserved;
@@ -576,14 +651,18 @@ __setup("eagerfpu=", eager_fpu_setup);

/*
* Calculate total size of enabled xstates in XCR0/pcntxt_mask.
+ *
+ * XCR0/pcntxt_mask should not be changed after this.
*/
static void __init init_xstate_size(void)
{
unsigned int eax, ebx, ecx, edx;
int i;

+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ user_xstate_size = ebx;
+
if (!cpu_has_xsaves) {
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
xstate_size = ebx;
return;
}
--
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/