[GIT PULL] x86/fpu changes for v4.10

From: Ingo Molnar
Date: Mon Dec 12 2016 - 04:48:34 EST


Linus,

Please pull the latest x86-fpu-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fpu-for-linus

# HEAD: 064e6a8ba61a751625478f656c6f76a6f37a009e Merge branch 'linus' into x86/fpu, to resolve conflicts

The main changes in this cycle were:

- Do a large round of simplifications after all CPUs do 'eager' FPU context
switching in v4.9: remove CR0 twiddling, remove leftover eager/lazy bts, etc.
(Andy Lutomirski)

- More FPU code simplifications: remov struct fpu::counter, clarify nomenclature,
remove unnecessary arguments/functions and better structure the code.
(Rik van Riel)

out-of-topic modifications in x86-fpu-for-linus:
--------------------------------------------------
drivers/char/hw_random/via-rng.c # 5a83d60c074d: x86/fpu: Remove irq_ts_save(
drivers/crypto/padlock-aes.c # 5a83d60c074d: x86/fpu: Remove irq_ts_save(
drivers/crypto/padlock-sha.c # 5a83d60c074d: x86/fpu: Remove irq_ts_save(
drivers/lguest/hypercalls.c # cd95ea81f256: x86/fpu, lguest: Remove CR0.
drivers/lguest/lg.h # cd95ea81f256: x86/fpu, lguest: Remove CR0.
drivers/lguest/x86/core.c # cd95ea81f256: x86/fpu, lguest: Remove CR0.
include/linux/kvm_host.h # 3d42de25d290: x86/fpu, kvm: Remove KVM vcp

Thanks,

Ingo

------------------>
Andy Lutomirski (13):
x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
x86/fpu: Hard-disable lazy FPU mode
x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
x86/fpu: Remove use_eager_fpu()
x86/fpu: Finish excising 'eagerfpu'
x86/fpu: Get rid of two redundant clts() calls
x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
x86/fpu: Remove irq_ts_save() and irq_ts_restore()
x86/fpu, kvm: Remove host CR0.TS manipulation
x86/fpu, lguest: Remove CR0.TS support
x86/fpu: Handle #NM without FPU emulation as an error
x86/fpu: Remove stts()
x86/fpu: Remove clts()

Rik van Riel (7):
x86/fpu: Remove struct fpu::counter
x86/fpu, kvm: Remove KVM vcpu->fpu_counter
x86/fpu: Rename lazy restore functions to "register state valid"
x86/fpu: Remove __fpregs_(de)activate()
x86/fpu: Split old & new FPU code paths
x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
x86/fpu: Split old_fpu & new_fpu handling into separate functions


Documentation/kernel-parameters.txt | 6 --
arch/x86/crypto/crc32c-intel_glue.c | 22 +----
arch/x86/include/asm/cpufeatures.h | 1 -
arch/x86/include/asm/fpu/api.h | 10 ---
arch/x86/include/asm/fpu/internal.h | 139 ++++++++++---------------------
arch/x86/include/asm/fpu/types.h | 34 --------
arch/x86/include/asm/fpu/xstate.h | 17 ++--
arch/x86/include/asm/lguest_hcall.h | 1 -
arch/x86/include/asm/paravirt.h | 5 --
arch/x86/include/asm/paravirt_types.h | 2 -
arch/x86/include/asm/special_insns.h | 13 ---
arch/x86/include/asm/trace/fpu.h | 5 +-
arch/x86/kernel/fpu/bugs.c | 7 --
arch/x86/kernel/fpu/core.c | 74 ++--------------
arch/x86/kernel/fpu/init.c | 107 +-----------------------
arch/x86/kernel/fpu/signal.c | 8 +-
arch/x86/kernel/fpu/xstate.c | 9 --
arch/x86/kernel/paravirt.c | 1 -
arch/x86/kernel/paravirt_patch_32.c | 2 -
arch/x86/kernel/paravirt_patch_64.c | 2 -
arch/x86/kernel/process_32.c | 5 +-
arch/x86/kernel/process_64.c | 5 +-
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/kernel/traps.c | 20 ++++-
arch/x86/kvm/cpuid.c | 4 +-
arch/x86/kvm/vmx.c | 12 +--
arch/x86/kvm/x86.c | 19 +----
arch/x86/lguest/boot.c | 29 ++-----
arch/x86/mm/pkeys.c | 3 +-
arch/x86/xen/enlighten.c | 13 ---
drivers/char/hw_random/via-rng.c | 8 +-
drivers/crypto/padlock-aes.c | 23 +----
drivers/crypto/padlock-sha.c | 18 ----
drivers/lguest/hypercalls.c | 4 -
drivers/lguest/lg.h | 1 -
drivers/lguest/x86/core.c | 19 +----
include/linux/kvm_host.h | 1 -
tools/arch/x86/include/asm/cpufeatures.h | 1 -
38 files changed, 105 insertions(+), 547 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 37babf91f2cb..459b301137c2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1074,12 +1074,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nopku [X86] Disable Memory Protection Keys CPU feature found
in some Intel CPUs.

- eagerfpu= [X86]
- on enable eager fpu restore
- off disable eager fpu restore
- auto selects the default scheme, which automatically
- enables eagerfpu restore for xsaveopt.
-
module.async_probe [KNL]
Enable asynchronous probe on this module.

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0857b1a1de3b..c194d5717ae5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,26 +48,13 @@
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
* for fpu state save/restore overhead.
*/
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
+#define CRC32C_PCL_BREAKEVEN 512

asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#if defined(X86_FEATURE_EAGER_FPU)
-#define set_pcl_breakeven_point() \
-do { \
- if (!use_eager_fpu()) \
- crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
-} while (0)
-#else
-#define set_pcl_breakeven_point() \
- (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
-#endif
#endif /* CONFIG_X86_64 */

static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -190,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
@@ -202,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
@@ -261,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
- set_pcl_breakeven_point();
}
#endif
return crypto_register_shash(&alg);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a39629206864..cddd5d06e1cb 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..0877ae018fc9 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -27,16 +27,6 @@ extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);

/*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-extern int irq_ts_save(void);
-extern void irq_ts_restore(int TS_state);
-
-/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
*
* If 'feature_name' is set then put a human-readable description of
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..d4a684997497 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
/*
* FPU related CPU feature flag helper routines:
*/
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return static_cpu_has(X86_FEATURE_EAGER_FPU);
-}
-
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
- * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
- * on this CPU.
+ * The in-register FPU state for an FPU context on a CPU is assumed to be
+ * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * matches the FPU.
*
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
*/
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+static inline void __cpu_invalidate_fpregs_state(void)
{
- per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

-static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
-{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
+static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
- if (!use_eager_fpu())
- clts();
+ fpu->last_cpu = -1;
}

-static inline void __fpregs_deactivate_hw(void)
+static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
- if (!use_eager_fpu())
- stts();
+ return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}

-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
-static inline void __fpregs_deactivate(struct fpu *fpu)
+/*
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own:
+ */
+static inline void fpregs_deactivate(struct fpu *fpu)
{
WARN_ON_FPU(!fpu->fpregs_active);

@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
trace_x86_fpu_regs_deactivated(fpu);
}

-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
-static inline void __fpregs_activate(struct fpu *fpu)
+static inline void fpregs_activate(struct fpu *fpu)
{
WARN_ON_FPU(fpu->fpregs_active);

@@ -554,51 +548,19 @@ static inline int fpregs_active(void)
}

/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void fpregs_activate(struct fpu *fpu)
-{
- __fpregs_activate_hw();
- __fpregs_activate(fpu);
-}
-
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
- __fpregs_deactivate(fpu);
- __fpregs_deactivate_hw();
-}
-
-/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
+ * - switch_fpu_prepare() saves the old state.
+ * This is done within the context of the old process.
*
* - switch_fpu_finish() restores the new state as
* necessary.
*/
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t
-switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+static inline void
+switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- fpu_switch_t fpu;
-
- /*
- * If the task has used the math, pre-load the FPU on xsave processors
- * or if the past 5 consecutive context-switches used math.
- */
- fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active &&
- (use_eager_fpu() || new_fpu->counter > 5);
-
if (old_fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
/* But leave fpu_fpregs_owner_ctx! */
old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new_fpu->counter++;
- __fpregs_activate(new_fpu);
- trace_x86_fpu_regs_activated(new_fpu);
- prefetch(&new_fpu->state);
- } else {
- __fpregs_deactivate_hw();
- }
- } else {
- old_fpu->counter = 0;
+ } else
old_fpu->last_cpu = -1;
- if (fpu.preload) {
- new_fpu->counter++;
- if (fpu_want_lazy_restore(new_fpu, cpu))
- fpu.preload = 0;
- else
- prefetch(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
- }
- return fpu;
}

/*
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
*/

/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
+ * Set up the userspace FPU context for the new task, if the task
+ * has used the FPU.
*/
-static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
- if (fpu_switch.preload)
- copy_kernel_to_fpregs(&new_fpu->state);
+ bool preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active;
+
+ if (preload) {
+ if (!fpregs_state_valid(new_fpu, cpu))
+ copy_kernel_to_fpregs(&new_fpu->state);
+ fpregs_activate(new_fpu);
+ }
}

/*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
unsigned char fpregs_active;

/*
- * @counter:
- *
- * This counter contains the number of consecutive context switches
- * during which the FPU stays used. If this is over a threshold, the
- * lazy FPU restore logic becomes eager, to save the trap overhead.
- * This is an unsigned char so that after 256 iterations the counter
- * wraps and the context switch behavior turns lazy again; this is to
- * deal with bursty apps that only use the FPU for a short time:
- */
- unsigned char counter;
- /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
@@ -340,29 +329,6 @@ struct fpu {
* the registers in the FPU are more recent than this state
* copy. If the task context-switches away then they get
* saved here and represent the FPU state.
- *
- * After context switches there may be a (short) time period
- * during which the in-FPU hardware registers are unchanged
- * and still perfectly match this state, if the tasks
- * scheduled afterwards are not using the FPU.
- *
- * This is the 'lazy restore' window of optimization, which
- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
- *
- * We detect whether a subsequent task uses the FPU via setting
- * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
- *
- * During this window, if the task gets scheduled again, we
- * might be able to skip having to do a restore from this
- * memory buffer to the hardware registers - at the cost of
- * incurring the overhead of #NM fault traps.
- *
- * Note that on modern CPUs that support the XSAVEOPT (or other
- * optimized XSAVE instructions), we don't use #NM traps anymore,
- * as the hardware can track whether FPU registers need saving
- * or not. On such CPUs we activate the non-lazy ('eagerfpu')
- * logic, which unconditionally saves/restores all FPU state
- * across context switches. (if FPU state exists.)
*/
union fpregs_state state;
/*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 430bacf73074..1b2799e0699a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,21 +21,16 @@
/* Supervisor features */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)

-/* Supported features which support lazy state saving */
-#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
+/* All currently supported features */
+#define XCNTXT_MASK (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_PKRU)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)

#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ef01fef3eebc..6c119cfae218 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -9,7 +9,6 @@
#define LHCALL_FLUSH_TLB 5
#define LHCALL_LOAD_IDT_ENTRY 6
#define LHCALL_SET_STACK 7
-#define LHCALL_TS 8
#define LHCALL_SET_CLOCKEVENT 9
#define LHCALL_HALT 10
#define LHCALL_SET_PMD 13
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ce932812f142..f1fb4dbe9a3e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -41,11 +41,6 @@ static inline void set_debugreg(unsigned long val, int reg)
PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}

-static inline void clts(void)
-{
- PVOP_VCALL0(pv_cpu_ops.clts);
-}
-
static inline unsigned long read_cr0(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0f400c0e4979..545426aa61ef 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -103,8 +103,6 @@ struct pv_cpu_ops {
unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value);

- void (*clts)(void);
-
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 19a2224f9e16..12af3e35edfa 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,11 +6,6 @@

#include <asm/nops.h>

-static inline void native_clts(void)
-{
- asm volatile("clts");
-}
-
/*
* Volatile isn't enough to prevent the compiler from reordering the
* read/write functions for the control registers and messing everything up.
@@ -208,16 +203,8 @@ static inline void load_gs_index(unsigned selector)

#endif

-/* Clear the 'TS' bit */
-static inline void clts(void)
-{
- native_clts();
-}
-
#endif/* CONFIG_PARAVIRT */

-#define stts() write_cr0(read_cr0() | X86_CR0_TS)
-
static inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1f5bf6..342e59789fcd 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
__field(struct fpu *, fpu)
__field(bool, fpregs_active)
__field(bool, fpstate_active)
- __field(int, counter)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu,
__entry->fpu = fpu;
__entry->fpregs_active = fpu->fpregs_active;
__entry->fpstate_active = fpu->fpstate_active;
- __entry->counter = fpu->counter;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
__entry->fpregs_active,
__entry->fpstate_active,
- __entry->counter,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index aad34aafc0e0..d913047f832c 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -23,17 +23,12 @@ static double __initdata y = 3145727.0;
*/
void __init fpu__init_check_bugs(void)
{
- u32 cr0_saved;
s32 fdiv_bug;

/* kernel_fpu_begin/end() relies on patched alternative instructions. */
if (!boot_cpu_has(X86_FEATURE_FPU))
return;

- /* We might have CR0::TS set already, clear it: */
- cr0_saved = read_cr0();
- write_cr0(cr0_saved & ~X86_CR0_TS);
-
kernel_fpu_begin();

/*
@@ -56,8 +51,6 @@ void __init fpu__init_check_bugs(void)

kernel_fpu_end();

- write_cr0(cr0_saved);
-
if (fdiv_bug) {
set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
pr_warn("Hmm, FPU with FDIV bug\n");
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ebb4e95fbd74..e4e97a5355ce 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void)
return this_cpu_read(in_kernel_fpu);
}

-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
static bool interrupted_kernel_fpu_idle(void)
{
- if (kernel_fpu_disabled())
- return false;
-
- if (use_eager_fpu())
- return true;
-
- return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+ return !kernel_fpu_disabled();
}

/*
@@ -125,8 +107,7 @@ void __kernel_fpu_begin(void)
*/
copy_fpregs_to_fpstate(fpu);
} else {
- this_cpu_write(fpu_fpregs_owner_ctx, NULL);
- __fpregs_activate_hw();
+ __cpu_invalidate_fpregs_state();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void)

if (fpu->fpregs_active)
copy_kernel_to_fpregs(&fpu->state);
- else
- __fpregs_deactivate_hw();

kernel_fpu_enable();
}
@@ -159,35 +138,6 @@ void kernel_fpu_end(void)
EXPORT_SYMBOL_GPL(kernel_fpu_end);

/*
- * CR0::TS save/restore functions:
- */
-int irq_ts_save(void)
-{
- /*
- * If in process context and not atomic, we can take a spurious DNA fault.
- * Otherwise, doing clts() in process context requires disabling preemption
- * or some heavy lifting like kernel_fpu_begin()
- */
- if (!in_atomic())
- return 0;
-
- if (read_cr0() & X86_CR0_TS) {
- clts();
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(irq_ts_save);
-
-void irq_ts_restore(int TS_state)
-{
- if (TS_state)
- stts();
-}
-EXPORT_SYMBOL_GPL(irq_ts_restore);
-
-/*
* Save the FPU state (mark it for reload if necessary):
*
* This only ever gets called for the current task.
@@ -200,10 +150,7 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_before_save(fpu);
if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu)) {
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&fpu->state);
- else
- fpregs_deactivate(fpu);
+ copy_kernel_to_fpregs(&fpu->state);
}
}
trace_x86_fpu_after_save(fpu);
@@ -247,7 +194,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);

int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->counter = 0;
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;

@@ -260,8 +206,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Don't let 'init optimized' areas of the XSAVE area
* leak into the child task:
*/
- if (use_eager_fpu())
- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);

/*
* Save current FPU registers directly into the child
@@ -283,10 +228,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
memcpy(&src_fpu->state, &dst_fpu->state,
fpu_kernel_xstate_size);

- if (use_eager_fpu())
- copy_kernel_to_fpregs(&src_fpu->state);
- else
- fpregs_deactivate(src_fpu);
+ copy_kernel_to_fpregs(&src_fpu->state);
}
preempt_enable();

@@ -366,7 +308,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)

if (fpu->fpstate_active) {
/* Invalidate any lazy state: */
- fpu->last_cpu = -1;
+ __fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
@@ -409,7 +351,7 @@ void fpu__current_fpstate_write_begin(void)
* ensures we will not be lazy and skip a XRSTOR in the
* future.
*/
- fpu->last_cpu = -1;
+ __fpu_invalidate_fpregs_state(fpu);
}

/*
@@ -459,7 +401,6 @@ void fpu__restore(struct fpu *fpu)
trace_x86_fpu_before_restore(fpu);
fpregs_activate(fpu);
copy_kernel_to_fpregs(&fpu->state);
- fpu->counter++;
trace_x86_fpu_after_restore(fpu);
kernel_fpu_enable();
}
@@ -477,7 +418,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
void fpu__drop(struct fpu *fpu)
{
preempt_disable();
- fpu->counter = 0;

if (fpu->fpregs_active) {
/* Ignore delayed exceptions from user space */
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 2f2b8c7ccb85..60dece392b3a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -10,18 +10,6 @@
#include <linux/init.h>

/*
- * Initialize the TS bit in CR0 according to the style of context-switches
- * we are using:
- */
-static void fpu__init_cpu_ctx_switch(void)
-{
- if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
- stts();
- else
- clts();
-}
-
-/*
* Initialize the registers found in all CPUs, CR0 and CR4:
*/
static void fpu__init_cpu_generic(void)
@@ -58,7 +46,6 @@ void fpu__init_cpu(void)
{
fpu__init_cpu_generic();
fpu__init_cpu_xstate();
- fpu__init_cpu_ctx_switch();
}

/*
@@ -233,82 +220,16 @@ static void __init fpu__init_system_xstate_size_legacy(void)
}

/*
- * FPU context switching strategies:
- *
- * Against popular belief, we don't do lazy FPU saves, due to the
- * task migration complications it brings on SMP - we only do
- * lazy FPU restores.
- *
- * 'lazy' is the traditional strategy, which is based on setting
- * CR0::TS to 1 during context-switch (instead of doing a full
- * restore of the FPU state), which causes the first FPU instruction
- * after the context switch (whenever it is executed) to fault - at
- * which point we lazily restore the FPU state into FPU registers.
- *
- * Tasks are of course under no obligation to execute FPU instructions,
- * so it can easily happen that another context-switch occurs without
- * a single FPU instruction being executed. If we eventually switch
- * back to the original task (that still owns the FPU) then we have
- * not only saved the restores along the way, but we also have the
- * FPU ready to be used for the original task.
- *
- * 'lazy' is deprecated because it's almost never a performance win
- * and it's much more complicated than 'eager'.
- *
- * 'eager' switching is by default on all CPUs, there we switch the FPU
- * state during every context switch, regardless of whether the task
- * has used FPU instructions in that time slice or not. This is done
- * because modern FPU context saving instructions are able to optimize
- * state saving and restoration in hardware: they can detect both
- * unused and untouched FPU state and optimize accordingly.
- *
- * [ Note that even in 'lazy' mode we might optimize context switches
- * to use 'eager' restores, if we detect that a task is using the FPU
- * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
- */
-static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
-
-/*
* Find supported xfeatures based on cpu features and command-line input.
* This must be called after fpu__init_parse_early_param() is called and
* xfeatures_mask is enumerated.
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
- /* Support all xfeatures known to us */
- if (eagerfpu != DISABLE)
- return XCNTXT_MASK;
-
- /* Warning of xfeatures being disabled for no eagerfpu mode */
- if (xfeatures_mask & XFEATURE_MASK_EAGER) {
- pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
- xfeatures_mask & XFEATURE_MASK_EAGER);
- }
-
- /* Return a mask that masks out all features requiring eagerfpu mode */
- return ~XFEATURE_MASK_EAGER;
+ return XCNTXT_MASK;
}

-/*
- * Disable features dependent on eagerfpu.
- */
-static void __init fpu__clear_eager_fpu_features(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_MPX);
-}
-
-/*
- * Pick the FPU context switching strategy:
- *
- * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of
- * the following is true:
- *
- * (1) the cpu has xsaveopt, as it has the optimization and doing eager
- * FPU switching has a relatively low cost compared to a plain xsave;
- * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
- * switching. Should the kernel boot with noxsaveopt, we support MPX
- * with eager FPU switching at a higher cost.
- */
+/* Legacy code to initialize eager fpu mode. */
static void __init fpu__init_system_ctx_switch(void)
{
static bool on_boot_cpu __initdata = 1;
@@ -317,17 +238,6 @@ static void __init fpu__init_system_ctx_switch(void)
on_boot_cpu = 0;

WARN_ON_FPU(current->thread.fpu.fpstate_active);
-
- if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
- eagerfpu = ENABLE;
-
- if (xfeatures_mask & XFEATURE_MASK_EAGER)
- eagerfpu = ENABLE;
-
- if (eagerfpu == ENABLE)
- setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
- printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
}

/*
@@ -336,11 +246,6 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
- if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
- eagerfpu = DISABLE;
- fpu__clear_eager_fpu_features();
- }
-
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);

@@ -375,14 +280,6 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
*/
fpu__init_cpu();

- /*
- * But don't leave CR0::TS set yet, as some of the FPU setup
- * methods depend on being able to execute FPU instructions
- * that will fault on a set TS, such as the FXSAVE in
- * fpu__init_system_mxcsr().
- */
- clts();
-
fpu__init_system_generic();
fpu__init_system_xstate_size_legacy();
fpu__init_system_xstate();
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a184c210efba..83c23c230b4c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}

fpu->fpstate_active = 1;
- if (use_eager_fpu()) {
- preempt_disable();
- fpu__restore(fpu);
- preempt_enable();
- }
+ preempt_disable();
+ fpu__restore(fpu);
+ preempt_enable();

return err;
} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 095ef7ddd6ae..c7c11cc988b7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -890,15 +890,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
- /*
- * For most XSAVE components, this would be an arduous task:
- * brining fpstate up to date with fpregs, updating fpstate,
- * then re-populating fpregs. But, for components that are
- * never lazily managed, we can just access the fpregs
- * directly. PKRU is never managed lazily, so we can just
- * manipulate it directly. Make sure it stays that way.
- */
- WARN_ON_ONCE(!use_eager_fpu());

/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bbf3d5933eaa..a1bfba0f7234 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -328,7 +328,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg,
- .clts = native_clts,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
.read_cr4 = native_read_cr4,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 920c6ae08592..d3f7f14bb328 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -8,7 +8,6 @@ DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(pv_cpu_ops, clts, "clts");

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
@@ -48,7 +47,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
- PATCH_SITE(pv_cpu_ops, clts);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index bb3840cedb4f..915a4c0b217c 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
-DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");

DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -58,7 +57,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
- PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index bd7be8efdc4c..7dc8c9c3d801 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -232,11 +232,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- fpu_switch_t fpu_switch;

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

- fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
+ switch_fpu_prepare(prev_fpu, cpu);

/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -295,7 +294,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
lazy_load_gs(next->gs);

- switch_fpu_finish(next_fpu, fpu_switch);
+ switch_fpu_finish(next_fpu, cpu);

this_cpu_write(current_task, next_p);

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3c1ca0..9c3a7b04e59e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -265,9 +265,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned prev_fsindex, prev_gsindex;
- fpu_switch_t fpu_switch;

- fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
+ switch_fpu_prepare(prev_fpu, cpu);

/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -417,7 +416,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
prev->gsbase = 0;
prev->gsindex = prev_gsindex;

- switch_fpu_finish(next_fpu, fpu_switch);
+ switch_fpu_finish(next_fpu, cpu);

/*
* Switch the PDA and FPU contexts.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42f5eb7b4f6c..d29c85250108 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1111,7 +1111,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
return err;

/* the FPU context is blank, nobody can own it */
- __cpu_disable_lazy_restore(cpu);
+ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;

common_cpu_up(cpu, tidle);

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bd4e3d4d3625..bf0c6d049080 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -853,6 +853,8 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
+ unsigned long cr0;
+
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

#ifdef CONFIG_MATH_EMULATION
@@ -866,10 +868,20 @@ do_device_not_available(struct pt_regs *regs, long error_code)
return;
}
#endif
- fpu__restore(&current->thread.fpu); /* interrupts still off */
-#ifdef CONFIG_X86_32
- cond_local_irq_enable(regs);
-#endif
+
+ /* This should not happen. */
+ cr0 = read_cr0();
+ if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
+ /* Try to fix it up and carry on. */
+ write_cr0(cr0 & ~X86_CR0_TS);
+ } else {
+ /*
+ * Something terrible happened, and we're better off trying
+ * to kill the task than getting stuck in a never-ending
+ * loop of #NM faults.
+ */
+ die("unexpected #NM exception", regs, error_code);
+ }
}
NOKPROBE_SYMBOL(do_device_not_available);

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index afa7bbb596cd..0aefb626fa8f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

- if (use_eager_fpu())
- kvm_x86_ops->fpu_activate(vcpu);
+ kvm_x86_ops->fpu_activate(vcpu);

/*
* The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82462fc..3980da515fd0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2145,12 +2145,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#endif
if (vmx->host_state.msr_host_bndcfgs)
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
- /*
- * If the FPU is not active (through the host task or
- * the guest vcpu), then restore the cr0.TS bit.
- */
- if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
- stts();
load_gdt(this_cpu_ptr(&host_gdt));
}

@@ -4845,9 +4839,11 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
u32 low32, high32;
unsigned long tmpl;
struct desc_ptr dt;
- unsigned long cr4;
+ unsigned long cr0, cr4;

- vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */
+ cr0 = read_cr0();
+ WARN_ON(cr0 & X86_CR0_TS);
+ vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */

/* Save the most likely value for this task's CR4 in the VMCS. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04c5d96b1d67..56900f3e7bcf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5081,11 +5081,6 @@ static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
{
preempt_disable();
kvm_load_guest_fpu(emul_to_vcpu(ctxt));
- /*
- * CR0.TS may reference the host fpu state, not the guest fpu state,
- * so it may be clear at this point.
- */
- clts();
}

static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
@@ -7407,25 +7402,13 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)

void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->guest_fpu_loaded) {
- vcpu->fpu_counter = 0;
+ if (!vcpu->guest_fpu_loaded)
return;
- }

vcpu->guest_fpu_loaded = 0;
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- /*
- * If using eager FPU mode, or if the guest is a frequent user
- * of the FPU, just leave the FPU active for next time.
- * Every 255 times fpu_counter rolls over to 0; a guest that uses
- * the FPU in bursts will revert to loading it on demand.
- */
- if (!use_eager_fpu()) {
- if (++vcpu->fpu_counter < 5)
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
- }
trace_kvm_fpu(0);
}

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 25da5bc8d83d..4ca0d78adcf0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -497,38 +497,24 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
* a whole series of functions like read_cr0() and write_cr0().
*
* We start with cr0. cr0 allows you to turn on and off all kinds of basic
- * features, but Linux only really cares about one: the horrifically-named Task
- * Switched (TS) bit at bit 3 (ie. 8)
+ * features, but the only cr0 bit that Linux ever used at runtime was the
+ * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8)
*
* What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if
* the floating point unit is used. Which allows us to restore FPU state
- * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
- * name like "FPUTRAP bit" be a little less cryptic?
+ * lazily after a task switch if we wanted to, but wouldn't a name like
+ * "FPUTRAP bit" be a little less cryptic?
*
- * We store cr0 locally because the Host never changes it. The Guest sometimes
- * wants to read it and we'd prefer not to bother the Host unnecessarily.
+ * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore
+ * cr0.
*/
-static unsigned long current_cr0;
static void lguest_write_cr0(unsigned long val)
{
- lazy_hcall1(LHCALL_TS, val & X86_CR0_TS);
- current_cr0 = val;
}

static unsigned long lguest_read_cr0(void)
{
- return current_cr0;
-}
-
-/*
- * Intel provided a special instruction to clear the TS bit for people too cool
- * to use write_cr0() to do it. This "clts" instruction is faster, because all
- * the vowels have been optimized out.
- */
-static void lguest_clts(void)
-{
- lazy_hcall1(LHCALL_TS, 0);
- current_cr0 &= ~X86_CR0_TS;
+ return 0;
}

/*
@@ -1432,7 +1418,6 @@ __init void lguest_init(void)
pv_cpu_ops.load_tls = lguest_load_tls;
pv_cpu_ops.get_debugreg = lguest_get_debugreg;
pv_cpu_ops.set_debugreg = lguest_set_debugreg;
- pv_cpu_ops.clts = lguest_clts;
pv_cpu_ops.read_cr0 = lguest_read_cr0;
pv_cpu_ops.write_cr0 = lguest_write_cr0;
pv_cpu_ops.read_cr4 = lguest_read_cr4;
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index f88ce0e5efd9..2dab69a706ec 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -141,8 +141,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
* Called from the FPU code when creating a fresh set of FPU
* registers. This is called from a very specific context where
* we know the FPU regstiers are safe for use and we can use PKRU
- * directly. The fact that PKRU is only available when we are
- * using eagerfpu mode makes this possible.
+ * directly.
*/
void copy_init_pkru_to_fpregs(void)
{
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bdd855685403..ced7027b3fbc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -980,17 +980,6 @@ static void xen_io_delay(void)
{
}

-static void xen_clts(void)
-{
- struct multicall_space mcs;
-
- mcs = xen_mc_entry(0);
-
- MULTI_fpu_taskswitch(mcs.mc, 0);
-
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
static DEFINE_PER_CPU(unsigned long, xen_cr0_value);

static unsigned long xen_read_cr0(void)
@@ -1233,8 +1222,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.set_debugreg = xen_set_debugreg,
.get_debugreg = xen_get_debugreg,

- .clts = xen_clts,
-
.read_cr0 = xen_read_cr0,
.write_cr0 = xen_write_cr0,

diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 44ce80606944..d1f5bb534e0e 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -70,21 +70,17 @@ enum {
* until we have 4 bytes, thus returning a u32 at a time,
* instead of the current u8-at-a-time.
*
- * Padlock instructions can generate a spurious DNA fault, so
- * we have to call them in the context of irq_ts_save/restore()
+ * Padlock instructions can generate a spurious DNA fault, but the
+ * kernel doesn't use CR0.TS, so this doesn't matter.
*/

static inline u32 xstore(u32 *addr, u32 edx_in)
{
u32 eax_out;
- int ts_state;
-
- ts_state = irq_ts_save();

asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
: "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr));

- irq_ts_restore(ts_state);
return eax_out;
}

diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 441e86b23571..b3869748cc6b 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -183,8 +183,8 @@ static inline void padlock_store_cword(struct cword *cword)

/*
* While the padlock instructions don't use FP/SSE registers, they
- * generate a spurious DNA fault when cr0.ts is '1'. These instructions
- * should be used only inside the irq_ts_save/restore() context
+ * generate a spurious DNA fault when CR0.TS is '1'. Fortunately,
+ * the kernel doesn't use CR0.TS.
*/

static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
@@ -298,24 +298,18 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct aes_ctx *ctx = aes_ctx(tfm);
- int ts_state;

padlock_reset_key(&ctx->cword.encrypt);
- ts_state = irq_ts_save();
ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
- irq_ts_restore(ts_state);
padlock_store_cword(&ctx->cword.encrypt);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct aes_ctx *ctx = aes_ctx(tfm);
- int ts_state;

padlock_reset_key(&ctx->cword.encrypt);
- ts_state = irq_ts_save();
ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
- irq_ts_restore(ts_state);
padlock_store_cword(&ctx->cword.encrypt);
}

@@ -346,14 +340,12 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
- int ts_state;

padlock_reset_key(&ctx->cword.encrypt);

blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);

- ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
ctx->E, &ctx->cword.encrypt,
@@ -361,7 +353,6 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- irq_ts_restore(ts_state);

padlock_store_cword(&ctx->cword.encrypt);

@@ -375,14 +366,12 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
- int ts_state;

padlock_reset_key(&ctx->cword.decrypt);

blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);

- ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
ctx->D, &ctx->cword.decrypt,
@@ -390,7 +379,6 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- irq_ts_restore(ts_state);

padlock_store_cword(&ctx->cword.encrypt);

@@ -425,14 +413,12 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
- int ts_state;

padlock_reset_key(&ctx->cword.encrypt);

blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);

- ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
walk.dst.virt.addr, ctx->E,
@@ -442,7 +428,6 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- irq_ts_restore(ts_state);

padlock_store_cword(&ctx->cword.decrypt);

@@ -456,14 +441,12 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
- int ts_state;

padlock_reset_key(&ctx->cword.encrypt);

blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);

- ts_state = irq_ts_save();
while ((nbytes = walk.nbytes)) {
padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
ctx->D, walk.iv, &ctx->cword.decrypt,
@@ -472,8 +455,6 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_done(desc, &walk, nbytes);
}

- irq_ts_restore(ts_state);
-
padlock_store_cword(&ctx->cword.encrypt);

return err;
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 8c5f90647b7a..bc72d20c32c3 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -89,7 +89,6 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
struct sha1_state state;
unsigned int space;
unsigned int leftover;
- int ts_state;
int err;

dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -120,14 +119,11 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,

memcpy(result, &state.state, SHA1_DIGEST_SIZE);

- /* prevent taking the spurious DNA fault with padlock. */
- ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
: \
: "c"((unsigned long)state.count + count), \
"a"((unsigned long)state.count), \
"S"(in), "D"(result));
- irq_ts_restore(ts_state);

padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

@@ -155,7 +151,6 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
struct sha256_state state;
unsigned int space;
unsigned int leftover;
- int ts_state;
int err;

dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -186,14 +181,11 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,

memcpy(result, &state.state, SHA256_DIGEST_SIZE);

- /* prevent taking the spurious DNA fault with padlock. */
- ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
: \
: "c"((unsigned long)state.count + count), \
"a"((unsigned long)state.count), \
"S"(in), "D"(result));
- irq_ts_restore(ts_state);

padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

@@ -312,7 +304,6 @@ static int padlock_sha1_update_nano(struct shash_desc *desc,
u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
((aligned(STACK_ALIGN)));
u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
- int ts_state;

partial = sctx->count & 0x3f;
sctx->count += len;
@@ -328,23 +319,19 @@ static int padlock_sha1_update_nano(struct shash_desc *desc,
memcpy(sctx->buffer + partial, data,
done + SHA1_BLOCK_SIZE);
src = sctx->buffer;
- ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
: "+S"(src), "+D"(dst) \
: "a"((long)-1), "c"((unsigned long)1));
- irq_ts_restore(ts_state);
done += SHA1_BLOCK_SIZE;
src = data + done;
}

/* Process the left bytes from the input data */
if (len - done >= SHA1_BLOCK_SIZE) {
- ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
: "+S"(src), "+D"(dst)
: "a"((long)-1),
"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
- irq_ts_restore(ts_state);
done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
src = data + done;
}
@@ -401,7 +388,6 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
((aligned(STACK_ALIGN)));
u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
- int ts_state;

partial = sctx->count & 0x3f;
sctx->count += len;
@@ -417,23 +403,19 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
memcpy(sctx->buf + partial, data,
done + SHA256_BLOCK_SIZE);
src = sctx->buf;
- ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
: "+S"(src), "+D"(dst)
: "a"((long)-1), "c"((unsigned long)1));
- irq_ts_restore(ts_state);
done += SHA256_BLOCK_SIZE;
src = data + done;
}

/* Process the left bytes from input data*/
if (len - done >= SHA256_BLOCK_SIZE) {
- ts_state = irq_ts_save();
asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
: "+S"(src), "+D"(dst)
: "a"((long)-1),
"c"((unsigned long)((len - done) / 64)));
- irq_ts_restore(ts_state);
done += ((len - done) - (len - done) % 64);
src = data + done;
}
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 19a32280731d..601f81c04873 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -109,10 +109,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
case LHCALL_SET_CLOCKEVENT:
guest_set_clockevent(cpu, args->arg1);
break;
- case LHCALL_TS:
- /* This sets the TS flag, as we saw used in run_guest(). */
- cpu->ts = args->arg1;
- break;
case LHCALL_HALT:
/* Similarly, this sets the halted flag for run_guest(). */
cpu->halted = 1;
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 69b3814afd2f..2356a2318034 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -43,7 +43,6 @@ struct lg_cpu {
struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */

u32 cr2;
- int ts;
u32 esp1;
u16 ss1;

diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 6e9042e3d2a9..743253fc638f 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -247,14 +247,6 @@ unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any)
void lguest_arch_run_guest(struct lg_cpu *cpu)
{
/*
- * Remember the awfully-named TS bit? If the Guest has asked to set it
- * we set it now, so we can trap and pass that trap to the Guest if it
- * uses the FPU.
- */
- if (cpu->ts && fpregs_active())
- stts();
-
- /*
* SYSENTER is an optimized way of doing system calls. We can't allow
* it because it always jumps to privilege level 0. A normal Guest
* won't try it because we don't advertise it in CPUID, but a malicious
@@ -282,10 +274,6 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
if (boot_cpu_has(X86_FEATURE_SEP))
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);

- /* Clear the host TS bit if it was set above. */
- if (cpu->ts && fpregs_active())
- clts();
-
/*
* If the Guest page faulted, then the cr2 register will tell us the
* bad virtual address. We have to grab this now, because once we
@@ -421,12 +409,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
kill_guest(cpu, "Writing cr2");
break;
case 7: /* We've intercepted a Device Not Available fault. */
- /*
- * If the Guest doesn't want to know, we already restored the
- * Floating Point Unit, so we just continue without telling it.
- */
- if (!cpu->ts)
- return;
+ /* No special handling is needed here. */
break;
case 32 ... 255:
/* This might be a syscall. */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01c0b9cc3915..cfc212d1cd60 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -224,7 +224,6 @@ struct kvm_vcpu {

int fpu_active;
int guest_fpu_loaded, guest_xcr0_loaded;
- unsigned char fpu_counter;
struct swait_queue_head wq;
struct pid *pid;
int sigset_active;
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index a39629206864..cddd5d06e1cb 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */