[patch] PIII/Katmai & FXSAVE support, disable serial-#, 2.2.2

Ingo Molnar (mingo@chiara.csoma.elte.hu)
Tue, 2 Mar 1999 21:54:37 +0100 (CET)


this is a new version of the Katmai patch. Changes:

- disables PIII serial# on all CPUs (Doug Ledford)
[we might make this configurable in the future for Wine
compatibility]

- more correct tagword conversion (Gabriel Paubert)

this is not yet the final version, but it already makes it possible to
execute user-space MMX2 instructions on real PIII CPUs (and it uses the
FXSAVE / FXRSTOR instructions for FPU context switching on FX-capable CPUs
like the Deschutes, Celeron, Xeon). The patch has been tested on both SMP
and UP systems.

-- mingo

--- linux/include/asm-i386/bugs.h.orig Mon Jan 11 05:08:03 1999
+++ linux/include/asm-i386/bugs.h Tue Mar 2 21:47:54 1999
@@ -18,6 +18,7 @@
*/

#include <linux/config.h>
+#include <linux/stddef.h>
#include <asm/processor.h>

#define CONFIG_BUGi386
@@ -61,6 +62,39 @@
#endif
return;
}
+#if CONFIG_X86_FX
+ /*
+ * If we got so far we can safely turn on FXSAVE/FXRESTORE,
+ * but make sure we are 16-byte aligned first.
+ */
+ if (offsetof(struct task_struct, tss.i387.hard) & 15) {
+ /*
+ * This triggers a link-time error if we manage to
+ * break alignment somehow.
+ */
+ extern void __buggy_fxsr_alignment(void);
+
+ __buggy_fxsr_alignment();
+ }
+ printk("Enabling extended fast FPU save and restore ... ");
+ set_in_cr4(X86_CR4_OSFXSR);
+ printk("done.\n");
+ /*
+ * Note, Katmai instructions are enabled as soon as you start
+ * using the FXSAVE/RESTORE stuff. This setting only
+ * indicates support for the masked/unmasked exceptions on
+ * the new PIII cpus. We don't have an Exception 16 handler
+ * for this yet, so we can't set this bit.
+ */
+ if (boot_cpu_data.x86_capability & X86_FEATURE_XMM) {
+ printk("Enabling KNI unmasked exception support ... (not yet) ");
+#if 0
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+#endif
+ printk("done.\n");
+ }
+#endif
+ disable_serial_nr();
/*
* check if exception 16 works correctly.. This is truly evil
* code: it disables the high 8 interrupts to make sure that
@@ -84,23 +118,31 @@
return;
if (!ignore_irq13) {
printk("OK, FPU using old IRQ 13 error reporting\n");
- return;
+ } else {
+ __asm__("fninit\n\t"
+ "fldl %1\n\t"
+ "fdivl %2\n\t"
+ "fmull %2\n\t"
+ "fldl %1\n\t"
+ "fsubp %%st,%%st(1)\n\t"
+ "fistpl %0\n\t"
+ "fwait\n\t"
+ "fninit"
+ : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "m" (*&x), "m" (*&y));
+ if (!boot_cpu_data.fdiv_bug)
+ printk("OK, FPU using exception 16 error reporting.\n");
+ else
+ printk("Hmm, FPU using exception 16 error reporting with FDIV bug.\n");
}
- __asm__("fninit\n\t"
- "fldl %1\n\t"
- "fdivl %2\n\t"
- "fmull %2\n\t"
- "fldl %1\n\t"
- "fsubp %%st,%%st(1)\n\t"
- "fistpl %0\n\t"
- "fwait\n\t"
- "fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
- : "m" (*&x), "m" (*&y));
- if (!boot_cpu_data.fdiv_bug)
- printk("OK, FPU using exception 16 error reporting.\n");
- else
- printk("Hmm, FPU using exception 16 error reporting with FDIV bug.\n");
+#if CONFIG_X86_FX
+ /*
+ * Silly check but we want to have a perfect FPU for
+ * FXSAVE/FXRESTORE.
+ */
+ if (boot_cpu_data.fdiv_bug || !ignore_irq13)
+ panic("huh, PIII/Xeon/Deschutes CPU but buggy FPU?");
+#endif
}

__initfunc(static void check_hlt(void))
@@ -352,6 +394,13 @@
#if defined(CONFIG_X86_GOOD_APIC) && defined(CONFIG_SMP)
if (smp_found_config && boot_cpu_data.x86 <= 5)
panic("Kernel compiled for PPro+, assumes local APIC without read-before-write bug");
+#endif
+/*
+ * If we configured for FXSR we better have it!
+ */
+#if CONFIG_X86_FX
+ if (!(boot_cpu_data.x86_capability & X86_FEATURE_FXSR))
+ panic("Kernel configured for Xeon/Katmai/PIII, but CPU does not support FXSR!");
#endif
}

--- linux/include/asm-i386/processor.h.orig Tue Jan 26 10:47:10 1999
+++ linux/include/asm-i386/processor.h Tue Mar 2 21:47:41 1999
@@ -7,10 +7,11 @@
#ifndef __ASM_I386_PROCESSOR_H
#define __ASM_I386_PROCESSOR_H

+#include <linux/config.h>
#include <asm/vm86.h>
#include <asm/math_emu.h>
-#include <asm/segment.h>
#include <asm/page.h>
+#include <asm/user.h>

/*
* CPU type and hardware bug flags. Kept separately for each CPU.
@@ -71,14 +72,14 @@
#define X86_FEATURE_CMOV 0x00008000 /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
#define X86_FEATURE_PAT 0x00010000 /* Page Attribute Table */
#define X86_FEATURE_PSE36 0x00020000 /* 36-bit PSEs */
-#define X86_FEATURE_18 0x00040000
+#define X86_FEATURE_PN 0x00040000 /* 96 bit CPU serial # */
#define X86_FEATURE_19 0x00080000
#define X86_FEATURE_20 0x00100000
#define X86_FEATURE_21 0x00200000
#define X86_FEATURE_22 0x00400000
#define X86_FEATURE_MMX 0x00800000 /* multimedia extensions */
#define X86_FEATURE_FXSR 0x01000000 /* FXSAVE and FXRSTOR instructions (fast save and restore of FPU context), and CR4.OSFXSR (OS uses these instructions) available */
-#define X86_FEATURE_25 0x02000000
+#define X86_FEATURE_XMM 0x02000000 /* Intel MMX2 instruction set */
#define X86_FEATURE_26 0x04000000
#define X86_FEATURE_27 0x08000000
#define X86_FEATURE_28 0x10000000
@@ -88,6 +89,55 @@

extern struct cpuinfo_x86 boot_cpu_data;

+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* fast FPU save/restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* KNI (MMX2) unmasked exception 16 */
+ /* handler is available */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4(unsigned long mask)
+{ /* Set the given bit(s) both in mmu_cr4_features and in this CPU's %cr4. */
+ mmu_cr4_features |= mask; /* recorded so that CPUs booting later can get the same flags */
+ __asm__("movl %%cr4,%%eax\n\t" /* read-modify-write of %cr4 through %eax */
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (mask)
+ :"ax");
+}
+
+static inline void disable_serial_nr(void)
+{ /* If the boot CPU reports X86_FEATURE_PN, OR 0x00200000 (bit 21) into the low half of MSR 0x119. */
+ if (boot_cpu_data.x86_capability & X86_FEATURE_PN) {
+ printk("Disabling CPUID Serial number ... ");
+ __asm__ __volatile__( "movl $0x119,%%ecx\n\t" /* MSR index used by rdmsr/wrmsr below */
+ "rdmsr\n\t"
+ "orl $0x00200000,%%eax\n\t" /* presumably the serial-number disable bit, going by the printk above */
+ "wrmsr":::"ax","dx","cx","memory");
+ /*
+ * We might need to re-read the x86 capability set now to
+ * make sure that the PN bit has been turned off so
+ * we know that the serial number stuff is disabled
+ */
+ printk("done.\n");
+ }
+}
+
+
#ifdef __SMP__
extern struct cpuinfo_x86 cpu_data[];
#define current_cpu_data cpu_data[smp_processor_id()]
@@ -164,6 +214,27 @@
*/
#define IO_BITMAP_SIZE 32

+/*
+ * We have two (incompatible) floating-point state 'hard' layouts:
+ *
+ * - the new MMX2-ready layout used by newer PII/Xeon/PIII(Katmai) CPUs
+ * - the 'old' i387 FPU layout used by everything else
+ *
+ * we use the one appropriate for the CPU picked at kernel compilation time.
+ *
+ * to keep the FPU code a bit more transparent, we define the same
+ * structure in both cases, the parameter names are identical too.
+ * for cases where we cannot make this transparent, we define access
+ * functions.
+ *
+ * we also have two FPU formats visible in APIs, 'hard format' and
+ * 'user-format'. The user-format is decoupled from the actual hard
+ * format (which can be i387 or KNI) and conversion routines are
+ * provided. user-format is currently i387, which we cannot change
+ * (only extend) due to external APIs (iBCS2, ELF coredumps, ptrace API).
+ */
+
+#ifndef CONFIG_X86_FX
struct i387_hard_struct {
long cwd;
long swd;
@@ -176,6 +247,84 @@
long status; /* software status information */
};

+/*
+ * Fill out the reserved bits
+ */
+#define i387_set_cwd(x,v) do { (x).cwd = ((long)(v) | 0xffff0000); } while (0)
+#define i387_set_swd(x,v) do { (x).swd = ((long)(v) | 0xffff0000); } while (0)
+#define i387_set_twd(x,v) do { (x).twd = ((long)(v) | 0xffff0000); } while (0)
+
+#define i387_save_hard(x) \
+	do { __asm__ __volatile__("fnsave %0; fwait;": "=m" (x)); } while (0) /* volatile, as the open-coded fnsave was: must not be moved or dropped */
+
+#define i387_restore_hard(x) \
+ do { __asm__("frstor %0": :"m" (x)); } while (0)
+
+#else
+
+/*
+ * has to be 128-bit aligned
+ */
+struct i387_hard_struct {
+	unsigned short cwd;
+	unsigned short swd;
+	unsigned char twd;	/* KNI tag format, see fputag_KNIto387()/fputag_387toKNI() */
+	unsigned char __reserved_01;
+	unsigned short fopcode;
+	long fip;
+	long fcs;
+	long foo;
+	long fos;
+	long __reserved_02, __reserved_03;
+	long st_space[32]; /* 8*16 bytes for each FP/MMX-reg = 128 bytes */
+	long __reserved_04 [22*4]; /* 22*16 bytes (not 22*16 longs) for MMX2 registers; 512 bytes in total */
+} __attribute__ ((aligned (16)));
+
+/*
+ * tag word conversion (thanks to Gabriel Paubert for noticing the
+ * subtle format difference and implementing these functions)
+ *
+ * there are several errata wrt. the tag word in the i387, thus
+ * any software relying on its value is questionable, but we
+ * definitely want to be as close as possible.
+ */
+static inline unsigned short fputag_KNIto387(unsigned char tb) {
+	unsigned int reg, tag387 = 0xffff;	/* start with every register tagged 11 */
+	for (reg = 0; reg < 8; reg++)		/* one KNI tag bit per register */
+		if (tb & (1u << reg))		/* bit set: clear that register's pair to 00 */
+			tag387 &= ~(3u << (2*reg));
+	return tag387;
+}
+
+static inline unsigned char fputag_387toKNI(unsigned short tw) {
+	tw = ~tw;			/* 387 pair 11 (empty) becomes 00, anything else non-zero */
+	tw = (tw | (tw>>1)) & 0x5555; /* z7z6z5z4z3z2z1z0 */
+	tw = (tw | (tw>>1)) & 0x3333; /* zz76zz54zz32zz10 */
+	tw = (tw | (tw>>2)) & 0x0f0f; /* zzzz7654zzzz3210 (pairs move down by 2 bits) */
+	return (tw|(tw>>4)) & 0x00ff; /* zzzzzzzz76543210 */
+}
+
+#define i387_set_cwd(x,v) do { (x).cwd = (short)(v); } while (0)
+#define i387_set_swd(x,v) do { (x).swd = (short)(v); } while (0)
+#define i387_set_twd(x,v) do { (x).twd = fputag_387toKNI(v); } while (0)
+
+/*
+ * GAS is not ready yet, so we encode the FXSAVE/FXRSTOR
+ * opcodes directly:
+ */
+#define i387_save_hard(x) \
+do { __asm__ __volatile__(".byte 0x0f, 0xae, 0x06": :"S" (&(x)) :"memory"); } while (0) /* "memory": the asm stores into x through %esi */
+
+#define i387_restore_hard(x) \
+do { __asm__ __volatile__(".byte 0x0f, 0xae, 0x4f, 0x00": :"D" (&(x)) :"memory"); } while (0) /* "memory": the asm loads x through %edi, pending stores must be flushed first */
+
+#endif
+
+extern int i387_hard_to_user ( struct user_i387_struct * user,
+ struct i387_hard_struct * hard);
+extern int i387_user_to_hard (struct i387_hard_struct * hard,
+ struct user_i387_struct * user);
+
struct i387_soft_struct {
long cwd;
long swd;
@@ -287,7 +436,7 @@
* FPU lazy state save handling..
*/
#define save_fpu(tsk) do { \
- asm volatile("fnsave %0\n\tfwait":"=m" (tsk->tss.i387)); \
+ i387_save_hard(tsk->tss.i387); \
tsk->flags &= ~PF_USEDFPU; \
stts(); \
} while (0)
--- linux/arch/i386/mm/init.c.orig Tue Jan 26 10:47:16 1999
+++ linux/arch/i386/mm/init.c Tue Mar 2 19:51:58 1999
@@ -182,34 +182,6 @@
extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;

-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
-#define X86_CR4_MCE 0x0040 /* Machine check enable */
-#define X86_CR4_PGE 0x0080 /* enable global pages */
-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
-
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-unsigned long mmu_cr4_features __initdata = 0;
-
-static inline void set_in_cr4(unsigned long mask)
-{
- mmu_cr4_features |= mask;
- __asm__("movl %%cr4,%%eax\n\t"
- "orl %0,%%eax\n\t"
- "movl %%eax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
-}
-
/*
* allocate page table(s) for compile-time fixed mappings
*/
--- linux/arch/i386/kernel/process.c.orig Tue Jan 26 10:47:12 1999
+++ linux/arch/i386/kernel/process.c Tue Mar 2 19:51:11 1999
@@ -580,6 +580,106 @@
}

/*
+ * FPU state handling functions
+ */
+
+#ifndef CONFIG_X86_FX
+
+int i387_hard_to_user ( struct user_i387_struct * user,
+			struct i387_hard_struct * hard)
+{
+	/*
+	 * Plain i387 build: the user-format is the i387 format too, so the
+	 * leading sizeof(struct user_i387_struct) bytes go out unconverted.
+	 */
+	return __copy_to_user(user, hard, sizeof(*user));
+}
+
+int i387_user_to_hard (struct i387_hard_struct * hard,
+			struct user_i387_struct * user)
+{
+	/*
+	 * Plain i387 build: the user-format is the i387 format too, so
+	 * sizeof(struct user_i387_struct) bytes are copied in unconverted.
+	 * The result of __copy_from_user() is handed back to the caller.
+	 */
+	return __copy_from_user(hard, user, sizeof(*user));
+}
+
+#else
+
+int i387_hard_to_user ( struct user_i387_struct * user,
+			struct i387_hard_struct * hard)
+{
+	int i, err = 0;
+	short *tmp, *tmp2;
+	/* KNI hard format -> i387 user format; err ORs together every put_user() result */
+#define PUT(x) err |= put_user (hard->x, &user->x)
+	PUT(cwd);
+	PUT(swd);
+
+	err |= put_user (fputag_KNIto387(hard->twd), &user->twd);
+
+	PUT(fip);
+	PUT(fcs);
+	PUT(foo);
+	PUT(fos);
+#undef PUT
+
+	tmp = (short *)&user->st_space;
+	tmp2 = (short *)&hard->st_space;
+
+	/*
+	 * Transform the two layouts: copy 5 shorts (10 bytes) per register,
+	 * then skip the rest of its 16-byte (8 shorts) hard slot. (we do not
+	 * mix 32-bit access with 16-bit access, that's suboptimal on PPros)
+	 */
+	for (i = 0; i < 8; i++) {
+		err |= put_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= put_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= put_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= put_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= put_user(*tmp2, tmp); tmp++; tmp2 += 4; /* 4 + 4 = 8 shorts per hard slot */
+	}
+	return err;
+}
+
+int i387_user_to_hard (struct i387_hard_struct * hard,
+			struct user_i387_struct * user)
+{
+	int i, err = 0;
+	short *tmp, *tmp2;
+	unsigned short twd387 = 0; /* full 16-bit i387 tag word; hard->twd is only 8 bits wide */
+#define GET(x) err |= get_user (hard->x, &user->x)
+	GET(cwd);
+	GET(swd);
+
+	err |= get_user (twd387, &user->twd);
+	hard->twd = fputag_387toKNI(twd387);
+
+	/* (no raw GET(twd) here: it would overwrite the converted tag byte) */
+	GET(fip);
+	GET(fcs);
+	GET(foo);
+	GET(fos);
+#undef GET
+
+	tmp2 = (short *)&hard->st_space;
+	tmp = (short *)&user->st_space;
+
+	for (i = 0; i < 8; i++) {
+		err |= get_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= get_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= get_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= get_user(*tmp2, tmp); tmp++; tmp2++;
+		err |= get_user(*tmp2, tmp); tmp++; tmp2 += 4; /* 5 shorts copied, hard slot is 8 shorts (16 bytes) */
+	}
+	return err;
+}
+
+#endif
+
+/*
* Save a segment.
*/
#define savesegment(seg,value) \
@@ -690,8 +790,8 @@
/*
* switch_to(x,yn) should switch tasks from x to y.
*
- * We fsave/fwait so that an exception goes off at the right time
- * (as a call from the fsave or fwait in effect) rather than to
+ * We fpu_save so that an exception goes off at the right time
+ * (as a call from the f*save or fwait in effect) rather than to
* the wrong process. Lazy FP saving no longer makes any sense
* with modern CPU's, and this simplifies a lot of things (SMP
* and UP become the same).
--- linux/arch/i386/kernel/ptrace.c.orig Thu Jan 14 14:38:55 1999
+++ linux/arch/i386/kernel/ptrace.c Tue Mar 2 19:51:11 1999
@@ -615,6 +615,9 @@
};

case PTRACE_GETFPREGS: { /* Get the child FPU state. */
+ /*
+ * user-space expects an 'old-style' FPU dump.
+ */
if (!access_ok(VERIFY_WRITE, (unsigned *)data,
sizeof(struct user_i387_struct)))
{
@@ -624,15 +627,17 @@
ret = 0;
if ( !child->used_math ) {
/* Simulate an empty FPU. */
- child->tss.i387.hard.cwd = 0xffff037f;
- child->tss.i387.hard.swd = 0xffff0000;
- child->tss.i387.hard.twd = 0xffffffff;
+ i387_set_cwd(child->tss.i387.hard, 0x037f);
+ i387_set_swd(child->tss.i387.hard, 0x0000);
+ i387_set_twd(child->tss.i387.hard, 0xffff);
}
#ifdef CONFIG_MATH_EMULATION
if ( boot_cpu_data.hard_math ) {
#endif
- __copy_to_user((void *)data, &child->tss.i387.hard,
- sizeof(struct user_i387_struct));
+ i387_hard_to_user(
+ (struct user_i387_struct *)data,
+ &child->tss.i387.hard
+ );
#ifdef CONFIG_MATH_EMULATION
} else {
save_i387_soft(&child->tss.i387.soft,
@@ -653,8 +658,12 @@
#ifdef CONFIG_MATH_EMULATION
if ( boot_cpu_data.hard_math ) {
#endif
- __copy_from_user(&child->tss.i387.hard, (void *)data,
- sizeof(struct user_i387_struct));
+ {
+ i387_user_to_hard(
+ &child->tss.i387.hard,
+ (struct user_i387_struct *)data
+ );
+ }
#ifdef CONFIG_MATH_EMULATION
} else {
restore_i387_soft(&child->tss.i387.soft,
--- linux/arch/i386/kernel/setup.c.orig Tue Mar 2 19:50:54 1999
+++ linux/arch/i386/kernel/setup.c Tue Mar 2 19:51:11 1999
@@ -46,6 +46,7 @@

char ignore_irq13 = 0; /* set if exception 16 works */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+unsigned long mmu_cr4_features __initdata = 0;

/*
* Bus types ..
@@ -599,8 +600,9 @@
NULL, NULL, NULL, NULL }},
{ X86_VENDOR_INTEL, 6,
{ "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)",
- NULL, "Pentium II (Deschutes)", "Celeron (Mendocino)", NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }},
+ NULL, "Pentium II (Deschutes)", "Celeron (Mendocino)",
+ "Pentium III (Katmai)", NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL }},
{ X86_VENDOR_AMD, 4,
{ NULL, NULL, NULL, "486 DX/2", NULL, NULL, NULL, "486 DX/2-WB",
"486 DX/4", "486 DX/4-WB", NULL, NULL, NULL, NULL, "Am5x86-WT",
@@ -836,7 +838,9 @@
x86_cap_flags[14] = "mca";
x86_cap_flags[16] = "pat";
x86_cap_flags[17] = "pse36";
- x86_cap_flags[24] = "osfxsr";
+ x86_cap_flags[18] = "pn";
+ x86_cap_flags[24] = "fxsr";
+ x86_cap_flags[25] = "xmm";
}

sep_bug = c->x86_vendor == X86_VENDOR_INTEL &&
--- linux/arch/i386/kernel/signal.c.orig Tue Dec 29 16:36:58 1998
+++ linux/arch/i386/kernel/signal.c Tue Mar 2 19:51:11 1999
@@ -153,9 +153,16 @@

static inline int restore_i387_hard(struct _fpstate *buf)
{
+ int err = 0;
struct task_struct *tsk = current;
clear_fpu(tsk);
- return __copy_from_user(&tsk->tss.i387.hard, buf, sizeof(*buf));
+
+#ifdef CONFIG_MATH_EMULATION
+ err = get_user(tsk->tss.i387.hard.status, &buf->status);
+#endif
+ err |= i387_user_to_hard(&tsk->tss.i387.hard,
+ (struct user_i387_struct *)buf);
+ return err;
}

static inline int restore_i387(struct _fpstate *buf)
@@ -305,11 +312,16 @@

static inline int save_i387_hard(struct _fpstate * buf)
{
+ int err = 0;
struct task_struct *tsk = current;

unlazy_fpu(tsk);
- tsk->tss.i387.hard.status = tsk->tss.i387.hard.swd;
- if (__copy_to_user(buf, &tsk->tss.i387.hard, sizeof(*buf)))
+#ifdef CONFIG_MATH_EMULATION
+ err = put_user(tsk->tss.i387.hard.status, &buf->status);
+#endif
+ err |= i387_hard_to_user((struct user_i387_struct *)buf,
+ &tsk->tss.i387.hard);
+ if (err)
return -1;
return 1;
}
--- linux/arch/i386/kernel/traps.c.orig Tue Mar 2 19:50:54 1999
+++ linux/arch/i386/kernel/traps.c Tue Mar 2 19:51:11 1999
@@ -452,17 +452,16 @@
asmlinkage void math_state_restore(struct pt_regs regs)
{
__asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */
- if(current->used_math)
- __asm__("frstor %0": :"m" (current->tss.i387));
- else
- {
+ if(current->used_math) {
+ i387_restore_hard(current->tss.i387);
+ } else {
/*
* Our first FPU usage, clean the chip.
*/
__asm__("fninit");
current->used_math = 1;
}
- current->flags|=PF_USEDFPU; /* So we fnsave on switch_to() */
+ current->flags|=PF_USEDFPU; /* So we fpu_save on switch_to() */
}

#ifndef CONFIG_MATH_EMULATION
--- linux/arch/i386/kernel/smp.c.orig Tue Mar 2 21:28:15 1999
+++ linux/arch/i386/kernel/smp.c Tue Mar 2 21:28:24 1999
@@ -881,6 +881,7 @@
*/
int __init start_secondary(void *unused)
{
+ disable_serial_nr();
/*
* Dont put anything before smp_callin(), SMP
* booting is too fragile that we want to limit the
--- linux/arch/i386/Makefile.orig Fri Jan 8 14:31:30 1999
+++ linux/arch/i386/Makefile Tue Mar 2 19:51:11 1999
@@ -43,6 +43,10 @@
CFLAGS := $(CFLAGS) -m486 -malign-loops=2 -malign-jumps=2 -malign-functions=2 -DCPU=686
endif

+ifdef CONFIG_M686FX
+CFLAGS := $(CFLAGS) -m486 -malign-loops=0 -malign-jumps=0 -malign-functions=0 -DCPU=686
+endif
+
HEAD := arch/i386/kernel/head.o arch/i386/kernel/init_task.o

SUBDIRS := $(SUBDIRS) arch/i386/kernel arch/i386/mm arch/i386/lib
--- linux/arch/i386/config.in.orig Tue Mar 2 19:50:54 1999
+++ linux/arch/i386/config.in Tue Mar 2 19:51:11 1999
@@ -16,7 +16,8 @@
486/Cx486 CONFIG_M486 \
586/K5/5x86/6x86 CONFIG_M586 \
Pentium/K6/TSC CONFIG_M586TSC \
- PPro/6x86MX CONFIG_M686" PPro
+ PPro/6x86MX/PII CONFIG_M686 \
+ PIII/Xeon/Deschutes CONFIG_M686FX" PIII
#
# Define implied options from the CPU selection here
#
@@ -26,14 +27,20 @@
define_bool CONFIG_X86_BSWAP y
define_bool CONFIG_X86_POPAD_OK y
fi
-if [ "$CONFIG_M686" = "y" -o "$CONFIG_M586TSC" = "y" ]; then
+if [ "$CONFIG_M686FX" = "y" -o "$CONFIG_M686" = "y" \
+ -o "$CONFIG_M586TSC" = "y" ]; then
define_bool CONFIG_X86_TSC y
fi
-if [ "$CONFIG_M686" = "y" ]; then
+if [ "$CONFIG_M686FX" = "y" -o "$CONFIG_M686" = "y" ]; then
define_bool CONFIG_X86_GOOD_APIC y
fi
+if [ "$CONFIG_M686FX" = "y" ]; then
+ define_bool CONFIG_X86_FX y
+fi

-bool 'Math emulation' CONFIG_MATH_EMULATION
+if [ "$CONFIG_X86_FX" != "y" ]; then
+ bool 'Math emulation' CONFIG_MATH_EMULATION
+fi
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
bool 'Symmetric multi-processing support' CONFIG_SMP
endmenu
--- linux/Documentation/Configure.help.orig Tue Mar 2 19:50:54 1999
+++ linux/Documentation/Configure.help Tue Mar 2 19:51:11 1999
@@ -1604,10 +1604,10 @@
all x86 CPU types (albeit not optimally fast), you can specify
"386" here.

- If you specify one of "486" or "586" or "Pentium" or "PPro", then
- the kernel will not necessarily run on earlier architectures (e.g. a
- Pentium optimized kernel will run on a PPro, but not necessarily on
- a i486).
+ If you specify one of "486" or "586" or "Pentium" or "PPro" or "PIII",
+ then the kernel will not necessarily run on earlier architectures
+ (e.g. a Pentium optimized kernel will run on a PPro, but not necessarily
+ on a i486).

Here are the settings recommended for greatest speed:
- "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
@@ -1621,6 +1621,9 @@
K6-3D.
- "PPro" for the Cyrix/IBM/National Semiconductor 6x86MX, MII and
Intel Pentium II/Pentium Pro.
+ - "PIII/Xeon/Deschutes" for the PIII (Katmai), Xeon and later PIIs
+ with the Deschutes or Mendocino core. You have to choose this for
+ MMX2 support.

If you don't know what to do, choose "386".

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/