i486 emu in mainline?

From: Christoph Hellwig
Date: Sat May 22 2004 - 18:42:39 EST


These days gcc uses i486+-only instructions by default in libstdc++, so
most modern distros won't work on i386 cpus anymore. To make them work
again Debian merged Willy Tarreau's patch to trap those instructions and
emulate them on real i386 cpus. The patch is extremely non-invasive and
would certainly be useful for mainline. Any reason not to include it?
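
For illustration, here's a minimal test case (an untested sketch of mine,
not part of the patch): on a real 386 it dies with SIGILL unless the
emulation below is enabled, since bswap only exists from the 486 on.

	/* bswap-test.c: build with gcc -o bswap-test bswap-test.c */
	#include <stdio.h>

	int main(void)
	{
		unsigned int x = 0x12345678;

		/* i486+ instruction: faults as an invalid opcode on a 386 */
		__asm__ __volatile__ ("bswap %0" : "+r" (x));

		printf("%08x\n", x);	/* prints 78563412 */
		return 0;
	}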


--- kernel-source-2.6.6/arch/i386/Kconfig 2004-05-10 19:47:45.000000000 +1000
+++ kernel-source-2.6.6-1/arch/i386/Kconfig 2004-05-10 22:21:08.000000000 +1000
@@ -330,6 +330,41 @@
This is really intended for distributors who need more
generic optimizations.

+config X86_EMU486
+ bool "486 emulation"
+ help
+ When used on a 386, Linux can emulate 3 instructions from the 486
+ set. This allows user-space programs compiled for a 486 to run on a
+ 386 without crashing with a SIGILL. As with any emulation, performance
+ will be very low, but since these instructions are not used often,
+ this might not hurt. The emulated instructions are:
+ - bswap (does the same as htonl())
+ - cmpxchg (used in multi-threading, mutex locking)
+ - xadd (rarely used)
+
+ Note that this can also allow step-A 486's to correctly run
+ multi-threaded applications, since cmpxchg has a wrong opcode on that
+ early CPU.
+
+ Don't use this to enable multi-threading on an SMP machine; the lock
+ atomicity can't be guaranteed!
+
+ Although it's highly preferable that you only execute programs
+ targeted for your CPU, it may happen that, following a hardware
+ replacement, or during the rescue of a damaged system, you have
+ to execute such programs on an unsuitable processor. In this case,
+ this option will help you get your programs working, even if they
+ will be slower.
+
+ It is recommended that you say N here in any case, except for the
+ kernels that you will use on your rescue disks.
+
+ This option should not be left on by default, because relying on it
+ means running programs not targeted for your CPU. You should
+ recompile your applications whenever possible.
+
+ If you are not sure, say N.
+
endif

#
@@ -438,6 +473,7 @@

config SMP
bool "Symmetric multi-processing support"
+ depends on !X86_EMU486
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
--- kernel-source-2.6.6/arch/i386/kernel/Makefile 2004-05-10 19:47:45.000000000 +1000
+++ kernel-source-2.6.6-1/arch/i386/kernel/Makefile 2004-05-10 22:21:08.000000000 +1000
@@ -28,6 +28,7 @@
obj-$(CONFIG_MODULES) += module.o
obj-y += sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT) += srat.o
+obj-$(CONFIG_X86_EMU486) += emu.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
--- kernel-source-2.6.6/arch/i386/kernel/emu.c 1970-01-01 10:00:00.000000000 +1000
+++ kernel-source-2.6.6-1/arch/i386/kernel/emu.c 2003-08-01 20:21:31.000000000 +1000
@@ -0,0 +1,434 @@
+/*
+ * linux/arch/i386/emu.c
+ *
+ * Copyright (C) 2002 Willy Tarreau
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/linkage.h>
+#include <linux/preempt.h>
+#include <linux/ptrace.h>
+#include <linux/types.h>
+
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+asmlinkage void do_general_protection(struct pt_regs *regs, long error_code);
+asmlinkage void do_invalid_op(struct pt_regs *regs, long error_code);
+
+/* byte offsets of the register members in a struct pt_regs */
+static const int reg_ofs[8] = {
+ (int)&((struct pt_regs *)0)->eax,
+ (int)&((struct pt_regs *)0)->ecx,
+ (int)&((struct pt_regs *)0)->edx,
+ (int)&((struct pt_regs *)0)->ebx,
+ (int)&((struct pt_regs *)0)->esp,
+ (int)&((struct pt_regs *)0)->ebp,
+ (int)&((struct pt_regs *)0)->esi,
+ (int)&((struct pt_regs *)0)->edi
+};
+
+#define REG_PTR(regs, reg) ((unsigned long *)(((void *)(regs)) + reg_ofs[reg]))
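+
+/* The table follows the i386 register numbering used in instruction
+ * encodings (eax=0, ecx=1, edx=2, ebx=3, esp=4, ebp=5, esi=6, edi=7),
+ * so REG_PTR(regs, reg) points at the saved value of register <reg>
+ * in the exception frame.
+ */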
+
+/* This code can be used to allow old 386's to hopefully correctly execute some
+ * code which was originally compiled for a 486, and to allow CMOV-disabled
+ * processors to emulate CMOV instructions. In user space, only 3 instructions
+ * were added between the 386 and the 486:
+ * - BSWAP reg does exactly what htonl() does
+ * - CMPXCHG reg/mem, reg used for mutex locking
+ * - XADD reg/mem, reg not encountered yet.
+ *
+ * Warning: this will NEVER allow a kernel compiled for a 486 to boot on a 386,
+ * nor will it allow a CMOV-optimized kernel to run on a processor without
+ * CMOV! It will only help to port programs, or save you on a rescue disk, but
+ * for performance's sake, it's far better to recompile.
+ *
+ * Test patterns have been run against this code on a 386, and it now seems
+ * OK. If you think you've found a bug, please report it to
+ * Willy Tarreau <willy@xxxxxxxxxx>.
+ */
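+
+/* Example of what gets decoded here: "lock cmpxchg %ecx,4(%esp)" assembles
+ * to f0 0f b1 4c 24 04: prefix f0 (lock), opcode 0f b1 (cmpxchg), modrm 4c
+ * (mod=01, reg=ecx, rm=100 -> SIB follows), sib 24 (base=esp, no index),
+ * and the 8-bit displacement 04.
+ */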
+
+/* [modrm_address] returns a pointer to a user-space location by decoding the
+ * mod/rm byte and the bytes following it; <from> points to the first byte
+ * after the mod/reg/rm byte and is updated as bytes are read.
+ * This must only be called if modrm indicates memory and not register.
+ * NOTE: this code has some ugly lines, which produce better assembly output
+ * than the "cleaner" version.
+ */
+static void *modrm_address(struct pt_regs *regs, u8 **from,
+ int bit32, int modrm)
+{
+ u32 offset = 0;
+ u8 sib, mod, rm;
+
+ /* better optimization to compute them here, even
+ * if rm is not always used
+ */
+ rm = modrm & 7;
+ mod = modrm & 0xC0;
+
+ if (bit32) { /* 32-bits addressing mode (default) */
+ if (mod == 0 && rm == 5) /* 32 bits offset and nothing more */
+ return (void *)*((u32*)*from)++;
+
+ if (rm == 4) {
+ /* SIB byte is present and must be used */
+ sib = *(*from)++; /* SS(7-6) IDX(5-3) BASE(2-0) */
+
+ /* index * scale */
+ if (((sib >> 3) & 7) != 4)
+ offset += *REG_PTR(regs, (sib >> 3) & 7) << (sib >> 6);
+
+ rm = (sib & 7); /* base replaces rm from now */
+ if (mod == 0 && rm == 5) /* base off32 + scaled index */
+ return (void *)offset + *((u32*)*from)++;
+ }
+
+ /* base register */
+ offset += *REG_PTR(regs, rm);
+
+ if (mod) {
+ if (mod & 0x80) /* 32 bits unsigned offset */
+ offset += *((u32*)*from)++;
+ else /* 0x40: 8 bits signed offset */
+ offset += *((s8*)*from)++;
+ }
+
+ return (void *)offset;
+
+ } else { /* 16-bits addressing mode */
+ /* handle special case now */
+ if (mod == 0 && rm == 6) /* 16 bits offset */
+ return (void *)(u32)*((u16*)*from)++;
+
+ if ((rm & 4) == 0)
+ offset += (rm & 2) ? regs->ebp : regs->ebx;
+ if (rm < 6)
+ offset += (rm & 1) ? regs->edi : regs->esi;
+ else if (rm == 6) /* bp */
+ offset += regs->ebp;
+ else if (rm == 7) /* bx */
+ offset += regs->ebx;
+
+ /* now, let's include 8/16 bits offset */
+ if (mod) {
+ if (mod & 0x80) /* 16 bits unsigned offset */
+ offset += *((u16*)*from)++;
+ else /* 0x40: 8 bits signed offset */
+ offset += *((s8*)*from)++;
+ }
+ return (void *)(offset & 0xFFFF);
+ }
+}
+
+
+/*
+ * skip_modrm() computes the EIP value of the next instruction from the
+ * pointer <from>, which points to the first byte after the mod/rm byte.
+ * Its purpose is to implement a fast alternative to modrm_address()
+ * when the offset value is not needed.
+ */
+static inline void *skip_modrm(u8 *from, int bit32, int modrm)
+{
+ u8 mod,rm;
+
+ /* better optimization to compute them here, even
+ * if rm is not always used
+ */
+ rm = modrm & 7;
+ mod = modrm & 0xC0;
+
+ /* most common case first : registers */
+ if (mod == 0xC0)
+ return from;
+
+ if (bit32) { /* 32 bits addressing mode (default) */
+ if (rm == 4) /* SIB byte : rm becomes base */
+ rm = (*from++ & 7);
+ if (mod == 0x00) {
+ if (rm == 5) /* 32 bits offset and nothing more */
+ return from + 4;
+ else
+ return from;
+ }
+ }
+ else { /* 16 bits mode */
+ if (mod == 0x00) {
+ if (rm == 6) /* 16 bits offset and nothing more */
+ return from + 2;
+ else
+ return from;
+ }
+ }
+
+ if (mod & 0x80)
+ return from + (2 * (bit32 + 1)); /* + 2 or 4 bytes */
+ else
+ return from + 1;
+}
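+
+/* For the "cmpxchg %ecx,4(%esp)" example above (modrm 4c, sib 24, disp8 04),
+ * skip_modrm() consumes the SIB byte and adds 1 for the 8-bit displacement,
+ * returning <from> + 2.
+ */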
+
+
+/* [reg_address] returns a pointer to a register in the regs struct, depending
+ * on <w> (byte/word) and reg. Since the caller knows about <w>, it's
+ * responsible for understanding the result as a byte, word or dword pointer.
+ * Only the 3 lower bits of <reg> are meaningful, higher ones are ignored.
+ */
+static inline void *reg_address(struct pt_regs *regs, char w, u8 reg)
+{
+ if (w)
+ /* 16/32 bits mode */
+ return REG_PTR(regs, reg & 7);
+ else
+ /* 8 bits mode : al,cl,dl,bl,ah,ch,dh,bh */
+ return ((reg & 4) >> 2) + (u8*)REG_PTR(regs, reg & 3);
+
+ /* this is set just to prevent the compiler from complaining */
+ return NULL;
+}
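+
+/* e.g. with w=0, reg=4 selects ah: REG_PTR(regs, 4 & 3) is the saved eax,
+ * and ((4 & 4) >> 2) = 1 steps to its second byte (little endian).
+ */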
+
+/* [do_emu] is called by exception 6 after an invalid opcode has been
+ * encountered. It will decode the prefixes and the instruction code, try
+ * to emulate the instruction, and send a SIGILL or SIGSEGV to the process
+ * if that's not possible.
+ * REP/REPN prefixes are not supported anymore because it didn't make sense
+ * to emulate instructions prefixed with such opcodes: no arch-specific
+ * instruction starts with one of them. At most, they will mark the start
+ * of newer arch-specific instructions (SSE?).
+ */
+asmlinkage void do_emu(struct pt_regs *regs, long error_code)
+{
+ enum {
+ PREFIX_ES = 1,
+ PREFIX_CS = 2,
+ PREFIX_SS = 4,
+ PREFIX_DS = 8,
+ PREFIX_FS = 16,
+ PREFIX_GS = 32,
+ PREFIX_SEG = 63, /* any seg */
+ PREFIX_D32 = 64,
+ PREFIX_A32 = 128,
+ PREFIX_LOCK = 256,
+ } prefixes = 0;
+
+ u32 *src, *dst;
+ u8 *eip;
+
+ preempt_disable();
+ eip = (u8*)regs->eip;
+
+#ifdef BENCH_CPU_EXCEPTION_BUT_NOT_THE_CODE
+ regs->eip += 3;
+ goto out;
+#endif
+ /* we'll first read all known opcode prefixes, and discard obviously
+ invalid combinations. */
+ while (1) {
+ /* prefix for CMOV, BSWAP, CMPXCHG, XADD */
+ if (*eip == 0x0F) {
+ eip++;
+
+ /* check whether this is a BSWAP opcode, the main source of SIGILLs on 386's */
+ if ((*eip & 0xF8) == 0xC8) { /* BSWAP */
+ u8 reg;
+
+ reg = *eip++ & 0x07;
+ src = reg_address(regs, 1, reg);
+
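+ /* byte-reverse *src without using bswap itself: swap al/ah,
+ rotate the halves, swap al/ah again: 0x12345678 -> 0x78563412 */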
+ __asm__ __volatile__ (
+ "xchgb %%al, %%ah\n\t"
+ "roll $16, %%eax\n\t"
+ "xchgb %%al, %%ah\n\t"
+ : "=a" (*(u32*)src)
+ : "a" (*(u32*)src));
+ regs->eip = (u32)eip;
+ goto out;
+ }
+
+
+ /* we'll also try to emulate the CMPXCHG instruction (used in mutex locks).
+ This instruction is often LOCK-prefixed, but it's not possible to take a
+ lock here. Anyway, I don't believe there are lots of multiprocessor
+ 386 systems out there ...
+ */
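+ /* CMPXCHG semantics: if the accumulator (al/ax/eax) equals the
+ destination, ZF is set and the destination is loaded from the
+ source register; otherwise ZF is cleared and the accumulator is
+ loaded from the destination. */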
+ if ((*eip & 0xFE) == 0xB0) { /* CMPXCHG */
+ u8 w, reg, modrm;
+
+ w = *eip & 1;
+ modrm = *(eip + 1);
+ eip += 2; /* skips all the opcodes */
+
+ reg = (modrm >> 3) & 7;
+
+ dst = reg_address(regs, w, reg);
+ if ((modrm & 0xC0) == 0xC0) /* register to register */
+ src = reg_address(regs, w, modrm);
+ else {
+ src = modrm_address(regs, &eip, !(prefixes & PREFIX_A32), modrm);
+ /* we must verify that src is valid for this task */
+ if ((prefixes & (PREFIX_FS | PREFIX_GS)) ||
+ verify_area(VERIFY_WRITE, (void *)src, (w?((prefixes & PREFIX_D32)?2:4):1))) {
+ do_general_protection(regs, error_code);
+ goto out;
+ }
+ }
+
+ if (!w) { /* 8 bits operands */
+ if ((u8)regs->eax == *(u8*)src) {
+ *(u8*)src = *(u8*)dst;
+ regs->eflags |= X86_EFLAGS_ZF; /* set Zero Flag */
+ }
+ else {
+ *(u8*)&(regs->eax) = *(u8*)src;
+ regs->eflags &= ~X86_EFLAGS_ZF; /* clear Zero Flag */
+ }
+ }
+ else if (!(prefixes & PREFIX_D32)) { /* 32 bits operands */
+ if ((u32)regs->eax == *(u32*)src) {
+ *(u32*)src = *(u32*)dst;
+ regs->eflags |= X86_EFLAGS_ZF; /* set Zero Flag */
+ }
+ else {
+ regs->eax = *(u32*)src;
+ regs->eflags &= ~X86_EFLAGS_ZF; /* clear Zero Flag */
+ }
+ }
+ else { /* 16 bits operands */
+ if ((u16)regs->eax == *(u16*)src) {
+ *(u16*)src = *(u16*)dst;
+ regs->eflags |= X86_EFLAGS_ZF; /* set Zero Flag */
+ }
+ else {
+ *(u16*)&regs->eax = *(u16*)src;
+ regs->eflags &= ~X86_EFLAGS_ZF; /* clear Zero Flag */
+ }
+ }
+ regs->eip = (u32)eip;
+ goto out;
+ }
+
+ /* we'll also try to emulate the XADD instruction (not very common) */
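+ /* XADD semantics: the register operand receives the old value of the
+ destination, and the destination receives the sum of both operands
+ (exchange, then add). */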
+ if ((*eip & 0xFE) == 0xC0) { /* XADD */
+ u8 w, reg, modrm;
+ u32 op1, op2;
+
+ w = *eip & 1;
+ modrm = *(eip + 1);
+ eip += 2; /* skips all the opcodes */
+
+ reg = (modrm >> 3) & 7;
+
+ dst = reg_address(regs, w, reg);
+ if ((modrm & 0xC0) == 0xC0) /* register to register */
+ src = reg_address(regs, w, modrm);
+ else {
+ src = modrm_address(regs, &eip, !(prefixes & PREFIX_A32), modrm);
+ /* we must verify that src is valid for this task */
+ if ((prefixes & (PREFIX_FS | PREFIX_GS)) ||
+ verify_area(VERIFY_WRITE, (void *)src, (w?((prefixes & PREFIX_D32)?2:4):1))) {
+ do_general_protection(regs, error_code);
+ goto out;
+ }
+ }
+
+ if (!w) { /* 8 bits operands */
+ op1 = *(u8*)src;
+ op2 = *(u8*)dst;
+ *(u8*)src = op1 + op2;
+ *(u8*)dst = op1;
+ }
+ else if (!(prefixes & PREFIX_D32)) { /* 32 bits operands */
+ op1 = *(u32*)src;
+ op2 = *(u32*)dst;
+ *(u32*)src = op1 + op2;
+ *(u32*)dst = op1;
+ }
+ else { /* 16 bits operands */
+ op1 = *(u16*)src;
+ op2 = *(u16*)dst;
+ *(u16*)src = op1 + op2;
+ *(u16*)dst = op1;
+ }
+ regs->eip = (u32)eip;
+ goto out;
+ }
+ } /* if (*eip == 0x0F) */
+ else if ((*eip & 0xfc) == 0x64) {
+ switch (*eip) {
+ case 0x66: /* Operand switches 16/32 bits */
+ if (prefixes & PREFIX_D32)
+ goto invalid_opcode;
+ prefixes |= PREFIX_D32;
+ eip++;
+ continue;
+ case 0x67: /* Address switches 16/32 bits */
+ if (prefixes & PREFIX_A32)
+ goto invalid_opcode;
+ prefixes |= PREFIX_A32;
+ eip++;
+ continue;
+ case 0x64: /* FS: */
+ if (prefixes & PREFIX_SEG)
+ goto invalid_opcode;
+ prefixes |= PREFIX_FS;
+ eip++;
+ continue;
+ case 0x65: /* GS: */
+ if (prefixes & PREFIX_SEG)
+ goto invalid_opcode;
+ prefixes |= PREFIX_GS;
+ eip++;
+ continue;
+ }
+ }
+ else if (*eip == 0xf0) { /* lock */
+ if (prefixes & PREFIX_LOCK)
+ goto invalid_opcode;
+ prefixes |= PREFIX_LOCK;
+ eip++;
+ continue;
+ }
+ else if ((*eip & 0xe7) == 0x26) {
+ switch (*eip) {
+ case 0x26: /* ES: */
+ if (prefixes & PREFIX_SEG)
+ goto invalid_opcode;
+ prefixes |= PREFIX_ES;
+ eip++;
+ continue;
+ case 0x2E: /* CS: */
+ if (prefixes & PREFIX_SEG)
+ goto invalid_opcode;
+ prefixes |= PREFIX_CS;
+ eip++;
+ continue;
+ case 0x36: /* SS: */
+ if (prefixes & PREFIX_SEG)
+ goto invalid_opcode;
+ prefixes |= PREFIX_SS;
+ eip++;
+ continue;
+ case 0x3E: /* DS: */
+ if (prefixes & PREFIX_SEG)
+ goto invalid_opcode;
+ prefixes |= PREFIX_DS;
+ eip++;
+ continue;
+ }
+ }
+ /* if this opcode has not been processed, it's not a prefix. */
+ break;
+ }
+
+ /* it's a case we can't handle. Unknown opcode or too many prefixes. */
+invalid_opcode:
+ preempt_enable();
+#ifdef CONFIG_X86_EMU486_DEBUG
+ printk(KERN_DEBUG "do_emu() : invalid opcode detected @%p : %02x %02x ...\n", eip, eip[0], eip[1]);
+#endif
+ do_invalid_op(regs, error_code);
+ return;
+
+out:
+ preempt_enable();
+}
--- kernel-source-2.6.6/arch/i386/kernel/entry.S 2004-05-10 19:47:45.000000000 +1000
+++ kernel-source-2.6.6-1/arch/i386/kernel/entry.S 2004-05-10 22:21:09.000000000 +1000
@@ -562,7 +562,11 @@

ENTRY(invalid_op)
pushl $0
+#ifdef CONFIG_X86_EMU486
+ pushl $do_emu
+#else
pushl $do_invalid_op
+#endif
jmp error_code

ENTRY(coprocessor_segment_overrun)