Re: [PATCH RFC] x86: check for and defend against BIOS memorycorruption

From: Hugh Dickins
Date: Fri Aug 29 2008 - 09:21:56 EST


On Fri, 29 Aug 2008, Alan Jenkins wrote:
> RafaÅ MiÅecki wrote:
> >
> > I tried your patch anyway (after applying Jeremy's patch of course)
> > and it doesn't seem to work. The only output is:
> > scanning 2 areas for BIOS corruption
> > after using s2ram. I do not get any
> > Corrupted low memory at*
> >
> It seemed to work for me. Did you remember to plug HDMI to trigger the
> corruption before you used s2ram?

I hope that's what got missed.

Here's my version of Jeremy's patch, that I've now tested on my machines,
as x86_32 and as x86_64. It addresses none of the points Alan Cox made,
and it stays silent for me, even after suspend+resume, unless I actually
introduce corruption myself. Omits Jeremy's check in fault.c, but does
a check every minute, so should soon detect RafaÅ's HDMI corruption
without any need to suspend+resume.

Hugh

--- 2.6.27-rc5/Documentation/kernel-parameters.txt 2008-08-29 01:02:34.000000000 +0100
+++ linux/Documentation/kernel-parameters.txt 2008-08-29 11:17:16.000000000 +0100
@@ -360,6 +360,11 @@ and is between 256 and 4096 characters.
Format: <io>,<irq>,<mode>
See header of drivers/net/hamradio/baycom_ser_hdx.c.

+ bios_corruption_check=0/1 [X86]
+ Some BIOSes seem to corrupt the first 64k of memory
+ when doing things like suspend/resume. Setting this
+ option will scan the memory looking for corruption.
+
boot_delay= Milliseconds to delay each printk during boot.
Values larger than 10 seconds (10000) are changed to
no delay (0).
--- 2.6.27-rc5/arch/x86/Kconfig 2008-08-29 01:02:35.000000000 +0100
+++ linux/arch/x86/Kconfig 2008-08-29 11:17:16.000000000 +0100
@@ -201,6 +201,9 @@ config X86_TRAMPOLINE
depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
default y

+config X86_CHECK_BIOS_CORRUPTION
+ def_bool y
+
config KTIME_SCALAR
def_bool X86_32
source "init/Kconfig"
--- 2.6.27-rc5/arch/x86/kernel/setup.c 2008-08-29 01:02:35.000000000 +0100
+++ linux/arch/x86/kernel/setup.c 2008-08-29 13:50:19.000000000 +0100
@@ -579,6 +579,106 @@ static struct x86_quirks default_x86_qui
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;

/*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable. Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+#define MAX_SCAN_AREAS 8
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+static void __init setup_bios_corruption_check(void)
+{
+ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
+
+ while (addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) {
+ u64 size;
+ addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+ if (addr == 0)
+ break;
+
+ if ((addr + size) > 0x10000)
+ size = 0x10000 - addr;
+
+ if (size == 0)
+ break;
+
+ e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+ scan_areas[num_scan_areas].addr = addr;
+ scan_areas[num_scan_areas].size = size;
+ num_scan_areas++;
+
+ /* Assume we've already mapped this early memory */
+ memset(__va(addr), 0, size);
+
+ addr += size;
+ }
+
+ printk(KERN_INFO "scanning %d areas for BIOS corruption\n",
+ num_scan_areas);
+ update_e820();
+}
+
+static int __read_mostly bios_corruption_check = 1;
+static struct timer_list periodic_check_timer;
+
+void check_for_bios_corruption(void)
+{
+ int i;
+ int corruption = 0;
+
+ if (!bios_corruption_check)
+ return;
+
+ for (i = 0; i < num_scan_areas; i++) {
+ unsigned int *addr = __va(scan_areas[i].addr);
+ unsigned long size = scan_areas[i].size;
+
+ for (; size; addr++, size -= sizeof(unsigned int)) {
+ if (!*addr)
+ continue;
+ printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08x\n",
+ addr, __pa(addr), *addr);
+ *addr = 0;
+ corruption = 1;
+ }
+ }
+
+ if (corruption)
+ dump_stack();
+}
+
+static void periodic_check_for_corruption(unsigned long data)
+{
+ check_for_bios_corruption();
+ mod_timer(&periodic_check_timer, jiffies + 60*HZ);
+}
+
+void start_periodic_check_for_corruption(void)
+{
+ if (!bios_corruption_check)
+ return;
+
+ init_timer(&periodic_check_timer);
+ periodic_check_timer.function = &periodic_check_for_corruption;
+ periodic_check_for_corruption(0);
+}
+
+static int set_bios_corruption_check(char *arg)
+{
+ char *end;
+
+ bios_corruption_check = simple_strtol(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("bios_corruption_check", set_bios_corruption_check);
+#endif
+
+/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
* for initialization. Note, the efi init code path is determined by the
@@ -750,6 +850,10 @@ void __init setup_arch(char **cmdline_p)
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif

+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+ setup_bios_corruption_check();
+#endif
+
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
--- 2.6.27-rc5/arch/x86/mm/init_32.c 2008-07-29 04:24:15.000000000 +0100
+++ linux/arch/x86/mm/init_32.c 2008-08-29 11:52:21.000000000 +0100
@@ -907,6 +907,8 @@ void __init mem_init(void)
int codesize, reservedpages, datasize, initsize;
int tmp;

+ start_periodic_check_for_corruption();
+
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
--- 2.6.27-rc5/arch/x86/mm/init_64.c 2008-08-29 01:02:35.000000000 +0100
+++ linux/arch/x86/mm/init_64.c 2008-08-29 11:53:09.000000000 +0100
@@ -769,6 +769,8 @@ void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;

+ start_periodic_check_for_corruption();
+
pci_iommu_alloc();

/* clear_bss() already clear the empty_zero_page */
--- 2.6.27-rc5/drivers/base/power/main.c 2008-08-29 01:02:35.000000000 +0100
+++ linux/drivers/base/power/main.c 2008-08-29 11:17:16.000000000 +0100
@@ -254,6 +254,7 @@ static char *pm_verb(int event)

static void pm_dev_dbg(struct device *dev, pm_message_t state, char *info)
{
+ check_for_bios_corruption();
dev_dbg(dev, "%s%s%s\n", info, pm_verb(state.event),
((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ?
", may wakeup" : "");
--- 2.6.27-rc5/include/linux/kernel.h 2008-08-13 04:14:50.000000000 +0100
+++ linux/include/linux/kernel.h 2008-08-29 11:49:23.000000000 +0100
@@ -240,6 +240,22 @@ extern const char *print_tainted(void);
extern void add_taint(unsigned);
extern int root_mountflags;

+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+/*
+ * This is obviously not a great place for this, but we want to be
+ * able to scatter it around anywhere in the kernel.
+ */
+void check_for_bios_corruption(void);
+void start_periodic_check_for_corruption(void);
+#else
+static inline void check_for_bios_corruption(void)
+{
+}
+static inline void start_periodic_check_for_corruption(void)
+{
+}
+#endif
+
/* Values used for system_state */
extern enum system_states {
SYSTEM_BOOTING,