[PATCH 4/4 Rebase] x86, MCE: Avoid potential deadlock in MCE context

From: Chen, Gong
Date: Wed May 20 2015 - 02:42:45 EST


Printing in MCE context is currently a no-no, as printk is not
NMI-safe. If any of the notifiers on the MCE chain calls *printk*, we
may deadlock. To avoid that, defer the printk to process context.

Background info at: https://lkml.org/lkml/2014/6/27/26

Reported-by: Xie XiuQi <xiexiuqi@xxxxxxxxxx>
Signed-off-by: Chen, Gong <gong.chen@xxxxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/1406797523-28710-6-git-send-email-gong.chen@xxxxxxxxxxxxxxx
[ Boris: rewrite a bit. ]
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce-apei.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++--
arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 -
arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 +
arch/x86/kernel/cpu/mcheck/threshold.c | 1 +
6 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index d16f983f46f5..781432dd8123 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -198,6 +198,7 @@ enum mcp_flags {
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

int mce_notify_irq(void);
+void mce_queue_irq_work(void);

DECLARE_PER_CPU(struct mce, injectm);

diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index a1aef9533154..380e3ac8fb62 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -57,7 +57,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)

m.addr = mem_err->physical_addr;
mce_log(&m);
- mce_notify_irq();
+ mce_queue_irq_work();
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b369b5fcda1d..a3be97961e22 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -156,7 +156,7 @@ void mce_log(struct mce *mce)
/* Emit the trace record: */
trace_mce_record(mce);

- atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+ mce_genpool_add(mce);

mce->finished = 0;
wmb();
@@ -486,6 +486,11 @@ static void mce_irq_work_cb(struct irq_work *entry)
mce_schedule_work();
}

+void mce_queue_irq_work(void)
+{
+ irq_work_queue(&mce_irq_work);
+}
+
static void mce_report_event(struct pt_regs *regs)
{
if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
@@ -1105,7 +1110,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/* assuming valid severity level != 0 */
m.severity = severity;
m.usable_addr = mce_usable_address(&m);
- mce_genpool_add(&m);

mce_log(&m);

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf030ed..caf6b7e25768 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -216,7 +216,6 @@ static void intel_threshold_interrupt(void)
return;

machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
- mce_notify_irq();
}

/*
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1af51b1586d7..2733f275237d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -427,6 +427,7 @@ static inline void __smp_thermal_interrupt(void)
{
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
+ mce_queue_irq_work();
}

asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980186ee..d695faa234eb 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -22,6 +22,7 @@ static inline void __smp_threshold_interrupt(void)
{
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
+ mce_queue_irq_work();
}

asmlinkage __visible void smp_threshold_interrupt(void)
--
2.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/