[PATCH 06/11] ACPI, APEI, Printk queued error record before panic

From: Huang Ying
Date: Wed Dec 07 2011 - 22:26:04 EST


Because printk is not safe inside NMI handler, the recoverable error
records received in NMI handler will be queued to be printked in a
delayed IRQ context via irq_work. If a fatal error occurs after the
recoverable error and before the irq_work processed, we lost a error
report.

To solve the issue, the queued error records are printked in NMI
handler if system will go panic.

Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>
---
drivers/acpi/apei/ghes.c | 53 +++++++++++++++++++++++++++++++++++++++--------
1 file changed, 44 insertions(+), 9 deletions(-)

--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -740,26 +740,34 @@ static int ghes_notify_sci(struct notifi
return ret;
}

+static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
+{
+ struct llist_node *next, *tail = NULL;
+
+ while (llnode) {
+ next = llnode->next;
+ llnode->next = tail;
+ tail = llnode;
+ llnode = next;
+ }
+
+ return tail;
+}
+
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
- struct llist_node *llnode, *next, *tail = NULL;
+ struct llist_node *llnode, *next;
struct ghes_estatus_node *estatus_node;
struct acpi_hest_generic *generic;
struct acpi_hest_generic_status *estatus;
u32 len, node_len;

+ llnode = llist_del_all(&ghes_estatus_llist);
/*
* Because the time order of estatus in list is reversed,
* revert it back to proper order.
*/
- llnode = llist_del_all(&ghes_estatus_llist);
- while (llnode) {
- next = llnode->next;
- llnode->next = tail;
- tail = llnode;
- llnode = next;
- }
- llnode = tail;
+ llnode = llist_nodes_reverse(llnode);
while (llnode) {
next = llnode->next;
estatus_node = llist_entry(llnode, struct ghes_estatus_node,
@@ -779,6 +787,32 @@ static void ghes_proc_in_irq(struct irq_
}
}

+static void ghes_print_queued_estatus(void)
+{
+ struct llist_node *llnode;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_nodes_reverse(llnode);
+ while (llnode) {
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = apei_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ generic = estatus_node->generic;
+ ghes_print_estatus(NULL, generic, estatus);
+ llnode = llnode->next;
+ }
+}
+
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
struct ghes *ghes, *ghes_global = NULL;
@@ -804,6 +838,7 @@ static int ghes_notify_nmi(unsigned int

if (sev_global >= GHES_SEV_PANIC) {
oops_begin();
+ ghes_print_queued_estatus();
__ghes_print_estatus(KERN_EMERG, ghes_global->generic,
ghes_global->estatus);
/* reboot to log the error! */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/