[PATCH v3 3/4] arm64: ghes: handle the case when memory_failure recovery failed

From: Xie XiuQi
Date: Mon Dec 05 2022 - 10:43:58 EST


memory_failure() may not always recover successfully. In the synchronous
external data abort case, if memory_failure() recovery fails, we must handle it.

In this case, if the recovery fails, the common helper function
arch_apei_do_recovery_failed() is invoked. For the arm64 platform, we just
send a SIGBUS.

Signed-off-by: Xie XiuQi <xiexiuqi@xxxxxxxxxx>
---
drivers/acpi/apei/ghes.c | 3 ++-
include/linux/mm.h | 2 +-
mm/memory-failure.c | 24 +++++++++++++++++-------
3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ba0631c54c52..ddc4da603215 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -435,7 +435,8 @@ static void ghes_kick_task_work(struct callback_head *head)

estatus_node = container_of(head, struct ghes_estatus_node, task_work);
if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
- memory_failure_queue_kick(estatus_node->task_work_cpu);
+ if (memory_failure_queue_kick(estatus_node->task_work_cpu))
+ arch_apei_do_recovery_failed();

estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 974ccca609d2..126d1395c208 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3290,7 +3290,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
-extern void memory_failure_queue_kick(int cpu);
+extern int memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index bead6bccc7f2..b9398f67264a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2240,12 +2240,12 @@ void memory_failure_queue(unsigned long pfn, int flags)
}
EXPORT_SYMBOL_GPL(memory_failure_queue);

-static void memory_failure_work_func(struct work_struct *work)
+static int __memory_failure_work_func(struct work_struct *work)
{
struct memory_failure_cpu *mf_cpu;
struct memory_failure_entry entry = { 0, };
unsigned long proc_flags;
- int gotten;
+ int gotten, ret = 0, result;

mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
@@ -2254,24 +2254,34 @@ static void memory_failure_work_func(struct work_struct *work)
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
- if (entry.flags & MF_SOFT_OFFLINE)
+ if (entry.flags & MF_SOFT_OFFLINE) {
soft_offline_page(entry.pfn, entry.flags);
- else
- memory_failure(entry.pfn, entry.flags);
+ } else {
+ result = memory_failure(entry.pfn, entry.flags);
+ if (ret == 0 && result != 0)
+ ret = result;
+ }
}
+
+ return ret;
+}
+
+static void memory_failure_work_func(struct work_struct *work)
+{
+ __memory_failure_work_func(work);
}

/*
* Process memory_failure work queued on the specified CPU.
* Used to avoid return-to-userspace racing with the memory_failure workqueue.
*/
-void memory_failure_queue_kick(int cpu)
+int memory_failure_queue_kick(int cpu)
{
struct memory_failure_cpu *mf_cpu;

mf_cpu = &per_cpu(memory_failure_cpu, cpu);
cancel_work_sync(&mf_cpu->work);
- memory_failure_work_func(&mf_cpu->work);
+ return __memory_failure_work_func(&mf_cpu->work);
}

static int __init memory_failure_init(void)
--
2.20.1