[PATCH v2 5/9] mm, hwpoison: make unpoison aware of raw error info in hwpoisoned hugepage

From: Naoya Horiguchi
Date: Thu Jun 23 2022 - 19:52:57 EST


From: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>

Raw error info list needs to be removed when hwpoisoned hugetlb is
unpoisioned. And unpoison handler needs to know how many errors there
are in the target hugepage. So add them.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
---
include/linux/swapops.h | 9 +++++++++
mm/memory-failure.c | 34 +++++++++++++++++++++++++++-------
2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 9584c2e1c54a..ad62776ee99c 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -494,6 +494,11 @@ static inline void num_poisoned_pages_dec(void)
atomic_long_dec(&num_poisoned_pages);
}

+static inline void num_poisoned_pages_sub(long i)
+{
+ atomic_long_sub(i, &num_poisoned_pages);
+}
+
#else

static inline swp_entry_t make_hwpoison_entry(struct page *page)
@@ -514,6 +519,10 @@ static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
static inline void num_poisoned_pages_inc(void)
{
}
+
+static inline void num_poisoned_pages_sub(long i)
+{
+}
#endif

static inline int non_swap_entry(swp_entry_t entry)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index af571fa6f2af..6005e953d011 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1571,23 +1571,34 @@ static inline int hugetlb_set_page_hwpoison(struct page *hpage,
return ret;
}

-inline int hugetlb_clear_page_hwpoison(struct page *hpage)
+static inline long free_raw_hwp_pages(struct page *hpage, bool move_flag)
{
struct llist_head *head;
struct llist_node *t, *tnode;
+ long count = 0;

- if (raw_hwp_unreliable(hpage))
- return -EBUSY;
- ClearPageHWPoison(hpage);
head = raw_hwp_list_head(hpage);
llist_for_each_safe(tnode, t, head->first) {
struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);

- SetPageHWPoison(p->page);
+ if (move_flag)
+ SetPageHWPoison(p->page);
kfree(p);
+ count++;
}
llist_del_all(head);
- return 0;
+ return count;
+}
+
+inline int hugetlb_clear_page_hwpoison(struct page *hpage)
+{
+ int ret = -EBUSY;
+
+ if (raw_hwp_unreliable(hpage))
+ return ret;
+ ret = !TestClearPageHWPoison(hpage);
+ free_raw_hwp_pages(hpage, true);
+ return ret;
}

/*
@@ -1729,6 +1740,10 @@ static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *
{
return 0;
}
+
+static inline void free_raw_hwp_pages(struct page *hpage, bool move_flag)
+{
+}
#endif

static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
@@ -2188,6 +2203,7 @@ int unpoison_memory(unsigned long pfn)
struct page *p;
int ret = -EBUSY;
int freeit = 0;
+ long count = 1;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);

@@ -2235,6 +2251,8 @@ int unpoison_memory(unsigned long pfn)

ret = get_hwpoison_page(p, MF_UNPOISON);
if (!ret) {
+ if (PageHuge(p))
+ count = free_raw_hwp_pages(page, false);
ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
} else if (ret < 0) {
if (ret == -EHWPOISON) {
@@ -2243,6 +2261,8 @@ int unpoison_memory(unsigned long pfn)
unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
pfn, &unpoison_rs);
} else {
+ if (PageHuge(p))
+ count = free_raw_hwp_pages(page, false);
freeit = !!TestClearPageHWPoison(p);

put_page(page);
@@ -2255,7 +2275,7 @@ int unpoison_memory(unsigned long pfn)
unlock_mutex:
mutex_unlock(&mf_mutex);
if (!ret || freeit) {
- num_poisoned_pages_dec();
+ num_poisoned_pages_sub(count);
unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
page_to_pfn(p), &unpoison_rs);
}
--
2.25.1