[PATCH v1] mm, pagemap: expose hwpoison entry

From: Naoya Horiguchi
Date: Mon Oct 04 2021 - 07:50:56 EST


From: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>

A hwpoison entry is a non-present page table entry used to report
memory error events to userspace. If we had an easy way to know
which processes have hwpoison entries, it would help user processes
take proper actions, but no such interface exists today.
So make the pagemap interface expose hwpoison entries to userspace.

Hwpoison entries for hugepages are also exposed by this patch. The
example below shows what pagemap reports when a memory error hits a
hugepage mapped to a process.

$ ./page-types --no-summary --pid $PID --raw --list --addr 0x700000000+0x400
voffset offset len flags
700000000 12fa00 1 ___U_______Ma__H_G_________________f_______1
700000001 12fa01 1ff ___________Ma___TG_________________f_______1
700000200 12f800 1 __________B________X_______________f______w_
700000201 12f801 1 ___________________X_______________f______w_ // memory failure hit this page
700000202 12f802 1fe __________B________X_______________f______w_

Entries with both the "X" flag (hwpoison flag) and the "w" flag (swap
flag) are considered to be hwpoison entries. So all pages in the 2MB
range are inaccessible from the process. We can get the actual error
location by running page-types in physical address mode.

$ ./page-types --no-summary --addr 0x12f800+0x200 --raw --list
offset len flags
12f800 1 __________B_________________________________
12f801 1 ___________________X________________________
12f802 1fe __________B_________________________________

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
---
fs/proc/task_mmu.c | 41 ++++++++++++++++++++++++++++++++---------
include/linux/swapops.h | 13 +++++++++++++
tools/vm/page-types.c | 7 ++++++-
3 files changed, 51 insertions(+), 10 deletions(-)

diff --git v5.15-rc3/fs/proc/task_mmu.c v5.15-rc3_patched/fs/proc/task_mmu.c
index cf25be3e0321..bfc4772a58fb 100644
--- v5.15-rc3/fs/proc/task_mmu.c
+++ v5.15-rc3_patched/fs/proc/task_mmu.c
@@ -1298,6 +1298,7 @@ struct pagemapread {
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
#define PM_UFFD_WP BIT_ULL(57)
+#define PM_HWPOISON BIT_ULL(60)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)
@@ -1386,6 +1387,10 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SWAP;
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
+ if (is_hwpoison_entry(entry)) {
+ page = hwpoison_entry_to_page(entry);
+ flags |= PM_HWPOISON;
+ }
}

if (page && !PageAnon(page))
@@ -1505,34 +1510,52 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
u64 flags = 0, frame = 0;
int err = 0;
pte_t pte;
+ struct page *page = NULL;

if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;

pte = huge_ptep_get(ptep);
if (pte_present(pte)) {
- struct page *page = pte_page(pte);
-
- if (!PageAnon(page))
- flags |= PM_FILE;
-
- if (page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ page = pte_page(pte);

flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pte_pfn(pte) +
((addr & ~hmask) >> PAGE_SHIFT);
+ } else if (is_swap_pte(pte)) {
+ swp_entry_t entry = pte_to_swp_entry(pte);
+ unsigned long offset;
+
+ if (pm->show_pfn) {
+ offset = swp_offset(entry) +
+ ((addr & ~hmask) >> PAGE_SHIFT);
+ frame = swp_type(entry) |
+ (offset << MAX_SWAPFILES_SHIFT);
+ }
+ flags |= PM_SWAP;
+ if (is_migration_entry(entry))
+ page = compound_head(pfn_swap_entry_to_page(entry));
+ if (is_hwpoison_entry(entry))
+ flags |= PM_HWPOISON;
}

+ if (page && !PageAnon(page))
+ flags |= PM_FILE;
+ if (page && page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+
for (; addr != end; addr += PAGE_SIZE) {
pagemap_entry_t pme = make_pme(frame, flags);

err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
- if (pm->show_pfn && (flags & PM_PRESENT))
- frame++;
+ if (pm->show_pfn)
+ if (flags & PM_PRESENT)
+ frame++;
+ else if (flags & PM_SWAP)
+ frame += (1 << MAX_SWAPFILES_SHIFT);
}

cond_resched();
diff --git v5.15-rc3/include/linux/swapops.h v5.15-rc3_patched/include/linux/swapops.h
index d356ab4047f7..bb6141e5c069 100644
--- v5.15-rc3/include/linux/swapops.h
+++ v5.15-rc3_patched/include/linux/swapops.h
@@ -360,6 +360,14 @@ static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
return swp_offset(entry);
}

+static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
+{
+ struct page *p = pfn_to_page(swp_offset(entry));
+
+ WARN_ON(!PageHWPoison(p));
+ return p;
+}
+
static inline void num_poisoned_pages_inc(void)
{
atomic_long_inc(&num_poisoned_pages);
@@ -382,6 +390,11 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
return 0;
}

+static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
+{
+ return NULL;
+}
+
static inline void num_poisoned_pages_inc(void)
{
}
diff --git v5.15-rc3/tools/vm/page-types.c v5.15-rc3_patched/tools/vm/page-types.c
index b1ed76d9a979..483e417fda41 100644
--- v5.15-rc3/tools/vm/page-types.c
+++ v5.15-rc3_patched/tools/vm/page-types.c
@@ -53,6 +53,7 @@
#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT)
#define PM_SOFT_DIRTY (1ULL << 55)
#define PM_MMAP_EXCLUSIVE (1ULL << 56)
+#define PM_HWPOISON (1ULL << 60)
#define PM_FILE (1ULL << 61)
#define PM_SWAP (1ULL << 62)
#define PM_PRESENT (1ULL << 63)
@@ -311,6 +312,8 @@ static unsigned long pagemap_pfn(uint64_t val)

if (val & PM_PRESENT)
pfn = PM_PFRAME(val);
+ else if (val & PM_SWAP)
+ pfn = PM_SWAP_OFFSET(val);
else
pfn = 0;

@@ -492,6 +495,8 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
flags |= BIT(FILE);
if (pme & PM_SWAP)
flags |= BIT(SWAP);
+ if (pme & PM_HWPOISON)
+ flags |= BIT(HWPOISON);
if (pme & PM_MMAP_EXCLUSIVE)
flags |= BIT(MMAP_EXCLUSIVE);

@@ -742,7 +747,7 @@ static void walk_vma(unsigned long index, unsigned long count)
pfn = pagemap_pfn(buf[i]);
if (pfn)
walk_pfn(index + i, pfn, 1, buf[i]);
- if (buf[i] & PM_SWAP)
+ else if (buf[i] & PM_SWAP)
walk_swap(index + i, buf[i]);
}

--
2.25.1