[PATCH 1/2 v2] fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl

From: Andrei Vagin
Date: Mon Nov 06 2023 - 17:10:09 EST


The PAGEMAP_SCAN ioctl returns information about page table entries and
is more efficient than reading pagemap files. CRIU can start using this
ioctl, but it needs the soft-dirty bits to track memory changes.

We are aware of the new method for tracking memory changes implemented in
the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method
is that it can be used by unprivileged users. However, it is not feasible
to transparently replace the soft-dirty tracker with the new one. The main
problem is that userfaultfd descriptors have to be preserved between
pre-dump iterations. This means CRIU will continue supporting the
soft-dirty method to avoid breakage for current users. The new method
will be implemented as a separate feature.

Cc: Muhammad Usama Anjum <usama.anjum@xxxxxxxxxxxxx>
Cc: Michał Mirosław <mirq-linux@xxxxxxxxxxxx>
Signed-off-by: Andrei Vagin <avagin@xxxxxxxxxx>
---
v2: check the soft-dirty bit in pagemap_page_category

Documentation/admin-guide/mm/pagemap.rst | 1 +
fs/proc/task_mmu.c | 17 ++++++++++++++++-
include/uapi/linux/fs.h | 1 +
3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index fe17cf210426..f5f065c67615 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -253,6 +253,7 @@ Following flags about pages are currently supported:
- ``PAGE_IS_SWAPPED`` - Page is in swapped
- ``PAGE_IS_PFNZERO`` - Page has zero PFN
- ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
+- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty

The ``struct pm_scan_arg`` is used as the argument of the IOCTL.

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ef2eb12906da..51e0ec658457 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1761,7 +1761,7 @@ static int pagemap_release(struct inode *inode, struct file *file)
#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
- PAGE_IS_HUGE)
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)

struct pagemap_scan_private {
@@ -1793,6 +1793,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,

if (is_zero_pfn(pte_pfn(pte)))
categories |= PAGE_IS_PFNZERO;
+ if (pte_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
swp_entry_t swp;

@@ -1806,6 +1808,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
!PageAnon(pfn_swap_entry_to_page(swp)))
categories |= PAGE_IS_FILE;
}
+ if (pte_swp_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
}

return categories;
@@ -1853,12 +1857,16 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,

if (is_zero_pfn(pmd_pfn(pmd)))
categories |= PAGE_IS_PFNZERO;
+ if (pmd_soft_dirty(pmd))
+ categories |= PAGE_IS_SOFT_DIRTY;
} else if (is_swap_pmd(pmd)) {
swp_entry_t swp;

categories |= PAGE_IS_SWAPPED;
if (!pmd_swp_uffd_wp(pmd))
categories |= PAGE_IS_WRITTEN;
+ if (pmd_swp_soft_dirty(pmd))
+ categories |= PAGE_IS_SOFT_DIRTY;

if (p->masks_of_interest & PAGE_IS_FILE) {
swp = pmd_to_swp_entry(pmd);
@@ -1905,10 +1913,14 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
categories |= PAGE_IS_FILE;
if (is_zero_pfn(pte_pfn(pte)))
categories |= PAGE_IS_PFNZERO;
+ if (pte_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
categories |= PAGE_IS_SWAPPED;
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
+ if (pte_swp_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
}

return categories;
@@ -1991,6 +2003,9 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
if (vma->vm_flags & VM_PFNMAP)
return 1;

+ if (vma->vm_flags & VM_SOFTDIRTY)
+ vma_category |= PAGE_IS_SOFT_DIRTY;
+
if (!pagemap_scan_is_interesting_vma(vma_category, p))
return 1;

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index da43810b7485..48ad69f7722e 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_SWAPPED (1 << 4)
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
+#define PAGE_IS_SOFT_DIRTY (1 << 7)

/*
* struct page_region - Page region with flags
--
2.42.0.869.gea05f2083d-goog