[RFC PATCH 02/10] mm/swap: move no readahead swapin code to a stand-alone helper

From: Kairui Song
Date: Tue Mar 26 2024 - 15:04:49 EST


From: Kairui Song <kasong@xxxxxxxxxxx>

Simply move the routine to a standalone function, giving a cleaner
split and avoiding helpers being referenced across multiple files.

Basically no feature change, but the error path is very slightly
different. Previously a mem_cgroup_swapin_charge_folio() failure caused a
direct OOM; now we go through the error checking path in do_swap_page(),
and if the page is already there, we just return as the fault was handled.

Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
mm/memory.c | 42 +++-------------------------------
mm/swap.h | 8 +++++++
mm/swap_state.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 71 insertions(+), 39 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index f2bc6dd15eb8..e42fadc25268 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3937,7 +3937,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
swp_entry_t entry;
pte_t pte;
vm_fault_t ret = 0;
- void *shadow = NULL;

if (!pte_unmap_same(vmf))
goto out;
@@ -4001,47 +4000,12 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
__swap_count(entry) == 1) {
- /*
- * Prevent parallel swapin from proceeding with
- * the cache flag. Otherwise, another thread may
- * finish swapin first, free the entry, and swapout
- * reusing the same entry. It's undetectable as
- * pte_same() returns true due to entry reuse.
- */
- if (swapcache_prepare(entry)) {
- /* Relax a bit to prevent rapid repeated page faults */
- schedule_timeout_uninterruptible(1);
+ /* skip swapcache and readahead */
+ folio = swapin_direct(entry, GFP_HIGHUSER_MOVABLE, vmf);
+ if (PTR_ERR(folio) == -EBUSY)
goto out;
- }
need_clear_cache = true;
-
- /* skip swapcache */
- folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
- vma, vmf->address, false);
page = &folio->page;
- if (folio) {
- __folio_set_locked(folio);
- __folio_set_swapbacked(folio);
-
- if (mem_cgroup_swapin_charge_folio(folio,
- vma->vm_mm, GFP_KERNEL,
- entry)) {
- ret = VM_FAULT_OOM;
- goto out_page;
- }
- mem_cgroup_swapin_uncharge_swap(entry);
-
- shadow = get_shadow_from_swap_cache(entry);
- if (shadow)
- workingset_refault(folio, shadow);
-
- folio_add_lru(folio);
-
- /* To provide entry to swap_read_folio() */
- folio->swap = entry;
- swap_read_folio(folio, true, NULL);
- folio->private = NULL;
- }
} else {
page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
vmf);
diff --git a/mm/swap.h b/mm/swap.h
index fc2f6ade7f80..40e902812cc5 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -55,6 +55,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
bool skip_if_exists);
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
struct mempolicy *mpol, pgoff_t ilx);
+struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
+ struct vm_fault *vmf);
struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
struct vm_fault *vmf);

@@ -87,6 +89,12 @@ static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
return NULL;
}

+static inline struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
+ struct vm_fault *vmf)
+{
+ return NULL; /* stub: never swaps anything in, callers get no folio */
+}
+
static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
struct vm_fault *vmf)
{
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bfc7e8c58a6d..0a3fa48b3893 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -879,6 +879,66 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
return folio;
}

+/**
+ * swapin_direct - swap in a folio skipping swap cache and readahead
+ * @entry: swap entry of this memory
+ * @gfp_mask: memory allocation flags used to allocate the folio
+ * @vmf: fault information
+ *
+ * Return: the folio read in for @entry; NULL if allocating or charging it
+ * failed; ERR_PTR(-EBUSY) if a parallel swapin holds the cache flag. Unless
+ * -EBUSY is returned, the caller must drop the cache flag afterwards.
+ */
+struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
+			    struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct folio *folio;
+	void *shadow;
+
+	/*
+	 * Prevent parallel swapin from proceeding with the cache flag.
+	 * Otherwise, another thread may finish swapin first, free the
+	 * entry, and swapout reusing the same entry. It's undetectable
+	 * as pte_same() returns true due to entry reuse.
+	 */
+	if (swapcache_prepare(entry)) {
+		/* Relax a bit to prevent rapid repeated page faults */
+		schedule_timeout_uninterruptible(1);
+		return ERR_PTR(-EBUSY);
+	}
+
+	/* skip swapcache; honour the caller's allocation flags */
+	folio = vma_alloc_folio(gfp_mask, 0, vma, vmf->address, false);
+	if (!folio)
+		return NULL;
+
+	__folio_set_locked(folio);
+	__folio_set_swapbacked(folio);
+
+	if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
+					   GFP_KERNEL, entry)) {
+		/* Charge failed: free the folio, caller rechecks the PTE */
+		folio_unlock(folio);
+		folio_put(folio);
+		return NULL;
+	}
+	mem_cgroup_swapin_uncharge_swap(entry);
+
+	shadow = get_shadow_from_swap_cache(entry);
+	if (shadow)
+		workingset_refault(folio, shadow);
+
+	folio_add_lru(folio);
+	/* To provide entry to swap_read_folio() */
+	folio->swap = entry;
+	swap_read_folio(folio, true, NULL);
+	folio->private = NULL;
+
+	return folio;
+}
+
/**
* swapin_readahead - swap in pages in hope we need them soon
* @entry: swap entry of this memory
--
2.43.0