[RFC 4/6] mm/swap_state: respect FAULT_FLAG_RETRY_NOWAIT

From: Nadav Amit
Date: Thu Feb 25 2021 - 02:36:50 EST


From: Nadav Amit <namit@xxxxxxxxxx>

Certain use-cases (e.g., prefetch_page()) may want to avoid polling
while a page is brought in from swap, but swap_cluster_readahead()
and swap_vma_readahead() do not respect FAULT_FLAG_RETRY_NOWAIT.

Respect FAULT_FLAG_RETRY_NOWAIT in both functions by not polling in
these cases, and have do_swap_page() back out with VM_FAULT_RETRY
when the page could not be brought in without waiting. Initialize
vmf.flags in shmem_swapin(), since swap_cluster_readahead() now
reads it.
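
For reference, a minimal sketch of the intended caller side (not part
of this patch; try_prefetch_fault() is a made-up helper, and the
prefetch_page() use-case referenced above is added elsewhere in this
series):

	/*
	 * Illustrative only: request a swap-in without waiting for
	 * the I/O. With this patch, if the page is not readily
	 * available, the fault backs out with VM_FAULT_RETRY
	 * instead of polling until the swap read completes.
	 */
	static vm_fault_t try_prefetch_fault(struct vm_area_struct *vma,
					     unsigned long address)
	{
		/* the caller is assumed to hold mmap_lock for read */
		return handle_mm_fault(vma, address,
				       FAULT_FLAG_ALLOW_RETRY |
				       FAULT_FLAG_RETRY_NOWAIT, NULL);
	}

A VM_FAULT_RETRY result then means the page was not immediately
available and the swap-in may still be in flight. Note that
fault_flag_allow_retry_first() is satisfied here because
FAULT_FLAG_ALLOW_RETRY is set and FAULT_FLAG_TRIED is not.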

Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Sean Christopherson <seanjc@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: x86@xxxxxxxxxx
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
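Note on the polling decision (editorial illustration, restating the
logic from the hunks below): both readahead paths now derive do_poll
from the fault flags, i.e. polling is kept unless this is a
first-try fault that asked for NOWAIT:

	/* poll unless the fault may retry and NOWAIT was requested */
	bool do_poll = !fault_flag_allow_retry_first(vmf->flags) ||
		       !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
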
 mm/memory.c     | 15 +++++++++++++--
 mm/shmem.c      |  1 +
 mm/swap_state.c | 12 +++++++++---
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index feff48e1465a..13b9cf36268f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3326,12 +3326,23 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	}
 
 	if (!page) {
+		/*
+		 * Back out if we failed to bring the page while we
+		 * tried to avoid I/O.
+		 */
+		if (fault_flag_allow_retry_first(vmf->flags) &&
+		    (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+			ret = VM_FAULT_RETRY;
+			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+			goto out;
+		}
+
 		/*
 		 * Back out if somebody else faulted in this pte
 		 * while we released the pte lock.
 		 */
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
+		vmf->pte = pte_offset_map_lock(vma->vm_mm,
+				vmf->pmd, vmf->address, &vmf->ptl);
 		if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
 			ret = VM_FAULT_OOM;
 		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..b108e9ba9e89 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1525,6 +1525,7 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 	shmem_pseudo_vma_init(&pvma, info, index);
 	vmf.vma = &pvma;
 	vmf.address = 0;
+	vmf.flags = 0;
 	page = swap_cluster_readahead(swap, gfp, &vmf);
 	shmem_pseudo_vma_destroy(&pvma);
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 751c1ef2fe0e..1e930f7ff8b3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -656,10 +656,13 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long mask;
 	struct swap_info_struct *si = swp_swap_info(entry);
 	struct blk_plug plug;
-	bool do_poll = true, page_allocated;
+	bool page_allocated, do_poll;
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long addr = vmf->address;
 
+	do_poll = !fault_flag_allow_retry_first(vmf->flags) ||
+		  !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
+
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
 		goto skip;
@@ -838,7 +841,7 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	pte_t *pte, pentry;
 	swp_entry_t entry;
 	unsigned int i;
-	bool page_allocated;
+	bool page_allocated, do_poll;
 	struct vma_swap_readahead ra_info = {
 		.win = 1,
 	};
@@ -873,9 +876,12 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	}
 	blk_finish_plug(&plug);
 	lru_add_drain();
+
 skip:
+	do_poll = (!fault_flag_allow_retry_first(vmf->flags) ||
+		   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) && ra_info.win == 1;
 	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-				     ra_info.win == 1);
+				     do_poll);
 }
 
 /**
--
2.25.1