[RFC PATCH 18/26] hugetlb: use struct hugetlb_pte for walk_hugetlb_range

From: James Houghton
Date: Fri Jun 24 2022 - 13:38:58 EST


Although this change is large, it is somewhat straightforward. Before,
all users of walk_hugetlb_range could get the size of the PTE just by
checking the hmask or the mm_walk struct. With HGM, a mapping may be
smaller than huge_page_size(hstate_vma(vma)), so that information is
held in the hugetlb_pte struct, and we provide that to hugetlb_entry
callbacks instead of the raw pte_t*.
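
Concretely, the hugetlb_entry callback in struct mm_walk_ops changes
from

    int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
                         unsigned long addr, unsigned long next,
                         struct mm_walk *walk);

to

    int (*hugetlb_entry)(struct hugetlb_pte *hpte,
                         unsigned long addr, unsigned long next,
                         struct mm_walk *walk);

Walkers that derived sizes from hmask or hstate_vma() now use
hugetlb_pte_mask()/hugetlb_pte_size(), and walkers that cannot yet
handle high-granularity mappings (s390 gmap, mempolicy) return -EINVAL
when the hugetlb_pte does not cover a full hstate-sized page.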

Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
---
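For reviewers without the earlier patches in this series handy: struct
hugetlb_pte pairs a pte_t * with the shift of the level the entry was
found at. The helpers used below have roughly the following shape (an
approximate sketch, not the exact code; see the patch that introduces
struct hugetlb_pte for the real definitions):

    struct hugetlb_pte {
            pte_t *ptep;
            unsigned int shift;
    };

    /* Size of the region mapped by this entry. */
    static inline unsigned long hugetlb_pte_size(const struct hugetlb_pte *hpte)
    {
            return 1UL << hpte->shift;
    }

    /* Mask that aligns an address down to this entry's region. */
    static inline unsigned long hugetlb_pte_mask(const struct hugetlb_pte *hpte)
    {
            return ~(hugetlb_pte_size(hpte) - 1);
    }

    /* Read the entry, like huge_ptep_get() but taking a hugetlb_pte. */
    static inline pte_t hugetlb_ptep_get(const struct hugetlb_pte *hpte)
    {
            return huge_ptep_get(hpte->ptep);
    }

hugetlb_pte_present_leaf(hpte) is true when the entry is present and is
a leaf, i.e. it maps memory at this level instead of pointing to a
lower level of the page table.
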
arch/s390/mm/gmap.c | 10 +++++++---
fs/proc/task_mmu.c | 35 +++++++++++++++++++----------------
include/linux/pagewalk.h | 3 ++-
mm/damon/vaddr.c | 38 ++++++++++++++++++++------------------
mm/hmm.c | 7 ++++---
mm/mempolicy.c | 11 ++++++++---
mm/mincore.c | 4 ++--
mm/mprotect.c | 6 +++---
mm/pagewalk.c | 18 ++++++++++++++++--
9 files changed, 81 insertions(+), 51 deletions(-)

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b8ae4a4aa2ba..518cebfd72cd 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2620,10 +2620,14 @@ static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
return 0;
}

-static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
- unsigned long hmask, unsigned long next,
+static int __s390_enable_skey_hugetlb(struct hugetlb_pte *hpte,
+ unsigned long addr, unsigned long next,
struct mm_walk *walk)
{
+ if (!hugetlb_pte_present_leaf(hpte) ||
+ hugetlb_pte_size(hpte) != PMD_SIZE)
+ return -EINVAL;
+
- pmd_t *pmd = (pmd_t *)pte;
+ pmd_t *pmd = (pmd_t *)hpte->ptep;
unsigned long start, end;
struct page *page = pmd_page(*pmd);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d04e3470d4c..b2d683f99fa9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -714,18 +714,19 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
}

#ifdef CONFIG_HUGETLB_PAGE
-static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
+static int smaps_hugetlb_range(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
struct page *page = NULL;
+ pte_t pte = hugetlb_ptep_get(hpte);

- if (pte_present(*pte)) {
- page = vm_normal_page(vma, addr, *pte);
- } else if (is_swap_pte(*pte)) {
- swp_entry_t swpent = pte_to_swp_entry(*pte);
+ if (hugetlb_pte_present_leaf(hpte)) {
+ page = vm_normal_page(vma, addr, pte);
+ } else if (is_swap_pte(pte)) {
+ swp_entry_t swpent = pte_to_swp_entry(pte);

if (is_pfn_swap_entry(swpent))
page = pfn_swap_entry_to_page(swpent);
@@ -734,9 +735,9 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
int mapcount = page_mapcount(page);

if (mapcount >= 2)
- mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
+ mss->shared_hugetlb += hugetlb_pte_size(hpte);
else
- mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+ mss->private_hugetlb += hugetlb_pte_size(hpte);
}
return 0;
}
@@ -1535,7 +1536,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,

#ifdef CONFIG_HUGETLB_PAGE
/* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
+static int pagemap_hugetlb_range(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -1543,13 +1544,13 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
struct vm_area_struct *vma = walk->vma;
u64 flags = 0, frame = 0;
int err = 0;
- pte_t pte;
+ unsigned long hmask = hugetlb_pte_mask(hpte);

if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;

- pte = huge_ptep_get(ptep);
- if (pte_present(pte)) {
+ if (hugetlb_pte_present_leaf(hpte)) {
+ pte_t pte = hugetlb_ptep_get(hpte);
struct page *page = pte_page(pte);

if (!PageAnon(page))
@@ -1565,7 +1566,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
if (pm->show_pfn)
frame = pte_pfn(pte) +
((addr & ~hmask) >> PAGE_SHIFT);
- } else if (pte_swp_uffd_wp_any(pte)) {
+ } else if (pte_swp_uffd_wp_any(hugetlb_ptep_get(hpte))) {
flags |= PM_UFFD_WP;
}

@@ -1869,17 +1870,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(struct hugetlb_pte *hpte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
- pte_t huge_pte = huge_ptep_get(pte);
+ pte_t huge_pte = hugetlb_ptep_get(hpte);
struct numa_maps *md;
struct page *page;

- if (!pte_present(huge_pte))
+ if (!hugetlb_pte_present_leaf(hpte))
return 0;

page = pte_page(huge_pte);
+ if (page != compound_head(page))
+ return 0;

md = walk->private;
gather_stats(page, md, pte_dirty(huge_pte), 1);
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index ac7b38ad5903..0d21e25df37f 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -3,6 +3,7 @@
#define _LINUX_PAGEWALK_H

#include <linux/mm.h>
+#include <linux/hugetlb.h>

struct mm_walk;

@@ -47,7 +48,7 @@ struct mm_walk_ops {
unsigned long next, struct mm_walk *walk);
int (*pte_hole)(unsigned long addr, unsigned long next,
int depth, struct mm_walk *walk);
- int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+ int (*hugetlb_entry)(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long next,
struct mm_walk *walk);
int (*test_walk)(unsigned long addr, unsigned long next,
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 59e1653799f8..ce50b937dcf2 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -324,14 +324,15 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
}

#ifdef CONFIG_HUGETLB_PAGE
-static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
+static void damon_hugetlb_mkold(struct hugetlb_pte *hpte, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long addr)
{
bool referenced = false;
- pte_t entry = huge_ptep_get(pte);
+ pte_t entry = huge_ptep_get(hpte->ptep);
struct page *page = pte_page(entry);
+ struct page *hpage = compound_head(page);

- get_page(page);
+ get_page(hpage);

if (pte_young(entry)) {
referenced = true;
@@ -342,18 +343,18 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,

#ifdef CONFIG_MMU_NOTIFIER
if (mmu_notifier_clear_young(mm, addr,
- addr + huge_page_size(hstate_vma(vma))))
+ addr + hugetlb_pte_size(hpte)))
referenced = true;
#endif /* CONFIG_MMU_NOTIFIER */

if (referenced)
- set_page_young(page);
+ set_page_young(hpage);

- set_page_idle(page);
- put_page(page);
+ set_page_idle(hpage);
+ put_page(hpage);
}

-static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int damon_mkold_hugetlb_entry(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -361,12 +362,12 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
spinlock_t *ptl;
pte_t entry;

- ptl = huge_pte_lock(h, walk->mm, pte);
- entry = huge_ptep_get(pte);
+ ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+ entry = huge_ptep_get(hpte->ptep);
if (!pte_present(entry))
goto out;

- damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
+ damon_hugetlb_mkold(hpte, walk->mm, walk->vma, addr);

out:
spin_unlock(ptl);
@@ -474,31 +475,32 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
}

#ifdef CONFIG_HUGETLB_PAGE
-static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int damon_young_hugetlb_entry(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct damon_young_walk_private *priv = walk->private;
struct hstate *h = hstate_vma(walk->vma);
- struct page *page;
+ struct page *page, *hpage;
spinlock_t *ptl;
pte_t entry;

- ptl = huge_pte_lock(h, walk->mm, pte);
+ ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
- entry = huge_ptep_get(pte);
+ entry = huge_ptep_get(hpte->ptep);
if (!pte_present(entry))
goto out;

page = pte_page(entry);
- get_page(page);
+ hpage = compound_head(page);
+ get_page(hpage);

- if (pte_young(entry) || !page_is_idle(page) ||
+ if (pte_young(entry) || !page_is_idle(hpage) ||
mmu_notifier_test_young(walk->mm, addr)) {
*priv->page_sz = huge_page_size(h);
priv->young = true;
}

- put_page(page);
+ put_page(hpage);

out:
spin_unlock(ptl);
diff --git a/mm/hmm.c b/mm/hmm.c
index 3fd3242c5e50..1ad5d76fa8be 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -472,7 +472,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
#endif

#ifdef CONFIG_HUGETLB_PAGE
-static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int hmm_vma_walk_hugetlb_entry(struct hugetlb_pte *hpte,
unsigned long start, unsigned long end,
struct mm_walk *walk)
{
@@ -483,11 +483,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
unsigned int required_fault;
unsigned long pfn_req_flags;
unsigned long cpu_flags;
+ unsigned long hmask = hugetlb_pte_mask(hpte);
spinlock_t *ptl;
pte_t entry;

- ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
- entry = huge_ptep_get(pte);
+ ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+ entry = huge_ptep_get(hpte->ptep);

i = (start - range->start) >> PAGE_SHIFT;
pfn_req_flags = range->hmm_pfns[i];
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d39b01fd52fe..a1d82db7c19f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -559,7 +559,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
return addr != end ? -EIO : 0;
}

-static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+static int queue_pages_hugetlb(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -571,8 +571,13 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
spinlock_t *ptl;
pte_t entry;

- ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
- entry = huge_ptep_get(pte);
+ /* We don't migrate high-granularity HugeTLB mappings for now. */
+ if (hugetlb_pte_size(hpte) !=
+ huge_page_size(hstate_vma(walk->vma)))
+ return -EINVAL;
+
+ ptl = hugetlb_pte_lock(walk->mm, hpte);
+ entry = hugetlb_ptep_get(hpte);
if (!pte_present(entry))
goto unlock;
page = pte_page(entry);
diff --git a/mm/mincore.c b/mm/mincore.c
index fa200c14185f..dc1717dc6a2c 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -22,7 +22,7 @@
#include <linux/uaccess.h>
#include "swap.h"

-static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
+static int mincore_hugetlb(struct hugetlb_pte *hpte, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
@@ -33,7 +33,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
* Hugepages under user process are always in RAM and never
* swapped out, but theoretically it needs to be checked.
*/
- present = pte && !huge_pte_none(huge_ptep_get(pte));
+ present = hpte->ptep && !hugetlb_pte_none(hpte);
for (; addr != end; vec++, addr += PAGE_SIZE)
*vec = present;
walk->private = vec;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..9c5a35a1c0eb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -476,12 +476,12 @@ static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
0 : -EACCES;
}

-static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int prot_none_hugetlb_entry(struct hugetlb_pte *hpte,
unsigned long addr, unsigned long next,
struct mm_walk *walk)
{
- return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
- 0 : -EACCES;
+ return pfn_modify_allowed(pte_pfn(*hpte->ptep),
+ *(pgprot_t *)(walk->private)) ? 0 : -EACCES;
}

static int prot_none_test(unsigned long addr, unsigned long next,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9b3db11a4d1d..f8e24a0a0179 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,6 +3,7 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
+#include <linux/minmax.h>

/*
* We want to know the real level where a entry is located ignoring any
@@ -301,13 +302,26 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
pte_t *pte;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ struct hugetlb_pte hpte;

do {
- next = hugetlb_entry_end(h, addr, end);
pte = huge_pte_offset(walk->mm, addr & hmask, sz);
+ if (!pte) {
+ next = hugetlb_entry_end(h, addr, end);
+ } else {
+ hugetlb_pte_populate(&hpte, pte, huge_page_shift(h));
+ if (hugetlb_hgm_enabled(vma)) {
+ err = hugetlb_walk_to(walk->mm, &hpte, addr,
+ PAGE_SIZE,
+ /*stop_at_none=*/true);
+ if (err)
+ break;
+ }
+ next = min(addr + hugetlb_pte_size(&hpte), end);
+ }

if (pte)
- err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+ err = ops->hugetlb_entry(&hpte, addr, next, walk);
else if (ops->pte_hole)
err = ops->pte_hole(addr, next, -1, walk);

--
2.37.0.rc0.161.g10f37bed90-goog