[PATCH] hugetlb: Adjust hugetlb page ref/map counts for PMD sharing

From: Mike Kravetz
Date: Mon Jan 30 2023 - 23:14:14 EST


When hugetlb PMDs are shared, the sharing code simply adds the shared
PMD to another process's page table. It does not update the ref/map
counts of the pages referenced by the shared PMD. As a result, the
ref/map counts only reflect the single update made when the page was
added to the shared PMD. Even though the shared PMD may be in MANY
process page tables, the ref/map counts on the pages will only appear
to be those of a single process.
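
Note that the number of sharing processes is already discoverable: each
process that maps through a shared PMD holds a reference on the PMD page,
so page_count() of the PMD page gives the number of sharers (this is what
the new adjust_page_counts_for_shared_pmd() below relies on). A minimal
sketch of that relationship (illustrative only, not part of this patch;
the helper name is hypothetical):

	/*
	 * Number of processes currently sharing the PMD that contains @ptep.
	 * Each sharer holds one reference on the PMD page.
	 */
	static int hugetlb_pmd_sharers(pte_t *ptep)
	{
		return page_count(virt_to_page(ptep));
	}

Today, nothing propagates this number into the ref/map counts of the
hugetlb pages mapped by that PMD.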

Update ref/map counts to take PMD sharing into account. This is done in
three distinct places:
1) At PMD share time in huge_pmd_share(),
Go through all entries in the PMD, and increment the map and ref counts
for all referenced pages. huge_pmd_share() is just adding another use
and mapping of each page.
2) At PMD unshare time in huge_pmd_unshare(),
Go through all entries in the PMD, and decrement the map and ref counts
for all referenced pages. huge_pmd_unshare() is just removing one use
and mapping of each page.
3) When faulting in a new hugetlb page,
Check if we are adding a new entry to a shared PMD. If yes, add
'num_of_sharing_processes - 1' to the ref and map counts (a worked
example follows this list).
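
A worked example of case 3 (assumed numbers; this mirrors what the new
adjust_page_counts_for_shared_pmd() helper below does):

	/* Say three processes already share the PMD when the fault occurs. */
	int sharers = page_count(virt_to_page(ptep));	/* == 3 */

	if (sharers >= 2) {
		/*
		 * One ref/map is already accounted to the faulting process,
		 * so only the other (sharers - 1) == 2 users are added here.
		 */
		folio_ref_add(folio, sharers - 1);
		atomic_add(sharers - 1, &folio->_entire_mapcount);
	}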

Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
---
mm/hugetlb.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3a01a9dbf445..c7b1c6307a82 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -96,6 +96,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
+static void adjust_page_counts_for_shared_pmd(pte_t *ptep, struct folio *folio);

static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
@@ -5905,10 +5906,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
if (!pte_same(huge_ptep_get(ptep), old_pte))
goto backout;

- if (anon_rmap)
+ if (anon_rmap) {
hugepage_add_new_anon_rmap(folio, vma, haddr);
- else
+ } else {
page_dup_file_rmap(&folio->page, true);
+ adjust_page_counts_for_shared_pmd(ptep, folio);
+ }
new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
/*
@@ -7036,6 +7039,43 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
*end = ALIGN(*end, PUD_SIZE);
}

+static void adjust_page_counts_for_shared_pmd(pte_t *ptep, struct folio *folio)
+{
+ int shared_count = page_count(virt_to_page(ptep));
+
+ if (shared_count < 2)
+ return;
+
+ folio_ref_add(folio, shared_count - 1);
+ atomic_add(shared_count - 1, &folio->_entire_mapcount);
+}
+
+static void adjust_shared_pmd_page_counts(pmd_t *pmd_start, int delta)
+{
+ struct folio *folio;
+ struct page *page;
+ pte_t *ptep, pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ ptep = (pte_t *)(pmd_start + i);
+
+ pte = huge_ptep_get(ptep);
+ if (huge_pte_none(pte) || !pte_present(pte))
+ continue;
+
+ page = pte_page(pte);
+ folio = page_folio(page);
+ if (delta > 0) {
+ folio_get(folio);
+ atomic_inc(&folio->_entire_mapcount);
+ } else {
+ folio_put(folio);
+ atomic_dec(&folio->_entire_mapcount);
+ }
+ }
+}
+
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
@@ -7078,9 +7118,11 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,

ptl = huge_pte_lock(hstate_vma(vma), mm, spte);
if (pud_none(*pud)) {
- pud_populate(mm, pud,
- (pmd_t *)((unsigned long)spte & PAGE_MASK));
+ pmd_t *pmdp = (pmd_t *)((unsigned long)spte & PAGE_MASK);
+
+ pud_populate(mm, pud, pmdp);
mm_inc_nr_pmds(mm);
+ adjust_shared_pmd_page_counts(pmdp, 1);
} else {
put_page(virt_to_page(spte));
}
@@ -7118,12 +7160,18 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,

pud_clear(pud);
put_page(virt_to_page(ptep));
+ adjust_shared_pmd_page_counts(
+ (pmd_t *)((unsigned long)ptep & PAGE_MASK), -1);
mm_dec_nr_pmds(mm);
return 1;
}

#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */

+static void adjust_page_counts_for_shared_pmd(pte_t *ptep, struct folio *folio)
+{
+}
+
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{
--
2.39.1