[PATCH] mm: hugetlb_vmemmap: fix a race between vmemmap pmd split

From: Muchun Song
Date: Thu Jul 06 2023 - 23:42:52 EST


In __split_vmemmap_huge_pmd(), the local variable @page is read via
pmd_page(*pmd) without holding page_table_lock, so a concurrent splitter
may already have replaced the huge pmd and @page may end up pointing at
the newly installed page table page instead of the huge pmd page. The
damage shows up in set_pte_at(): we pass it an invalid page struct, and
if set_pte_at() dereferences that struct (e.g. when
CONFIG_PAGE_TABLE_CHECK is enabled), it may crash the kernel. Fix this
by sampling the pmd page while holding page_table_lock, and inline
__split_vmemmap_huge_pmd() since it only has one user.

Fixes: d8d55f5616cf ("mm: sparsemem: use page table lock to protect kernel pmd operations")
Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
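Not part of the patch, just an illustration for reviewers: a minimal
userspace sketch of the window being closed, using a pthread mutex as a
stand-in for init_mm.page_table_lock. All names below (struct slot,
racy_lookup(), safe_lookup()) are made up for the example; none of this
is kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct slot {
	bool is_leaf;		/* models pmd_leaf(*pmd) */
	void *page;		/* models pmd_page(*pmd) */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct slot pmd = { .is_leaf = true, .page = (void *)0x1000 };

/* Old pattern: check under the lock, then re-read the slot outside it. */
static void *racy_lookup(void)
{
	bool leaf;

	pthread_mutex_lock(&lock);
	leaf = pmd.is_leaf;
	pthread_mutex_unlock(&lock);

	if (!leaf)
		return NULL;

	/*
	 * Window: a concurrent splitter may have replaced the leaf with a
	 * page table page by now, so this read can return the wrong page.
	 */
	return pmd.page;
}

/* New pattern: one snapshot of the page, taken while the lock is held. */
static void *safe_lookup(void)
{
	void *head;

	pthread_mutex_lock(&lock);
	head = pmd.is_leaf ? pmd.page : NULL;
	pthread_mutex_unlock(&lock);

	return head;	/* NULL: someone else already split it, nothing to do */
}

int main(void)
{
	printf("racy: %p, safe: %p\n", racy_lookup(), safe_lookup());
	return 0;
}

The point is only that the fixed code makes one decision about the pmd
while it cannot change underneath it, instead of checking first and
looking again after the lock has been dropped.
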
mm/hugetlb_vmemmap.c | 34 ++++++++++++++--------------------
1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index c2007ef5e9b0..4b9734777f69 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -36,14 +36,22 @@ struct vmemmap_remap_walk {
 	struct list_head	*vmemmap_pages;
 };
 
-static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
+static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
 {
 	pmd_t __pmd;
 	int i;
 	unsigned long addr = start;
-	struct page *page = pmd_page(*pmd);
-	pte_t *pgtable = pte_alloc_one_kernel(&init_mm);
+	struct page *head;
+	pte_t *pgtable;
+
+	spin_lock(&init_mm.page_table_lock);
+	head = pmd_leaf(*pmd) ? pmd_page(*pmd) : NULL;
+	spin_unlock(&init_mm.page_table_lock);
 
+	if (!head)
+		return 0;
+
+	pgtable = pte_alloc_one_kernel(&init_mm);
 	if (!pgtable)
 		return -ENOMEM;
 
@@ -53,7 +61,7 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
 		pte_t entry, *pte;
 		pgprot_t pgprot = PAGE_KERNEL;
 
-		entry = mk_pte(page + i, pgprot);
+		entry = mk_pte(head + i, pgprot);
 		pte = pte_offset_kernel(&__pmd, addr);
 		set_pte_at(&init_mm, addr, pte, entry);
 	}
@@ -65,8 +73,8 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
 		 * be treated as indepdenent small pages (as they can be freed
 		 * individually).
 		 */
-		if (!PageReserved(page))
-			split_page(page, get_order(PMD_SIZE));
+		if (!PageReserved(head))
+			split_page(head, get_order(PMD_SIZE));
 
 		/* Make pte visible before pmd. See comment in pmd_install(). */
 		smp_wmb();
@@ -80,20 +88,6 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
 	return 0;
 }
 
-static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
-{
-	int leaf;
-
-	spin_lock(&init_mm.page_table_lock);
-	leaf = pmd_leaf(*pmd);
-	spin_unlock(&init_mm.page_table_lock);
-
-	if (!leaf)
-		return 0;
-
-	return __split_vmemmap_huge_pmd(pmd, start);
-}
-
 static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
 			      unsigned long end,
 			      struct vmemmap_remap_walk *walk)
--
2.11.0