Re: [PATCH V2 1/3] mm/debug: Add tests validating arch page table helpers for core features

From: Anshuman Khandual
Date: Wed Mar 25 2020 - 22:18:43 EST



On 03/24/2020 06:59 PM, Zi Yan wrote:
> On 24 Mar 2020, at 1:22, Anshuman Khandual wrote:
>
>> This adds new tests validating arch page table helpers for these following
>> core memory features. These tests create and test specific mapping types at
>> various page table levels.
>>
>> 1. SPECIAL mapping
>> 2. PROTNONE mapping
>> 3. DEVMAP mapping
>> 4. SOFTDIRTY mapping
>> 5. SWAP mapping
>> 6. MIGRATION mapping
>> 7. HUGETLB mapping
>> 8. THP mapping
>>
>> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
>> Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
>> Cc: Vineet Gupta <vgupta@xxxxxxxxxxxx>
>> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
>> Cc: Will Deacon <will@xxxxxxxxxx>
>> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
>> Cc: Paul Mackerras <paulus@xxxxxxxxx>
>> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
>> Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
>> Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx>
>> Cc: Christian Borntraeger <borntraeger@xxxxxxxxxx>
>> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
>> Cc: Borislav Petkov <bp@xxxxxxxxx>
>> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
>> Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
>> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx>
>> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx>
>> Cc: linux-snps-arc@xxxxxxxxxxxxxxxxxxx
>> Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
>> Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
>> Cc: linux-s390@xxxxxxxxxxxxxxx
>> Cc: linux-riscv@xxxxxxxxxxxxxxxxxxx
>> Cc: x86@xxxxxxxxxx
>> Cc: linux-arch@xxxxxxxxxxxxxxx
>> Cc: linux-kernel@xxxxxxxxxxxxxxx
>> Suggested-by: Catalin Marinas <catalin.marinas@xxxxxxx>
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
>> ---
>> mm/debug_vm_pgtable.c | 291 +++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 290 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 98990a515268..15055a8f6478 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -289,6 +289,267 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
>> WARN_ON(pmd_bad(pmd));
>> }
>>
>> +static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pte_t pte = pfn_pte(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
>> + return;
>> +
>> + WARN_ON(!pte_special(pte_mkspecial(pte)));
>> +}
>> +
>> +static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pte_t pte = pfn_pte(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
>> + return;
>> +
>> + WARN_ON(!pte_protnone(pte));
>> + WARN_ON(!pte_present(pte));
>> +}
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pmd_t pmd = pfn_pmd(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
>> + return;
>> +
>> + WARN_ON(!pmd_protnone(pmd));
>> + WARN_ON(!pmd_present(pmd));
>> +}
>> +#else
>> +static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { }
>> +#endif
>> +
>> +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
>> +static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pte_t pte = pfn_pte(pfn, prot);
>> +
>> + WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
>> +}
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pmd_t pmd = pfn_pmd(pfn, prot);
>> +
>> + WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
>> +}
>> +
>> +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
>> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pud_t pud = pfn_pud(pfn, prot);
>> +
>> + WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
>> +}
>> +#else
>> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
>> +#endif
>> +#else
>> +static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
>> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
>> +#endif
>> +#else
>> +static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { }
>> +static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
>> +static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
>> +#endif
>> +
>> +static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pte_t pte = pfn_pte(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY))
>> + return;
>> +
>> + WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
>> + WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
>> +}
>> +
>> +static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pte_t pte = pfn_pte(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY))
>> + return;
>> +
>> + WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
>> + WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
>> +}
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pmd_t pmd = pfn_pmd(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY))
>> + return;
>> +
>> + WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
>> + WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
>> +}
>> +
>> +static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pmd_t pmd = pfn_pmd(pfn, prot);
>> +
>> + if (!IS_ENABLED(CONFIG_HAVE_ARCH_SOFT_DIRTY) ||
>> + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
>> + return;
>> +
>> + WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
>> + WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
>> +}
>> +#else
>> +static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
>> +static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> +}
>> +#endif
>> +
>> +static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + swp_entry_t swp;
>> + pte_t pte;
>> +
>> + pte = pfn_pte(pfn, prot);
>> + swp = __pte_to_swp_entry(pte);
>> + WARN_ON(!pte_same(pte, __swp_entry_to_pte(swp)));
>> +}
>> +
>> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
>> +static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + swp_entry_t swp;
>> + pmd_t pmd;
>> +
>> + pmd = pfn_pmd(pfn, prot);
>> + swp = __pmd_to_swp_entry(pmd);
>> + WARN_ON(!pmd_same(pmd, __swp_entry_to_pmd(swp)));
>> +}
>> +#else
>> +static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { }
>> +#endif
>> +
>> +static void __init swap_migration_tests(void)
>> +{
>> + struct page *page;
>> + swp_entry_t swp;
>> +
>> + if (!IS_ENABLED(CONFIG_MIGRATION))
>> + return;
>> + /*
>> + * swap_migration_tests() requires a dedicated page as it needs to
>> + * be locked before creating a migration entry from it. Locking the
>> + * page that actually maps kernel text ('start_kernel') can be real
>> + * problematic. Lets allocate a dedicated page explicitly for this
>> + * purpose that will be freed subsequently.
>> + */
>> + page = alloc_page(GFP_KERNEL);
>> + if (!page) {
>> + pr_err("page allocation failed\n");
>> + return;
>> + }
>> +
>> + /*
>> + * make_migration_entry() expects given page to be
>> + * locked, otherwise it stumbles upon a BUG_ON().
>> + */
>> + __SetPageLocked(page);
>> + swp = make_migration_entry(page, 1);
>> + WARN_ON(!is_migration_entry(swp));
>> + WARN_ON(!is_write_migration_entry(swp));
>> +
>> + make_migration_entry_read(&swp);
>> + WARN_ON(!is_migration_entry(swp));
>> + WARN_ON(is_write_migration_entry(swp));
>> +
>> + swp = make_migration_entry(page, 0);
>> + WARN_ON(!is_migration_entry(swp));
>> + WARN_ON(is_write_migration_entry(swp));
>> + __ClearPageLocked(page);
>> + __free_page(page);
>> +}
>> +
>> +#ifdef CONFIG_HUGETLB_PAGE
>> +static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + struct page *page;
>> + pte_t pte;
>> +
>> + /*
>> + * Accessing the page associated with the pfn is safe here,
>> + * as it was previously derived from a real kernel symbol.
>> + */
>> + page = pfn_to_page(pfn);
>> + pte = mk_huge_pte(page, prot);
>> +
>> + WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
>> + WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
>> + WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
>> +
>> +#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
>> + pte = pfn_pte(pfn, prot);
>> +
>> + WARN_ON(!pte_huge(pte_mkhuge(pte)));
>> +#endif
>> +}
>> +#else
>> +static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { }
>> +#endif
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
>> +{
>> + pmd_t pmd;
>> +
>> + /*
>> + * pmd_trans_huge() and pmd_present() must return positive
>> + * after MMU invalidation with pmd_mknotpresent().
>> + */
>> + pmd = pfn_pmd(pfn, prot);
>> + WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
>> +
>> +#ifndef __HAVE_ARCH_PMDP_INVALIDATE
>> + WARN_ON(!pmd_trans_huge(pmd_mknotpresent(pmd_mkhuge(pmd))));
>> + WARN_ON(!pmd_present(pmd_mknotpresent(pmd_mkhuge(pmd))));
>> +#endif
>
> I think we need a better comment here, because requiring pmd_trans_huge() and
> pmd_present() returning true after pmd_mknotpresent() is not straightforward.

Thats right.

>
> According to Andrea Arcangeliâs email (https://lore.kernel.org/linux-mm/20181017020930.GN30832@xxxxxxxxxx/),
> This behavior is an optimization for transparent huge page.
> pmd_trans_huge() must be true if pmd_page() returns you a valid THP to avoid
> taking the pmd_lock when others walk over non transhuge pmds (i.e. there are no
> THP allocated). Especially when we split a THP, removing the present bit from
> the pmd, pmd_trans_huge() still needs to return true. pmd_present() should
> be true whenever pmd_trans_huge() returns true.

Sure, will modify the existing comment here like this.

/*
* pmd_trans_huge() and pmd_present() must return positive after
* MMU invalidation with pmd_mknotpresent(). This behavior is an
* optimization for transparent huge page. pmd_trans_huge() must
* be true if pmd_page() returns a valid THP to avoid taking the
* pmd_lock when others walk over non transhuge pmds (i.e. there
* are no THP allocated). Especially when splitting a THP and
* removing the present bit from the pmd, pmd_trans_huge() still
* needs to return true. pmd_present() should be true whenever
* pmd_trans_huge() returns true.
*/

>
> I think it is also worth either putting Andresâs email or the link to it
> in the rst file in your 3rd patch. It is a good documentation for this special
> case.

Makes sense. Will update Andrea's email link in the .rst file as well.

>
> â
> Best Regards,
> Yan Zi
>