Re: [PATCH v2 04/32] mm/pgtable: allow pte_offset_map[_lock]() to fail

From: Yongqin Liu
Date: Fri Jul 28 2023 - 09:53:48 EST


Hi, Hugh

It seems this change makes pte_offset_map_lock not possible to be
called in out of tree modules,
otherwise it will report error like this:
ERROR: modpost: "__pte_offset_map_lock"
[../omap-modules/android-mainline/pvr/pvrsrvkm.ko] undefined!

Not sure if you have any idea about it, and any suggestions on how to
resolve it?

Thanks,
Yongqin Liu

On Fri, 9 Jun 2023 at 09:10, Hugh Dickins <hughd@xxxxxxxxxx> wrote:
>
> Make pte_offset_map() a wrapper for __pte_offset_map() (optionally
> outputs pmdval), pte_offset_map_lock() a sparse __cond_lock wrapper for
> __pte_offset_map_lock(): those __funcs added in mm/pgtable-generic.c.
>
> __pte_offset_map() do pmdval validation (including pmd_clear_bad()
> when pmd_bad()), returning NULL if pmdval is not for a page table.
> __pte_offset_map_lock() verify pmdval unchanged after getting the
> lock, trying again if it changed.
>
> No #ifdef CONFIG_TRANSPARENT_HUGEPAGE around them: that could be done
> to cover the imminent case, but we expect to generalize it later, and
> it makes a mess of where to do the pmd_bad() clearing.
>
> Add pte_offset_map_nolock(): outputs ptl like pte_offset_map_lock(),
> without actually taking the lock. This will be preferred to open uses of
> pte_lockptr(), because (when split ptlock is in page table's struct page)
> it points to the right lock for the returned pte pointer, even if *pmd
> gets changed racily afterwards.
>
> Update corresponding Documentation.
>
> Do not add the anticipated rcu_read_lock() and rcu_read_unlock()s yet:
> they have to wait until all architectures are balancing pte_offset_map()s
> with pte_unmap()s (as in the arch series posted earlier). But comment
> where they will go, so that it's easy to add them for experiments. And
> only when those are in place can transient racy failure cases be enabled.
> Add more safety for the PAE mismatched pmd_low pmd_high case at that time.
>
> Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx>
> ---
> Documentation/mm/split_page_table_lock.rst | 17 ++++---
> include/linux/mm.h | 27 +++++++----
> include/linux/pgtable.h | 22 ++++++---
> mm/pgtable-generic.c | 56 ++++++++++++++++++++++
> 4 files changed, 101 insertions(+), 21 deletions(-)
>
> diff --git a/Documentation/mm/split_page_table_lock.rst b/Documentation/mm/split_page_table_lock.rst
> index 50ee0dfc95be..a834fad9de12 100644
> --- a/Documentation/mm/split_page_table_lock.rst
> +++ b/Documentation/mm/split_page_table_lock.rst
> @@ -14,15 +14,20 @@ tables. Access to higher level tables protected by mm->page_table_lock.
> There are helpers to lock/unlock a table and other accessor functions:
>
> - pte_offset_map_lock()
> - maps pte and takes PTE table lock, returns pointer to the taken
> - lock;
> + maps PTE and takes PTE table lock, returns pointer to PTE with
> + pointer to its PTE table lock, or returns NULL if no PTE table;
> + - pte_offset_map_nolock()
> + maps PTE, returns pointer to PTE with pointer to its PTE table
> + lock (not taken), or returns NULL if no PTE table;
> + - pte_offset_map()
> + maps PTE, returns pointer to PTE, or returns NULL if no PTE table;
> + - pte_unmap()
> + unmaps PTE table;
> - pte_unmap_unlock()
> unlocks and unmaps PTE table;
> - pte_alloc_map_lock()
> - allocates PTE table if needed and take the lock, returns pointer
> - to taken lock or NULL if allocation failed;
> - - pte_lockptr()
> - returns pointer to PTE table lock;
> + allocates PTE table if needed and takes its lock, returns pointer to
> + PTE with pointer to its lock, or returns NULL if allocation failed;
> - pmd_lock()
> takes PMD table lock, returns pointer to taken lock;
> - pmd_lockptr()
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 27ce77080c79..3c2e56980853 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2787,14 +2787,25 @@ static inline void pgtable_pte_page_dtor(struct page *page)
> dec_lruvec_page_state(page, NR_PAGETABLE);
> }
>
> -#define pte_offset_map_lock(mm, pmd, address, ptlp) \
> -({ \
> - spinlock_t *__ptl = pte_lockptr(mm, pmd); \
> - pte_t *__pte = pte_offset_map(pmd, address); \
> - *(ptlp) = __ptl; \
> - spin_lock(__ptl); \
> - __pte; \
> -})
> +pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
> +static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
> +{
> + return __pte_offset_map(pmd, addr, NULL);
> +}
> +
> +pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
> + unsigned long addr, spinlock_t **ptlp);
> +static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
> + unsigned long addr, spinlock_t **ptlp)
> +{
> + pte_t *pte;
> +
> + __cond_lock(*ptlp, pte = __pte_offset_map_lock(mm, pmd, addr, ptlp));
> + return pte;
> +}
> +
> +pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
> + unsigned long addr, spinlock_t **ptlp);
>
> #define pte_unmap_unlock(pte, ptl) do { \
> spin_unlock(ptl); \
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 94235ff2706e..3fabbb018557 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -94,14 +94,22 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
> #define pte_offset_kernel pte_offset_kernel
> #endif
>
> -#if defined(CONFIG_HIGHPTE)
> -#define pte_offset_map(dir, address) \
> - ((pte_t *)kmap_local_page(pmd_page(*(dir))) + \
> - pte_index((address)))
> -#define pte_unmap(pte) kunmap_local((pte))
> +#ifdef CONFIG_HIGHPTE
> +#define __pte_map(pmd, address) \
> + ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
> +#define pte_unmap(pte) do { \
> + kunmap_local((pte)); \
> + /* rcu_read_unlock() to be added later */ \
> +} while (0)
> #else
> -#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
> -#define pte_unmap(pte) ((void)(pte)) /* NOP */
> +static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
> +{
> + return pte_offset_kernel(pmd, address);
> +}
> +static inline void pte_unmap(pte_t *pte)
> +{
> + /* rcu_read_unlock() to be added later */
> +}
> #endif
>
> /* Find an entry in the second-level page table.. */
> diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
> index d2fc52bffafc..c7ab18a5fb77 100644
> --- a/mm/pgtable-generic.c
> +++ b/mm/pgtable-generic.c
> @@ -10,6 +10,8 @@
> #include <linux/pagemap.h>
> #include <linux/hugetlb.h>
> #include <linux/pgtable.h>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
> #include <linux/mm_inline.h>
> #include <asm/tlb.h>
>
> @@ -229,3 +231,57 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
> }
> #endif
> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> +
> +pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
> +{
> + pmd_t pmdval;
> +
> + /* rcu_read_lock() to be added later */
> + pmdval = pmdp_get_lockless(pmd);
> + if (pmdvalp)
> + *pmdvalp = pmdval;
> + if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval)))
> + goto nomap;
> + if (unlikely(pmd_trans_huge(pmdval) || pmd_devmap(pmdval)))
> + goto nomap;
> + if (unlikely(pmd_bad(pmdval))) {
> + pmd_clear_bad(pmd);
> + goto nomap;
> + }
> + return __pte_map(&pmdval, addr);
> +nomap:
> + /* rcu_read_unlock() to be added later */
> + return NULL;
> +}
> +
> +pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
> + unsigned long addr, spinlock_t **ptlp)
> +{
> + pmd_t pmdval;
> + pte_t *pte;
> +
> + pte = __pte_offset_map(pmd, addr, &pmdval);
> + if (likely(pte))
> + *ptlp = pte_lockptr(mm, &pmdval);
> + return pte;
> +}
> +
> +pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
> + unsigned long addr, spinlock_t **ptlp)
> +{
> + spinlock_t *ptl;
> + pmd_t pmdval;
> + pte_t *pte;
> +again:
> + pte = __pte_offset_map(pmd, addr, &pmdval);
> + if (unlikely(!pte))
> + return pte;
> + ptl = pte_lockptr(mm, &pmdval);
> + spin_lock(ptl);
> + if (likely(pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
> + *ptlp = ptl;
> + return pte;
> + }
> + pte_unmap_unlock(pte, ptl);
> + goto again;
> +}
> --
> 2.35.3
>


--
Best Regards,
Yongqin Liu
---------------------------------------------------------------
#mailing list
linaro-android@xxxxxxxxxxxxxxxx
http://lists.linaro.org/mailman/listinfo/linaro-android