Re: [PATCH v1 3/3] mm: merge folio_is_secretmem() into folio_fast_pin_allowed()

From: Mike Rapoport
Date: Tue Mar 26 2024 - 02:31:32 EST


On Mon, Mar 25, 2024 at 02:41:14PM +0100, David Hildenbrand wrote:
> folio_is_secretmem() is currently only used during GUP-fast, and using
> it in the wrong context, where concurrent truncation might happen, could be
> problematic.
>
> Nowadays, folio_fast_pin_allowed() performs similar checks during
> GUP-fast and contains a lot of careful handling -- READ_ONCE() --, sanity
> checks -- lockdep_assert_irqs_disabled() -- and helpful comments on how
> this handling is safe and correct.
>
> So let's merge folio_is_secretmem() into folio_fast_pin_allowed(), still
> checking the actual mapping only if really required.
>
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>

Reviewed-by: Mike Rapoport (IBM) <rppt@xxxxxxxxxx>

A few comments below, no strong feelings about them.

> ---
> include/linux/secretmem.h | 21 ++-------------------
> mm/gup.c | 33 +++++++++++++++++++++------------
> 2 files changed, 23 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
> index 6996f1f53f14..e918f96881f5 100644
> --- a/include/linux/secretmem.h
> +++ b/include/linux/secretmem.h
> @@ -6,25 +6,8 @@
>
> extern const struct address_space_operations secretmem_aops;
>
> -static inline bool folio_is_secretmem(struct folio *folio)
> +static inline bool secretmem_mapping(struct address_space *mapping)
> {
> - struct address_space *mapping;
> -
> - /*
> - * Using folio_mapping() is quite slow because of the actual call
> - * instruction.
> - * We know that secretmem pages are not compound and LRU so we can
> - * save a couple of cycles here.
> - */
> - if (folio_test_large(folio) || folio_test_lru(folio))
> - return false;
> -
> - mapping = (struct address_space *)
> - ((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
> -
> - if (!mapping || mapping != folio->mapping)
> - return false;
> -
> return mapping->a_ops == &secretmem_aops;
> }
>
> @@ -38,7 +21,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma)
> return false;
> }
>
> -static inline bool folio_is_secretmem(struct folio *folio)
> +static inline bool secretmem_mapping(struct address_space *mapping)
> {
> return false;
> }
> diff --git a/mm/gup.c b/mm/gup.c
> index e7510b6ce765..69d8bc8e4451 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2472,6 +2472,8 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
> * This call assumes the caller has pinned the folio, that the lowest page table
> * level still points to this folio, and that interrupts have been disabled.
> *
> + * GUP-fast must reject all secretmem folios.
> + *
> * Writing to pinned file-backed dirty tracked folios is inherently problematic
> * (see comment describing the writable_file_mapping_allowed() function). We
> * therefore try to avoid the most egregious case of a long-term mapping doing
> @@ -2484,22 +2486,32 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
> static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)

Now that this function covers GUP-fast checks in general, maybe it's worth
renaming it to, say, folio_fast_gup_allowed().
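
I.e. something along these lines, just to illustrate the name; not meant
as a literal diff, only the identifier would change:

        static bool folio_fast_gup_allowed(struct folio *folio, unsigned int flags)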

> {
> struct address_space *mapping;
> + bool check_secretmem = false;
> + bool reject_file_backed = false;
> unsigned long mapping_flags;
>
> /*
> * If we aren't pinning then no problematic write can occur. A long term
> * pin is the most egregious case so this is the one we disallow.
> */
> - if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) !=
> + if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) ==
> (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE))
> - return true;
> + reject_file_backed = true;
> +
> + /* We hold a folio reference, so we can safely access folio fields. */
>
> - /* The folio is pinned, so we can safely access folio fields. */
> + /* secretmem folios are only order-0 folios and never LRU folios. */

Nit: ^ always

> + if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio) &&
> + !folio_test_lru(folio))
> + check_secretmem = true;
> +
> + if (!reject_file_backed && !check_secretmem)
> + return true;
>
> if (WARN_ON_ONCE(folio_test_slab(folio)))
> return false;
>
> - /* hugetlb mappings do not require dirty-tracking. */
> + /* hugetlb neither requires dirty-tracking nor can be secretmem. */
> if (folio_test_hugetlb(folio))
> return true;
>
> @@ -2535,10 +2547,12 @@ static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
>
> /*
> * At this point, we know the mapping is non-null and points to an
> - * address_space object. The only remaining whitelisted file system is
> - * shmem.
> + * address_space object.
> */
> - return shmem_mapping(mapping);
> + if (check_secretmem && secretmem_mapping(mapping))
> + return false;
> + /* The only remaining allowed file system is shmem. */
> + return !reject_file_backed || shmem_mapping(mapping);
> }
>
> static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
> @@ -2624,11 +2638,6 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
> if (!folio)
> goto pte_unmap;
>
> - if (unlikely(folio_is_secretmem(folio))) {
> - gup_put_folio(folio, 1, flags);
> - goto pte_unmap;
> - }
> -
> if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
> unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
> gup_put_folio(folio, 1, flags);
> --
> 2.43.2
>
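
As an aside, with folio_is_secretmem() gone, a future user outside of
GUP-fast would presumably go through the mapping under the appropriate
locking, roughly like this (hypothetical sketch, nothing this patch needs
to add):

        struct address_space *mapping = folio_mapping(folio);

        if (mapping && secretmem_mapping(mapping))
                /* reject or special-case the secretmem folio */;

That keeps the lockless trickery nicely confined to gup.c.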

--
Sincerely yours,
Mike.