[PATCH 13/13] mm/mmu_notifier: kill invalidate_page

From: jglisse
Date: Thu Aug 31 2017 - 17:18:21 EST


From: JÃrÃme Glisse <jglisse@xxxxxxxxxx>

The invalidate_page callback suffered from 2 pitfalls. First it use to
happen after page table lock was release and thus a new page might have
setup before the call to invalidate_page() happened.

This is in a weird way fix by c7ab0d2fdc840266b39db94538f74207ec2afbf6
that moved the callback under the page table lock but this also break
several existing user of the mmu_notifier API that assumed they could
sleep inside this callback.

The second pitfall was invalidate_page being the only callback not taking
a range of address in respect to invalidation but was giving an address
and a page. Lot of the callback implementer assumed this could never be
THP and thus failed to invalidate the appropriate range for THP.

By killing this callback we unify the mmu_notifier callback API to always
take a virtual address range as input.

Finaly this also simplify the end user life as there is now 2 clear
choice:
- invalidate_range_start()/end() callback (which allow you to sleep)
- invalidate_range() where you can not sleep but happen right after
page table update under page table lock

Signed-off-by: JÃrÃme Glisse <jglisse@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Bernhard Held <berny156@xxxxxx>
Cc: Adam Borowski <kilobyte@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
Cc: Wanpeng Li <kernellwp@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Takashi Iwai <tiwai@xxxxxxx>
Cc: Nadav Amit <nadav.amit@xxxxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: axie <axie@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---
include/linux/mmu_notifier.h | 25 -------------------------
mm/mmu_notifier.c | 14 --------------
2 files changed, 39 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index c91b3bcd158f..7b2e31b1745a 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -95,17 +95,6 @@ struct mmu_notifier_ops {
pte_t pte);

/*
- * Before this is invoked any secondary MMU is still ok to
- * read/write to the page previously pointed to by the Linux
- * pte because the page hasn't been freed yet and it won't be
- * freed until this returns. If required set_page_dirty has to
- * be called internally to this method.
- */
- void (*invalidate_page)(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address);
-
- /*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_sem and/or the
* locks protecting the reverse maps are held. If the subsystem
@@ -220,8 +209,6 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
- unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -268,13 +255,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
__mmu_notifier_change_pte(mm, address, pte);
}

-static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
- unsigned long address)
-{
- if (mm_has_notifiers(mm))
- __mmu_notifier_invalidate_page(mm, address);
-}
-
static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
@@ -442,11 +422,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
{
}

-static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
- unsigned long address)
-{
-}
-
static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 54ca54562928..314285284e6e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,20 +174,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
srcu_read_unlock(&srcu, id);
}

-void __mmu_notifier_invalidate_page(struct mm_struct *mm,
- unsigned long address)
-{
- struct mmu_notifier *mn;
- int id;
-
- id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
- if (mn->ops->invalidate_page)
- mn->ops->invalidate_page(mn, mm, address);
- }
- srcu_read_unlock(&srcu, id);
-}
-
void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
--
2.13.5