[PATCH] mmu_notifier: add call_srcu and sync function for listener to delay call and sync

From: Oded Gabbay
Date: Sat Aug 02 2014 - 09:11:16 EST


From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

When kernel device drivers or subsystems want to bind their lifespan to the
lifespan of the mm_struct, they usually use one of the following methods:

1. Manually calling a function in the interested kernel module. The function
call needs to be placed in mmput. This method was rejected by several kernel
maintainers.

2. Registering to the mmu notifier release mechanism.

The problem with the latter approach is that the mmu_notifier_release callback
is called from__mmu_notifier_release (called from exit_mmap). That function
iterates over the list of mmu notifiers and don't expect the release callback
function to remove itself from the list. Therefore, the callback function in
the kernel module can't release the mmu_notifier_object, which is actually the
kernel module's object itself. As a result, the destruction of the kernel
module's object must to be done in a delayed fashion.

This patch adds support for this delayed callback, by adding a new
mmu_notifier_call_srcu function that receives a function ptr and calls that
function with call_srcu. In that function, the kernel module releases its
object. To use mmu_notifier_call_srcu, the calling module needs to call before
that a new function called mmu_notifier_unregister_no_release that as its name
implies, unregisters a notifier without calling its notifier release callback.

This patch also adds a function that will call barrier_srcu so those kernel
modules can sync with mmu_notifier.

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: JÃrÃme Glisse <jglisse@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxx>
---
include/linux/mmu_notifier.h | 6 ++++++
mm/mmu_notifier.c | 40 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index deca874..2728869 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
struct mm_struct *mm);
+extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+ struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
set_pte_at(___mm, ___address, __ptep, ___pte); \
})

+extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
+ void (*func)(struct rcu_head *rcu));
+extern void mmu_notifier_synchronize(void);
+
#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 41cefdf..950813b 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -23,6 +23,25 @@
static struct srcu_struct srcu;

/*
+ * This function allows mmu_notifier::release callback to delay a call to
+ * a function that will free appropriate resources. The function must be
+ * quick and must not block.
+ */
+void mmu_notifier_call_srcu(struct rcu_head *rcu,
+ void (*func)(struct rcu_head *rcu))
+{
+ call_srcu(&srcu, rcu, func);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+
+void mmu_notifier_synchronize(void)
+{
+ /* Wait for any running method to finish. */
+ srcu_barrier(&srcu);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
+
+/*
* This function can't run concurrently against mmu_notifier_register
* because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
* runs with mm_users == 0. Other tasks may still invoke mmu notifiers
@@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
*/
if (mn->ops->release)
mn->ops->release(mn, mm);
- srcu_read_unlock(&srcu, id);

spin_lock(&mm->mmu_notifier_mm->lock);
while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
hlist_del_init_rcu(&mn->hlist);
}
spin_unlock(&mm->mmu_notifier_mm->lock);
+ srcu_read_unlock(&srcu, id);

/*
* synchronize_srcu here prevents mmu_notifier_release from returning to
@@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);

+/*
+ * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
+ */
+void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ /*
+ * Can not use list_del_rcu() since __mmu_notifier_release
+ * can delete it before we hold the lock.
+ */
+ hlist_del_init_rcu(&mn->hlist);
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+
+ BUG_ON(atomic_read(&mm->mm_count) <= 0);
+ mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+
static int __init mmu_notifier_init(void)
{
return init_srcu_struct(&srcu);
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/