[PATCH] mm,vmscan: Allow preallocating memory for register_shrinker().

From: Tetsuo Handa
Date: Tue Apr 03 2018 - 23:19:42 EST


syzbot is catching so many bugs triggered by commit 9ee332d99e4d5a97
("sget(): handle failures of register_shrinker()"). That commit expected
that calling kill_sb() from deactivate_locked_super() without successful
fill_super() is safe, but the reality was different; some callers assign
attributes which are needed for kill_sb() after sget() succeeds.

For example, [1] is a report where sb->s_mode (which seems to be either
FMODE_READ | FMODE_EXCL | FMODE_WRITE or FMODE_READ | FMODE_EXCL) is not
assigned unless sget() succeeds. But it does not worth complicate sget()
so that register_shrinker() failure path can safely call
kill_block_super() via kill_sb(). Making alloc_super() fail if memory
allocation for register_shrinker() failed is much simpler. Let's avoid
calling deactivate_locked_super() from sget_userns() by preallocating
memory for the shrinker and making register_shrinker() in sget_userns()
never fail.

[1] https://syzkaller.appspot.com/bug?id=588996a25a2587be2e3a54e8646728fb9cae44e7

Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Reported-by: syzbot <syzbot+5a170e19c963a2e0df79@xxxxxxxxxxxxxxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
---
fs/super.c | 9 ++++-----
include/linux/shrinker.h | 6 ++++--
mm/vmscan.c | 15 ++++++++++++++-
3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 672538c..db00f67 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -166,6 +166,7 @@ static void destroy_unused_super(struct super_block *s)
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
+ kfree(s->s_shrink.nr_deferred);
/* no delays needed */
destroy_super_work(&s->destroy_work);
}
@@ -251,6 +252,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_shrink.count_objects = super_cache_count;
s->s_shrink.batch = 1024;
s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
+ if (prepare_shrinker(&s->s_shrink))
+ goto fail;
return s;

fail:
@@ -517,11 +520,7 @@ struct super_block *sget_userns(struct file_system_type *type,
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
- err = register_shrinker(&s->s_shrink);
- if (err) {
- deactivate_locked_super(s);
- s = ERR_PTR(err);
- }
+ register_shrinker_prepared(&s->s_shrink);
return s;
}

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 388ff29..2728918 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -75,6 +75,8 @@ struct shrinker {
#define SHRINKER_NUMA_AWARE (1 << 0)
#define SHRINKER_MEMCG_AWARE (1 << 1)

-extern int register_shrinker(struct shrinker *);
-extern void unregister_shrinker(struct shrinker *);
+extern int prepare_shrinker(struct shrinker *shrinker);
+extern void register_shrinker_prepared(struct shrinker *shrinker);
+extern int register_shrinker(struct shrinker *shrinker);
+extern void unregister_shrinker(struct shrinker *shrinker);
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cd5dc3f..a10fe8e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -258,7 +258,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
/*
* Add a shrinker callback to be called from the vm.
*/
-int register_shrinker(struct shrinker *shrinker)
+int prepare_shrinker(struct shrinker *shrinker)
{
size_t size = sizeof(*shrinker->nr_deferred);

@@ -268,10 +268,23 @@ int register_shrinker(struct shrinker *shrinker)
shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
if (!shrinker->nr_deferred)
return -ENOMEM;
+ return 0;
+}

+void register_shrinker_prepared(struct shrinker *shrinker)
+{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
up_write(&shrinker_rwsem);
+}
+
+int register_shrinker(struct shrinker *shrinker)
+{
+ int err = prepare_shrinker(shrinker);
+
+ if (err)
+ return err;
+ register_shrinker_prepared(shrinker);
return 0;
}
EXPORT_SYMBOL(register_shrinker);
--
1.8.3.1