Re: Severe performance regression w/ 4.4+ on Android due to cgroup locking changes

From: Dmitry Shmidt
Date: Wed Jul 13 2016 - 16:31:44 EST


On Wed, Jul 13, 2016 at 11:33 AM, Tejun Heo <tj@xxxxxxxxxx> wrote:
> On Wed, Jul 13, 2016 at 02:21:02PM -0400, Tejun Heo wrote:
>> One interesting thing to try would be replacing it with a regular
>> non-percpu rwsem and see how it behaves. That should easily tell us
>> whether this is from actual contention or artifacts from percpu_rwsem
>> implementation.
>
> So, something like the following. Can you please see whether this
> makes any difference?

It is making difference. We need to conduct more tests, but it
looks much better. I see only one 18.5 ms delay. Most of time it
is <200 us delay.

> Thanks.
>
> diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> index 5b17de6..bc1e4d8 100644
> --- a/include/linux/cgroup-defs.h
> +++ b/include/linux/cgroup-defs.h
> @@ -14,7 +14,7 @@
> #include <linux/mutex.h>
> #include <linux/rcupdate.h>
> #include <linux/percpu-refcount.h>
> -#include <linux/percpu-rwsem.h>
> +#include <linux/rwsem.h>
> #include <linux/workqueue.h>
>
> #ifdef CONFIG_CGROUPS
> @@ -518,7 +518,7 @@ struct cgroup_subsys {
> unsigned int depends_on;
> };
>
> -extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
> +extern struct rw_semaphore cgroup_threadgroup_rwsem;
>
> /**
> * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
> @@ -529,7 +529,7 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
> */
> static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
> {
> - percpu_down_read(&cgroup_threadgroup_rwsem);
> + down_read(&cgroup_threadgroup_rwsem);
> }
>
> /**
> @@ -541,7 +541,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
> */
> static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
> {
> - percpu_up_read(&cgroup_threadgroup_rwsem);
> + up_read(&cgroup_threadgroup_rwsem);
> }
>
> #else /* CONFIG_CGROUPS */
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 86cb5c6..ed9c142 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -113,7 +113,7 @@ static DEFINE_SPINLOCK(cgroup_file_kn_lock);
> */
> static DEFINE_SPINLOCK(release_agent_path_lock);
>
> -struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
> +DECLARE_RWSEM(cgroup_threadgroup_rwsem);
>
> #define cgroup_assert_mutex_or_rcu_locked() \
> RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
> @@ -2899,7 +2899,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
> if (!cgrp)
> return -ENODEV;
>
> - percpu_down_write(&cgroup_threadgroup_rwsem);
> + down_write(&cgroup_threadgroup_rwsem);
> rcu_read_lock();
> if (pid) {
> tsk = find_task_by_vpid(pid);
> @@ -2937,7 +2937,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
> out_unlock_rcu:
> rcu_read_unlock();
> out_unlock_threadgroup:
> - percpu_up_write(&cgroup_threadgroup_rwsem);
> + up_write(&cgroup_threadgroup_rwsem);
> for_each_subsys(ss, ssid)
> if (ss->post_attach)
> ss->post_attach();
> @@ -3077,7 +3077,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
>
> lockdep_assert_held(&cgroup_mutex);
>
> - percpu_down_write(&cgroup_threadgroup_rwsem);
> + down_write(&cgroup_threadgroup_rwsem);
>
> /* look up all csses currently attached to @cgrp's subtree */
> spin_lock_bh(&css_set_lock);
> @@ -3112,7 +3112,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
> ret = cgroup_taskset_migrate(&tset, cgrp->root);
> out_finish:
> cgroup_migrate_finish(&preloaded_csets);
> - percpu_up_write(&cgroup_threadgroup_rwsem);
> + up_write(&cgroup_threadgroup_rwsem);
> return ret;
> }
>
> @@ -5601,7 +5601,7 @@ int __init cgroup_init(void)
> int ssid;
>
> BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
> - BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
> + //BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
> BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
> BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
>