Re: [RFC PATCH V3 1/6] sched: Unify runtime accounting across classes

From: Phil Auld
Date: Tue Jun 13 2023 - 09:26:13 EST


On Thu, Jun 08, 2023 at 05:58:13PM +0200 Daniel Bristot de Oliveira wrote:
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
>
> All classes use sched_entity::exec_start to track runtime and have
> copies of the exact same code around to compute runtime.
>
> Collapse all that.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>

Reviewed-by: Phil Auld <pauld@xxxxxxxxxx>

> ---
> include/linux/sched.h | 2 +-
> kernel/sched/deadline.c | 15 +++--------
> kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++----------
> kernel/sched/rt.c | 15 +++--------
> kernel/sched/sched.h | 12 ++-------
> kernel/sched/stop_task.c | 13 +--------
> 6 files changed, 53 insertions(+), 61 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 1292d38d66cc..26b1925a702a 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -521,7 +521,7 @@ struct sched_statistics {
> u64 block_max;
> s64 sum_block_runtime;
>
> - u64 exec_max;
> + s64 exec_max;
> u64 slice_max;
>
> u64 nr_migrations_cold;
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index f827067ad03b..030e7c11607f 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1301,9 +1301,8 @@ static void update_curr_dl(struct rq *rq)
> {
> struct task_struct *curr = rq->curr;
> struct sched_dl_entity *dl_se = &curr->dl;
> - u64 delta_exec, scaled_delta_exec;
> + s64 delta_exec, scaled_delta_exec;
> int cpu = cpu_of(rq);
> - u64 now;
>
> if (!dl_task(curr) || !on_dl_rq(dl_se))
> return;
> @@ -1316,21 +1315,13 @@ static void update_curr_dl(struct rq *rq)
> * natural solution, but the full ramifications of this
> * approach need further study.
> */
> - now = rq_clock_task(rq);
> - delta_exec = now - curr->se.exec_start;
> - if (unlikely((s64)delta_exec <= 0)) {
> + delta_exec = update_curr_common(rq);
> + if (unlikely(delta_exec <= 0)) {
> if (unlikely(dl_se->dl_yielded))
> goto throttle;
> return;
> }
>
> - schedstat_set(curr->stats.exec_max,
> - max(curr->stats.exec_max, delta_exec));
> -
> - trace_sched_stat_runtime(curr, delta_exec, 0);
> -
> - update_current_exec_runtime(curr, now, delta_exec);
> -
> if (dl_entity_is_special(dl_se))
> return;
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 6189d1a45635..fda67f05190d 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -891,23 +891,17 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
> }
> #endif /* CONFIG_SMP */
>
> -/*
> - * Update the current task's runtime statistics.
> - */
> -static void update_curr(struct cfs_rq *cfs_rq)
> +static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
> {
> - struct sched_entity *curr = cfs_rq->curr;
> - u64 now = rq_clock_task(rq_of(cfs_rq));
> - u64 delta_exec;
> -
> - if (unlikely(!curr))
> - return;
> + u64 now = rq_clock_task(rq);
> + s64 delta_exec;
>
> delta_exec = now - curr->exec_start;
> - if (unlikely((s64)delta_exec <= 0))
> - return;
> + if (unlikely(delta_exec <= 0))
> + return delta_exec;
>
> curr->exec_start = now;
> + curr->sum_exec_runtime += delta_exec;
>
> if (schedstat_enabled()) {
> struct sched_statistics *stats;
> @@ -917,8 +911,43 @@ static void update_curr(struct cfs_rq *cfs_rq)
> max(delta_exec, stats->exec_max));
> }
>
> - curr->sum_exec_runtime += delta_exec;
> - schedstat_add(cfs_rq->exec_clock, delta_exec);
> + return delta_exec;
> +}
> +
> +/*
> + * Used by other classes to account runtime.
> + */
> +s64 update_curr_common(struct rq *rq)
> +{
> + struct task_struct *curr = rq->curr;
> + s64 delta_exec;
> +
> + delta_exec = update_curr_se(rq, &curr->se);
> + if (unlikely(delta_exec <= 0))
> + return delta_exec;
> +
> + trace_sched_stat_runtime(curr, delta_exec, 0);
> +
> + account_group_exec_runtime(curr, delta_exec);
> + cgroup_account_cputime(curr, delta_exec);
> +
> + return delta_exec;
> +}
> +
> +/*
> + * Update the current task's runtime statistics.
> + */
> +static void update_curr(struct cfs_rq *cfs_rq)
> +{
> + struct sched_entity *curr = cfs_rq->curr;
> + s64 delta_exec;
> +
> + if (unlikely(!curr))
> + return;
> +
> + delta_exec = update_curr_se(rq_of(cfs_rq), curr);
> + if (unlikely(delta_exec <= 0))
> + return;
>
> curr->vruntime += calc_delta_fair(delta_exec, curr);
> update_min_vruntime(cfs_rq);
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 00e0e5074115..efec4f3fef83 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1046,24 +1046,15 @@ static void update_curr_rt(struct rq *rq)
> {
> struct task_struct *curr = rq->curr;
> struct sched_rt_entity *rt_se = &curr->rt;
> - u64 delta_exec;
> - u64 now;
> + s64 delta_exec;
>
> if (curr->sched_class != &rt_sched_class)
> return;
>
> - now = rq_clock_task(rq);
> - delta_exec = now - curr->se.exec_start;
> - if (unlikely((s64)delta_exec <= 0))
> + delta_exec = update_curr_common(rq);
> + if (unlikely(delta_exec <= 0))
> return;
>
> - schedstat_set(curr->stats.exec_max,
> - max(curr->stats.exec_max, delta_exec));
> -
> - trace_sched_stat_runtime(curr, delta_exec, 0);
> -
> - update_current_exec_runtime(curr, now, delta_exec);
> -
> if (!rt_bandwidth_enabled())
> return;
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 556496c77dc2..da0cec2fc63a 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2176,6 +2176,8 @@ struct affinity_context {
> unsigned int flags;
> };
>
> +extern s64 update_curr_common(struct rq *rq);
> +
> struct sched_class {
>
> #ifdef CONFIG_UCLAMP_TASK
> @@ -3207,16 +3209,6 @@ extern int sched_dynamic_mode(const char *str);
> extern void sched_dynamic_update(int mode);
> #endif
>
> -static inline void update_current_exec_runtime(struct task_struct *curr,
> - u64 now, u64 delta_exec)
> -{
> - curr->se.sum_exec_runtime += delta_exec;
> - account_group_exec_runtime(curr, delta_exec);
> -
> - curr->se.exec_start = now;
> - cgroup_account_cputime(curr, delta_exec);
> -}
> -
> #ifdef CONFIG_SCHED_MM_CID
>
> #define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
> diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
> index 85590599b4d6..7595494ceb6d 100644
> --- a/kernel/sched/stop_task.c
> +++ b/kernel/sched/stop_task.c
> @@ -70,18 +70,7 @@ static void yield_task_stop(struct rq *rq)
>
> static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
> {
> - struct task_struct *curr = rq->curr;
> - u64 now, delta_exec;
> -
> - now = rq_clock_task(rq);
> - delta_exec = now - curr->se.exec_start;
> - if (unlikely((s64)delta_exec < 0))
> - delta_exec = 0;
> -
> - schedstat_set(curr->stats.exec_max,
> - max(curr->stats.exec_max, delta_exec));
> -
> - update_current_exec_runtime(curr, now, delta_exec);
> + update_curr_common(rq);
> }
>
> /*
> --
> 2.40.1
>

--