[PATCH v3 14/14] sched: Fix runtime accounting w/ proxy-execution

From: John Stultz
Date: Tue Apr 11 2023 - 00:27:29 EST


The idea here is we want to charge the selected task's vruntime
but charge the executed task's sum_exec_runtime.

This way cputime accounting goes against the task actually running
but vruntime accounting goes against the selected task so we get
proper fairness.

Cc: Joel Fernandes <joelaf@xxxxxxxxxx>
Cc: Qais Yousef <qyousef@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Zimuzo Ezeozue <zezeozue@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: "Paul E . McKenney" <paulmck@xxxxxxxxxx>
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
---
kernel/sched/fair.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index acb8155f40c3..3abb48b3515c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -891,22 +891,36 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_SMP */

-static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+static s64 update_curr_se(struct rq *rq, struct sched_entity *se)
{
u64 now = rq_clock_task(rq);
s64 delta_exec;

- delta_exec = now - curr->exec_start;
+ /* Calculate the delta from selected se */
+ delta_exec = now - se->exec_start;
if (unlikely(delta_exec <= 0))
return delta_exec;

- curr->exec_start = now;
- curr->sum_exec_runtime += delta_exec;
+ /* Update selected se's exec_start */
+ se->exec_start = now;
+ if (entity_is_task(se)) {
+ struct task_struct *running = rq_curr(rq);
+ /*
+ * If se is a task, we account the time
+ * against the running task, as w/ proxy-exec
+ * they may not be the same.
+ */
+ running->se.exec_start = now;
+ running->se.sum_exec_runtime += delta_exec;
+ } else {
+ /* If not task, account the time against se */
+ se->sum_exec_runtime += delta_exec;
+ }

if (schedstat_enabled()) {
struct sched_statistics *stats;

- stats = __schedstats_from_se(curr);
+ stats = __schedstats_from_se(se);
__schedstat_set(stats->exec_max,
max(delta_exec, stats->exec_max));
}
--
2.40.0.577.gac1e443424-goog