[PATCH v2 1/2] sched: don't account throttle time for empty groups

From: Josh Don
Date: Mon Jun 12 2023 - 19:28:09 EST


It is easy for a cfs_rq to become throttled even when it has no enqueued
entities (for example, if we have just put_prev()'d the last runnable
task of the cfs_rq, and the cfs_rq is out of quota).

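Avoid accounting this time towards the total throttle time, since doing
so would falsely inflate the stats. Instead, start the throttle clock at
throttle time only if the cfs_rq has enqueued entities; otherwise start it
when the first entity is enqueued while the hierarchy is throttled.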

Note that the dequeue path is special and is left unchanged by this
patch, since we normally disallow migrations when a task is in a
throttled hierarchy (see throttled_lb_pair()).

Signed-off-by: Josh Don <joshdon@xxxxxxxxxx>
---
kernel/sched/fair.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 48b6f0ca13ac..ddd5dc18b238 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4873,8 +4873,14 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

if (cfs_rq->nr_running == 1) {
check_enqueue_throttle(cfs_rq);
- if (!throttled_hierarchy(cfs_rq))
+ if (!throttled_hierarchy(cfs_rq)) {
list_add_leaf_cfs_rq(cfs_rq);
+ } else {
+#ifdef CONFIG_CFS_BANDWIDTH
+ if (!cfs_rq->throttled_clock)
+ cfs_rq->throttled_clock = rq_clock(rq_of(cfs_rq));
+#endif
+ }
}
}

@@ -5480,7 +5486,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
* throttled-list. rq->lock protects completion.
*/
cfs_rq->throttled = 1;
- cfs_rq->throttled_clock = rq_clock(rq);
+ SCHED_WARN_ON(cfs_rq->throttled_clock);
+ if (cfs_rq->nr_running)
+ cfs_rq->throttled_clock = rq_clock(rq);
return true;
}

@@ -5498,7 +5506,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
update_rq_clock(rq);

raw_spin_lock(&cfs_b->lock);
- cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+ if (cfs_rq->throttled_clock) {
+ cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+ cfs_rq->throttled_clock = 0;
+ }
list_del_rcu(&cfs_rq->throttled_list);
raw_spin_unlock(&cfs_b->lock);

--
2.41.0.162.gfafddb0af9-goog