[PATCH 5.18 0289/1095] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()

From: Greg Kroah-Hartman
Date: Mon Aug 15 2022 - 16:49:05 EST


From: Nicolas Saenz Julienne <nsaenzju@xxxxxxxxxx>

[ Upstream commit 5c66d1b9b30f737fcef85a0b75bfe0590e16b62a ]

dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
called sched_update_tick_dependency() preventing it from re-enabling the
tick on systems that no longer have pending SCHED_RT tasks but have
multiple runnable SCHED_OTHER tasks:

dequeue_task_rt()
dequeue_rt_entity()
dequeue_rt_stack()
dequeue_top_rt_rq()
sub_nr_running() // decrements rq->nr_running
sched_update_tick_dependency()
sched_can_stop_tick() // checks rq->rt.rt_nr_running,
...
__dequeue_rt_entity()
dec_rt_tasks() // decrements rq->rt.rt_nr_running
...

Every other scheduler class performs the operation in the opposite
order, and sched_update_tick_dependency() expects the values to be
updated as such. So avoid the misbehaviour by inverting the order in
which the above operations are performed in the RT scheduler.

Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>
Reviewed-by: Phil Auld <pauld@xxxxxxxxxx>
Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@xxxxxxxxxx
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
kernel/sched/rt.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7891c0f0e1ff..907a5d7507a8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -430,7 +430,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
@@ -551,7 +551,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];

if (!rt_se) {
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
}
@@ -637,7 +637,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1039,7 +1039,7 @@ static void update_curr_rt(struct rq *rq)
}

static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
{
struct rq *rq = rq_of_rt_rq(rt_rq);

@@ -1050,7 +1050,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)

BUG_ON(!rq->nr_running);

- sub_nr_running(rq, rt_rq->rt_nr_running);
+ sub_nr_running(rq, count);
rt_rq->rt_queued = 0;

}
@@ -1436,18 +1436,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct sched_rt_entity *back = NULL;
+ unsigned int rt_nr_running;

for_each_sched_rt_entity(rt_se) {
rt_se->back = back;
back = rt_se;
}

- dequeue_top_rt_rq(rt_rq_of_se(back));
+ rt_nr_running = rt_rq_of_se(back)->rt_nr_running;

for (rt_se = back; rt_se; rt_se = rt_se->back) {
if (on_rt_rq(rt_se))
__dequeue_rt_entity(rt_se, flags);
}
+
+ dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
--
2.35.1