[RFC PATCH 42/86] sched: force preemption on tick expiration

From: Ankur Arora
Date: Tue Nov 07 2023 - 17:04:28 EST


The kernel can have long-running tasks which don't pass through
preemption points for prolonged periods and so will never see
the scheduler's polite TIF_NEED_RESCHED_LAZY.

Force a reschedule at the next tick by upgrading to TIF_NEED_RESCHED,
which will get folded into the preempt_count and cause a reschedule at
the next safe preemption point.

TODO: deadline scheduler.

Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
kernel/sched/fair.c | 32 +++++++++++++++++++++++---------
kernel/sched/rt.c | 7 ++++++-
kernel/sched/sched.h | 1 +
3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d86c618ffa2..fe7e5e9b2207 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
* XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
* this is probably good enough.
*/
-static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void update_deadline(struct cfs_rq *cfs_rq,
+ struct sched_entity *se, bool tick)
{
+ struct rq *rq = rq_of(cfs_rq);
+
if ((s64)(se->vruntime - se->deadline) < 0)
return;

@@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
se->deadline = se->vruntime + calc_delta_fair(se->slice, se);

+ if (cfs_rq->nr_running < 2)
+ return;
+
/*
- * The task has consumed its request, reschedule.
+ * The task has consumed its request, reschedule; eagerly
+ * if it ignored our last lazy reschedule.
*/
- if (cfs_rq->nr_running > 1) {
- resched_curr(rq_of(cfs_rq));
- clear_buddies(cfs_rq, se);
- }
+ if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+ __resched_curr(rq, RESCHED_eager);
+ else
+ resched_curr(rq);
+
+ clear_buddies(cfs_rq, se);
}

#include "pelt.h"
@@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
/*
* Update the current task's runtime statistics.
*/
-static void update_curr(struct cfs_rq *cfs_rq)
+static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
{
struct sched_entity *curr = cfs_rq->curr;
u64 now = rq_clock_task(rq_of(cfs_rq));
@@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
schedstat_add(cfs_rq->exec_clock, delta_exec);

curr->vruntime += calc_delta_fair(delta_exec, curr);
- update_deadline(cfs_rq, curr);
+ update_deadline(cfs_rq, curr, tick);
update_min_vruntime(cfs_rq);

if (entity_is_task(curr)) {
@@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
account_cfs_rq_runtime(cfs_rq, delta_exec);
}

+static void update_curr(struct cfs_rq *cfs_rq)
+{
+ __update_curr(cfs_rq, false);
+}
+
static void update_curr_fair(struct rq *rq)
{
update_curr(cfs_rq_of(&rq->curr->se));
@@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
/*
* Update run-time statistics of the 'current'.
*/
- update_curr(cfs_rq);
+ __update_curr(cfs_rq, true);

/*
* Ensure that runnable average is periodically updated.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a79ce6746dd0..5fdb93f1b87e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2664,7 +2664,12 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
for_each_sched_rt_entity(rt_se) {
if (rt_se->run_list.prev != rt_se->run_list.next) {
requeue_task_rt(rq, p, 0);
- resched_curr(rq);
+
+ if (test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+ __resched_curr(rq, RESCHED_eager);
+ else
+ resched_curr(rq);
+
return;
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9e1329a4e890..e29a8897f573 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2434,6 +2434,7 @@ extern void init_sched_fair_class(void);

extern void reweight_task(struct task_struct *p, int prio);

+extern void __resched_curr(struct rq *rq, resched_t rs);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);

--
2.31.1