[PATCH 1/2] sched/core: Introduce sched_class::can_stop_tick()

From: Hao Jia
Date: Mon Aug 21 2023 - 05:49:52 EST


Split sched_can_stop_tick() into a per-class can_stop_tick()
callback in struct sched_class. Each scheduling class (dl, rt, fair)
now performs its own checks, so the cfs_bandwidth checks move out of
sched/core.c and into fair.c, and the helpers they rely on
(e.g. cfs_task_bw_constrained()) can be made static.

This also makes it easier to deal with the
"nohz_full vs rt_bandwidth" case later.

No functional change intended.

Signed-off-by: Hao Jia <jiahao.os@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 67 ++++++-----------------------------------
kernel/sched/deadline.c | 16 ++++++++++
kernel/sched/fair.c | 56 +++++++++++++++++++++++++++++++---
kernel/sched/rt.c | 34 +++++++++++++++++++++
kernel/sched/sched.h | 5 +--
5 files changed, 114 insertions(+), 64 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index efe3848978a0..1107ce6e4f6c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1191,68 +1191,21 @@ static void nohz_csd_func(void *info)
#endif /* CONFIG_NO_HZ_COMMON */

#ifdef CONFIG_NO_HZ_FULL
-static inline bool __need_bw_check(struct rq *rq, struct task_struct *p)
-{
- if (rq->nr_running != 1)
- return false;
-
- if (p->sched_class != &fair_sched_class)
- return false;
-
- if (!task_on_rq_queued(p))
- return false;
-
- return true;
-}
-
bool sched_can_stop_tick(struct rq *rq)
{
- int fifo_nr_running;
-
- /* Deadline tasks, even if single, need the tick */
- if (rq->dl.dl_nr_running)
- return false;
-
- /*
- * If there are more than one RR tasks, we need the tick to affect the
- * actual RR behaviour.
- */
- if (rq->rt.rr_nr_running) {
- if (rq->rt.rr_nr_running == 1)
- return true;
- else
- return false;
- }
-
- /*
- * If there's no RR tasks, but FIFO tasks, we can skip the tick, no
- * forced preemption between FIFO tasks.
- */
- fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
- if (fifo_nr_running)
- return true;
-
- /*
- * If there are no DL,RR/FIFO tasks, there must only be CFS tasks left;
- * if there's more than one we need the tick for involuntary
- * preemption.
- */
- if (rq->nr_running > 1)
- return false;
+ const struct sched_class *class;
+ int stop_next = 0;
+ bool ret = true;

- /*
- * If there is one task and it has CFS runtime bandwidth constraints
- * and it's on the cpu now we don't want to stop the tick.
- * This check prevents clearing the bit if a newly enqueued task here is
- * dequeued by migrating while the constrained task continues to run.
- * E.g. going from 2->1 without going through pick_next_task().
- */
- if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
- if (cfs_task_bw_constrained(rq->curr))
- return false;
+ for_each_class(class) {
+ if (class->can_stop_tick) {
+ ret = class->can_stop_tick(rq, &stop_next);
+ if (stop_next)
+ break;
+ }
}

- return true;
+ return ret;
}
#endif /* CONFIG_NO_HZ_FULL */
#endif /* CONFIG_SMP */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 58b542bf2893..0b461cb40408 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2715,6 +2715,19 @@ static int task_is_throttled_dl(struct task_struct *p, int cpu)
}
#endif

+#ifdef CONFIG_NO_HZ_FULL
+static bool can_stop_tick_dl(struct rq *rq, int *stop_next)
+{
+ /* Deadline tasks, even if single, need the tick */
+ if (rq->dl.dl_nr_running) {
+ *stop_next = 1;
+ return false;
+ }
+
+ return true;
+}
+#endif
+
DEFINE_SCHED_CLASS(dl) = {

.enqueue_task = enqueue_task_dl,
@@ -2750,6 +2763,9 @@ DEFINE_SCHED_CLASS(dl) = {
#ifdef CONFIG_SCHED_CORE
.task_is_throttled = task_is_throttled_dl,
#endif
+#ifdef CONFIG_NO_HZ_FULL
+ .can_stop_tick = can_stop_tick_dl,
+#endif
};

/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 128a78f3f264..7fa4892267f1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6267,7 +6267,8 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
rq_clock_stop_loop_update(rq);
}

-bool cfs_task_bw_constrained(struct task_struct *p)
+#ifdef CONFIG_NO_HZ_FULL
+static inline bool cfs_task_bw_constrained(struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);

@@ -6281,7 +6282,6 @@ bool cfs_task_bw_constrained(struct task_struct *p)
return false;
}

-#ifdef CONFIG_NO_HZ_FULL
/* called from pick_next_task_fair() */
static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
{
@@ -6305,6 +6305,44 @@ static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
if (cfs_task_bw_constrained(p))
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
+
+static inline bool __need_bw_check(struct rq *rq, struct task_struct *p)
+{
+ if (rq->nr_running != 1)
+ return false;
+
+ if (p->sched_class != &fair_sched_class)
+ return false;
+
+ if (!task_on_rq_queued(p))
+ return false;
+
+ return true;
+}
+
+static bool can_stop_tick_fair(struct rq *rq, int *stop_next)
+{
+ if (rq->nr_running > 1) {
+ *stop_next = 1;
+ return false;
+ }
+
+ /*
+ * If there is one task and it has CFS runtime bandwidth constraints
+ * and it's on the cpu now we don't want to stop the tick.
+ * This check prevents clearing the bit if a newly enqueued task here is
+ * dequeued by migrating while the constrained task continues to run.
+ * E.g. going from 2->1 without going through pick_next_task().
+ */
+ if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+ if (cfs_task_bw_constrained(rq->curr)) {
+ *stop_next = 1;
+ return false;
+ }
+ }
+
+ return true;
+}
#endif

#else /* CONFIG_CFS_BANDWIDTH */
@@ -6348,10 +6386,15 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
static inline void update_runtime_enabled(struct rq *rq) {}
static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
-#ifdef CONFIG_CGROUP_SCHED
-bool cfs_task_bw_constrained(struct task_struct *p)
+#ifdef CONFIG_NO_HZ_FULL
+static bool can_stop_tick_fair(struct rq *rq, int *stop_next)
{
- return false;
+ if (rq->nr_running > 1) {
+ *stop_next = 1;
+ return false;
+ }
+
+ return true;
}
#endif
#endif /* CONFIG_CFS_BANDWIDTH */
@@ -12864,6 +12907,9 @@ DEFINE_SCHED_CLASS(fair) = {
#ifdef CONFIG_SCHED_CORE
.task_is_throttled = task_is_throttled_fair,
#endif
+#ifdef CONFIG_NO_HZ_FULL
+ .can_stop_tick = can_stop_tick_fair,
+#endif

#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0597ba0f85ff..0b9e9467ef61 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1740,6 +1740,37 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
#endif
}

+#ifdef CONFIG_NO_HZ_FULL
+static bool can_stop_tick_rt(struct rq *rq, int *stop_next)
+{
+ int fifo_nr_running;
+
+ /*
+ * If there are more than one RR tasks, we need the tick to affect the
+ * actual RR behaviour.
+ */
+ if (rq->rt.rr_nr_running) {
+ *stop_next = 1;
+ if (rq->rt.rr_nr_running == 1)
+ return true;
+ else
+ return false;
+ }
+
+ /*
+ * If there's no RR tasks, but FIFO tasks, we can skip the tick, no
+ * forced preemption between FIFO tasks.
+ */
+ fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+ if (fifo_nr_running) {
+ *stop_next = 1;
+ return true;
+ }
+
+ return true;
+}
+#endif
+
static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
{
struct sched_rt_entity *rt_se = &p->rt;
@@ -2732,6 +2763,9 @@ DEFINE_SCHED_CLASS(rt) = {
#ifdef CONFIG_SCHED_CORE
.task_is_throttled = task_is_throttled_rt,
#endif
+#ifdef CONFIG_NO_HZ_FULL
+ .can_stop_tick = can_stop_tick_rt,
+#endif

#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4d4b6f178e99..f464e7fff0ef 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -459,7 +459,6 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
-extern bool cfs_task_bw_constrained(struct task_struct *p);

extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
struct sched_rt_entity *rt_se, int cpu,
@@ -495,7 +494,6 @@ static inline void set_task_rq_fair(struct sched_entity *se,
#else /* CONFIG_CGROUP_SCHED */

struct cfs_bandwidth { };
-static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; }

#endif /* CONFIG_CGROUP_SCHED */

@@ -2289,6 +2287,9 @@ struct sched_class {
#ifdef CONFIG_SCHED_CORE
int (*task_is_throttled)(struct task_struct *p, int cpu);
#endif
+#ifdef CONFIG_NO_HZ_FULL
+ bool (*can_stop_tick)(struct rq *rq, int *stop_next);
+#endif
};

static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
--
2.39.2