[PATCH v2] sched/stat: correct the task blocking state

From: alexs
Date: Wed Jan 03 2024 - 03:09:13 EST


From: Alex Shi <alexs@xxxxxxxxxx>

Commit 80ed87c8a9ca ("sched/wait: Introduce TASK_NOLOAD and TASK_IDLE")
stopped idle kthreads from contributing to the load average. However,
time spent in the idle state is still accounted as blocked time rather
than as sleep time. As a result, we cannot tell whether a task is blocked
for some reason or is idling on its own initiative.

Distinguishing between these two states would make the system state clearer
and provide us with an opportunity to use the 'D' state of a task as an
indicator of latency issues.

Originally-from: Curu Wong <curuwang@xxxxxxxxxxx>
Signed-off-by: Alex Shi <alexs@xxxxxxxxxx>
To: linux-kernel@xxxxxxxxxxxxxxx
To: Valentin Schneider <vschneid@xxxxxxxxxx>
To: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
To: Mel Gorman <mgorman@xxxxxxx>
To: Ben Segall <bsegall@xxxxxxxxxx>
To: Steven Rostedt <rostedt@xxxxxxxxxxx>
To: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
To: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
To: Juri Lelli <juri.lelli@xxxxxxxxxx>
To: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
To: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/linux/sched.h | 6 ++++++
kernel/sched/deadline.c | 5 +++--
kernel/sched/fair.c | 5 +++--
kernel/sched/rt.c | 5 +++--
4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 292c31697248..002f80291837 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -140,6 +140,12 @@ struct user_event_mm;
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))

+/* A blocked task is TASK_UNINTERRUPTIBLE but not TASK_NOLOAD */
+#define is_blocked_state(state) \
+ ((state) & TASK_UNINTERRUPTIBLE && (!((state) & TASK_NOLOAD)))
+
+#define is_idle_state(state) (((state) & TASK_IDLE) == TASK_IDLE)
+
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define debug_normal_state_change(state_value) \
do { \
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b28114478b82..99d46affc2aa 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1566,11 +1566,12 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
unsigned int state;

state = READ_ONCE(p->__state);
- if (state & TASK_INTERRUPTIBLE)
+ /* idle state is still accounted as sleep */
+ if (state & TASK_INTERRUPTIBLE || is_idle_state(state))
__schedstat_set(p->stats.sleep_start,
rq_clock(rq_of_dl_rq(dl_rq)));

- if (state & TASK_UNINTERRUPTIBLE)
+ if (is_blocked_state(state))
__schedstat_set(p->stats.block_start,
rq_clock(rq_of_dl_rq(dl_rq)));
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d7a3c63a2171..69506253aadf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1278,10 +1278,11 @@ update_stats_dequeue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int fl

/* XXX racy against TTWU */
state = READ_ONCE(tsk->__state);
- if (state & TASK_INTERRUPTIBLE)
+ /* idle state is still accounted as sleep */
+ if (state & TASK_INTERRUPTIBLE || is_idle_state(state))
__schedstat_set(tsk->stats.sleep_start,
rq_clock(rq_of(cfs_rq)));
- if (state & TASK_UNINTERRUPTIBLE)
+ if (is_blocked_state(state))
__schedstat_set(tsk->stats.block_start,
rq_clock(rq_of(cfs_rq)));
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6aaf0a3d6081..dd0e381689f8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1371,11 +1371,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
unsigned int state;

state = READ_ONCE(p->__state);
- if (state & TASK_INTERRUPTIBLE)
+ /* idle state is still accounted as sleep */
+ if (state & TASK_INTERRUPTIBLE || is_idle_state(state))
__schedstat_set(p->stats.sleep_start,
rq_clock(rq_of_rt_rq(rt_rq)));

- if (state & TASK_UNINTERRUPTIBLE)
+ if (is_blocked_state(state))
__schedstat_set(p->stats.block_start,
rq_clock(rq_of_rt_rq(rt_rq)));
}
--
2.43.0