[PATCH] sched/core: Avoid WARN_DOUBLE_CLOCK warning when CONFIG_SCHED_CORE is enabled

From: Hao Jia
Date: Tue Dec 06 2022 - 02:06:09 EST


When update_rq_clock() is called to update the rq clock of another CPU
on the same core, RQCF_UPDATED must first be cleared from that rq's
clock_update_flags. Otherwise the WARN_DOUBLE_CLOCK warning is
triggered, because the other CPU's rq->clock_update_flags may already
have RQCF_UPDATED set at that point.

Some call trace reports:
Call Trace 1:
<TASK>
__schedule+0x61c/0x11d0
schedule+0x5d/0xd0
worker_thread+0xb5/0x380
? preempt_count_add+0x56/0xa0
? rescuer_thread+0x310/0x310
kthread+0xe6/0x110
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30
</TASK>

Call Trace 2:
<TASK>
__schedule+0x91d/0x11d0
schedule+0x5d/0xd0
exit_to_user_mode_prepare+0xe5/0x1e0
syscall_exit_to_user_mode+0x17/0x30
do_syscall_64+0x40/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd

Call Trace 3:
<IRQ>
__sched_core_tick+0x27/0x40
scheduler_tick+0x1be/0x270
? tick_sched_handle.isra.18+0x60/0x60
update_process_times+0x6a/0x90
tick_sched_handle.isra.18+0x1f/0x60
tick_sched_timer+0x47/0x80
__hrtimer_run_queues+0x10a/0x280
hrtimer_interrupt+0x10b/0x240
__sysvec_apic_timer_interrupt+0x70/0x160
sysvec_apic_timer_interrupt+0x9a/0xd0
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x16/0x20

Steps to reproduce:
1. Build the kernel with CONFIG_SCHED_DEBUG and CONFIG_SCHED_CORE
enabled.
2. Reset the warn-once state and enable the WARN_DOUBLE_CLOCK
scheduler feature:
echo 1 > /sys/kernel/debug/clear_warn_once
echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched/features
3. Run the cs_prctl_test selftest from
linux/tools/testing/selftests/sched/.

Signed-off-by: Hao Jia <jiahao.os@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 5 ++++-
kernel/sched/core_sched.c | 4 +++-
kernel/sched/sched.h | 10 +++++++++-
3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index daff72f00385..fcf5e4faec34 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5951,6 +5951,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
rq->core->core_cookie = 0UL;
if (rq->core->core_forceidle_count) {
if (!core_clock_updated) {
+ rq_clock_clear_update(rq->core);
update_rq_clock(rq->core);
core_clock_updated = true;
}
@@ -6007,8 +6008,10 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* pick_next_task(). If the current cpu is not the core,
* the core may also have been updated above.
*/
- if (i != cpu && (rq_i != rq->core || !core_clock_updated))
+ if (i != cpu && (rq_i != rq->core || !core_clock_updated)) {
+ rq_clock_clear_update(rq_i);
update_rq_clock(rq_i);
+ }

p = rq_i->core_pick = pick_task(rq_i);
if (!max || prio_less(max, p, fi_before))
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index a57fd8f27498..70a6f36fd830 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -291,8 +291,10 @@ void __sched_core_tick(struct rq *rq)
if (!rq->core->core_forceidle_count)
return;

- if (rq != rq->core)
+ if (rq != rq->core) {
+ rq_clock_clear_update(rq->core);
update_rq_clock(rq->core);
+ }

__sched_core_account_forceidle(rq);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a4a20046e586..1a2c40c413c2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2544,8 +2544,16 @@ static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
#endif
}
-#else
+
+#ifdef CONFIG_SCHED_CORE
+static inline void rq_clock_clear_update(struct rq *rq)
+{
+ rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+}
+#endif
+#else /* CONFIG_SCHED_DEBUG */
static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
+static inline void rq_clock_clear_update(struct rq *rq) {}
#endif

#ifdef CONFIG_SMP
--
2.37.0