[PATCH v2 2/3] sched/fair: Calculate the cache-hot time of the idle CPU

From: Chen Yu
Date: Tue Nov 21 2023 - 02:41:09 EST


When a CPU is about to become idle due to a task dequeue, use the
dequeued task's average sleep time to set the cache-hot timeout of
this idle CPU. This information allows SIS (select_idle_sibling) to
skip the cache-hot idle CPU and scan for the next cache-cold one.
When the dequeued task is woken up later, it can then choose its
previous CPU and reuse the still-hot cache.

This is a preparation for the next patch, which introduces the
SIS_CACHE based task wakeup.
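
For illustration, a rough sketch of how the wakeup side is expected
to consume this timeout (the loop shape and the hot_cpu fallback are
illustrative only, not the final code of the next patch;
for_each_cpu_wrap() and __select_idle_cpu() are the existing helpers
in fair.c):

	int hot_cpu = -1, i;

	for_each_cpu_wrap(cpu, cpus, target + 1) {
		/* skip idle CPUs still cache-hot for a sleeping task */
		if (cache_hot_cpu(cpu, &hot_cpu))
			continue;

		i = __select_idle_cpu(cpu, p);
		if ((unsigned int)i < nr_cpumask_bits)
			return i;
	}

	/* every idle CPU was cache-hot: fall back to the first one */
	if (hot_cpu != -1)
		return hot_cpu;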

Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
---
 kernel/sched/fair.c     | 30 +++++++++++++++++++++++++++++-
 kernel/sched/features.h |  1 +
 kernel/sched/sched.h    |  1 +
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 672616503e35..c309b3d203c0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6853,8 +6853,17 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	util_est_update(&rq->cfs, p, task_sleep);
 
 	if (task_sleep) {
-		p->last_dequeue_time = sched_clock_cpu(cpu_of(rq));
+		u64 now = sched_clock_cpu(cpu_of(rq));
+
+		p->last_dequeue_time = now;
 		p->last_dequeue_cpu = cpu_of(rq);
+
+#ifdef CONFIG_SMP
+		/* this rq becomes idle, update its cache hot timeout */
+		if (sched_feat(SIS_CACHE) && !rq->nr_running &&
+		    p->avg_hot_dur)
+			rq->cache_hot_timeout = max(rq->cache_hot_timeout, now + p->avg_hot_dur);
+#endif
 	} else {
 		/* 0 indicates the dequeue is not caused by sleep */
 		p->last_dequeue_time = 0;
@@ -7347,6 +7356,25 @@ static inline int select_idle_smt(struct task_struct *p, int target)

 #endif /* CONFIG_SCHED_SMT */
 
+/*
+ * Return true if the idle CPU is cache-hot for someone,
+ * return false otherwise.
+ */
+static __maybe_unused bool cache_hot_cpu(int cpu, int *hot_cpu)
+{
+	if (!sched_feat(SIS_CACHE))
+		return false;
+
+	if (sched_clock_cpu(cpu) >= cpu_rq(cpu)->cache_hot_timeout)
+		return false;
+
+	/* record the first cache hot idle cpu as the backup */
+	if (*hot_cpu == -1)
+		*hot_cpu = cpu;
+
+	return true;
+}
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index a3ddf84de430..0af282712cd1 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -50,6 +50,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  */
 SCHED_FEAT(SIS_UTIL, true)
+SCHED_FEAT(SIS_CACHE, true)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e58a54bda77d..191ed62ef06d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1083,6 +1083,7 @@ struct rq {
 #endif
 	u64			idle_stamp;
 	u64			avg_idle;
+	u64			cache_hot_timeout; /* idle CPU is cache-hot until this time */
 
 	/* This is used to determine avg_idle's max value */
 	u64			max_idle_balance_cost;
--
2.25.1