[PATCH 1/3] SCHEDULER: Add an interface for counting real utilization.

From: Xiaobing Li
Date: Wed Sep 27 2023 - 22:31:02 EST


PELT treats the running time of a thread as its utilization. Some
threads, however, are running without actually processing any
transactions; they are effectively in an idle state.
Our goal is to count the effective working time of such a thread, so
that its true utilization can be calculated.

Signed-off-by: Xiaobing Li <xiaobing.li@xxxxxxxxxxx>
---
include/linux/kernel.h | 7 ++++++-
include/linux/sched.h | 1 +
kernel/sched/cputime.c | 36 +++++++++++++++++++++++++++++++++++-
kernel/sched/pelt.c | 14 ++++++++++++++
4 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cee8fe87e9f4..c1557fa9cbbe 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -37,7 +37,19 @@
#include <uapi/linux/kernel.h>

#define STACK_MAGIC 0xdeadbeef

+struct cfs_rq;
+struct sched_entity;
+
+/*
+ * Updates for a sched_entity's sq_avg, implemented in kernel/sched/pelt.c.
+ * Declared outside the CONFIG_PREEMPT* conditionals further down so the
+ * prototypes are visible in every configuration.
+ * NOTE(review): kernel/sched/pelt.h may be a better home - confirm.
+ */
+extern void __update_sq_avg_block(u64 now, struct sched_entity *se);
+extern void __update_sq_avg(u64 now, struct sched_entity *se);
+
/**
* REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
* @x: value to repeat
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 77f01ac385f7..403ccb456c9a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -583,6 +583,7 @@ struct sched_entity {
* collide with read-mostly values above.
*/
struct sched_avg avg;
+ struct sched_avg sq_avg;
#endif
};

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index af7952f12e6c..824203293fd9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -479,6 +479,40 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)

#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */

+/*
+ * get_sqthread_util - maintain this CPU's table of "iou-sqp" threads
+ * @p: task being accounted on this CPU
+ *
+ * kcpustat_this_cpu->sq_util[] holds up to MAX_SQ_NUM task pointers. Each
+ * call first drops every entry whose task reports a different CPU than @p
+ * or whose state is TASK_DEAD. Then, if @p's comm starts with "iou-sqp" and
+ * @p is not already in the table, @p is stored in the lowest free slot.
+ * When no slot is free, @p is not recorded.
+ *
+ * NOTE(review): slots hold bare task pointers that are dereferenced on
+ * later calls. Nothing visible here takes a reference on the task, so a
+ * slot may outlive its task - confirm the lifetime rules.
+ * NOTE(review): ->__state is read with a plain load; confirm whether
+ * READ_ONCE() is expected here.
+ * NOTE(review): matching on comm is fragile if comm can be changed at
+ * run time; a flag-based test would be more robust.
+ */
+void get_sqthread_util(struct task_struct *p)
+{
+	struct task_struct **sqstat = kcpustat_this_cpu->sq_util;
+	unsigned int cpu = task_cpu(p);
+	int free_slot = MAX_SQ_NUM;
+	bool present = false;
+	int i;
+
+	/*
+	 * One-time clear of the table. This has to happen before the scan
+	 * below, which dereferences every non-NULL slot.
+	 */
+	if (!kcpustat_this_cpu->flag) {
+		for (i = 0; i < MAX_SQ_NUM; i++)
+			sqstat[i] = NULL;
+		kcpustat_this_cpu->flag = true;
+	}
+
+	/* Prune stale entries; note if @p is present and the first gap. */
+	for (i = 0; i < MAX_SQ_NUM; i++) {
+		struct task_struct *t = sqstat[i];
+
+		if (t && (task_cpu(t) != cpu || t->__state == TASK_DEAD)) {
+			sqstat[i] = NULL;
+			t = NULL;
+		}
+
+		if (!t) {
+			if (free_slot == MAX_SQ_NUM)
+				free_slot = i;
+		} else if (t == p) {
+			present = true;
+		}
+	}
+
+	/* Only tasks whose comm starts with "iou-sqp" are recorded. */
+	if (strncmp(p->comm, "iou-sqp", 7))
+		return;
+
+	if (!present && free_slot < MAX_SQ_NUM)
+		sqstat[free_slot] = p;
+}
+
/*
* Account a single tick of CPU time.
* @p: the process that the CPU time gets accounted to
@@ -487,7 +521,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
void account_process_tick(struct task_struct *p, int user_tick)
{
u64 cputime, steal;
-
+ get_sqthread_util(p);
if (vtime_accounting_enabled_this_cpu())
return;

diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 0f310768260c..945efe80e08c 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -266,6 +266,20 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}

+/*
+ * Feed @now into ___update_load_sum() for @se's sq_avg with the three
+ * trailing arguments all zero. If that call returns non-zero, refresh the
+ * averages via ___update_load_avg() using se_weight(@se).
+ */
+void __update_sq_avg_block(u64 now, struct sched_entity *se)
+{
+	struct sched_avg *sa = &se->sq_avg;
+
+	if (!___update_load_sum(now, sa, 0, 0, 0))
+		return;
+
+	___update_load_avg(sa, se_weight(se));
+}
+
+/*
+ * Feed @now into ___update_load_sum() for @se's sq_avg, passing the
+ * boolean value of se->on_rq, se_runnable(@se), and whether @se is the
+ * ->curr entity of its cfs_rq. If that call returns non-zero, refresh the
+ * averages via ___update_load_avg() using se_weight(@se).
+ */
+void __update_sq_avg(u64 now, struct sched_entity *se)
+{
+	struct sched_avg *sa = &se->sq_avg;
+	int is_curr = cfs_rq_of(se)->curr == se;
+
+	if (!___update_load_sum(now, sa, !!se->on_rq, se_runnable(se),
+				 is_curr))
+		return;
+
+	___update_load_avg(sa, se_weight(se));
+}
+
/*
* sched_entity:
*
--
2.34.1