[PATCH 2/3] sched/fair: Generalize misfit lb by adding a misfit reason

From: Qais Yousef
Date: Fri Dec 08 2023 - 20:18:40 EST


MISFIT_PERF is the only misfit reason implemented so far. It indicates
that the task requires moving to a more performant CPU.

Guard misfit handling in find_busiest_queue() and update_sd_pick_busiest()
with MISFIT_PERF, as these paths explicitly assume this type of misfit.

This generalizes misfit load balance to allow more types of misfit to
be added later, such as MISFIT_POWER to help uclamp_max be more
effective, and MISFIT_LATENCY to help latency-sensitive tasks be spread
under oversubscribed conditions.

Signed-off-by: Qais Yousef (Google) <qyousef@xxxxxxxxxxx>
---
 kernel/sched/fair.c  | 28 +++++++++++++++++++++++-----
 kernel/sched/sched.h |  8 ++++++++
 2 files changed, 31 insertions(+), 5 deletions(-)
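
Not part of the patch, for reviewers: a minimal sketch of the direction
described in the changelog. MISFIT_POWER and MISFIT_LATENCY are
hypothetical placeholders (nothing in this series implements them), and
misfit_needs_bigger_cpu() is an invented helper name for illustration
only:

/*
 * Illustrative sketch, not part of this patch: how the reason enum
 * could grow, and how a balance path might branch on why a task is
 * misfit instead of assuming a capacity problem.
 */
typedef enum misfit_reason {
	MISFIT_PERF,	/* requires moving to a more performant CPU */
	MISFIT_POWER,	/* hypothetical: uclamp_max allows a smaller CPU */
	MISFIT_LATENCY,	/* hypothetical: needs spreading when oversubscribed */
} misfit_reason_t;

static inline bool misfit_needs_bigger_cpu(misfit_reason_t reason)
{
	/* Only perf misfits want the capacity-based checks. */
	return reason == MISFIT_PERF;
}

With something like that in place, the MISFIT_PERF guards added below in
update_sd_pick_busiest() and find_busiest_queue() become one instance of
a reason-specific policy.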

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eb9e891182cc..dd49b89a6e3e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5063,7 +5063,8 @@ static inline int task_fits_cpu(struct task_struct *p, int cpu)
 	return (util_fits_cpu(util, uclamp_min, uclamp_max, cpu) > 0);
 }
 
-static inline int is_misfit_task(struct task_struct *p, struct rq *rq)
+static inline int is_misfit_task(struct task_struct *p, struct rq *rq,
+				 misfit_reason_t *reason)
 {
 	if (!p || p->nr_cpus_allowed == 1)
 		return 0;
@@ -5071,16 +5072,21 @@ static inline int is_misfit_task(struct task_struct *p, struct rq *rq)
 	if (task_fits_cpu(p, cpu_of(rq)))
 		return 0;
 
+	if (reason)
+		*reason = MISFIT_PERF;
 	return 1;
 }
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 {
+	misfit_reason_t reason;
+
 	if (!sched_asym_cpucap_active())
 		return;
 
-	if (!is_misfit_task(p, rq)) {
+	if (!is_misfit_task(p, rq, &reason)) {
 		rq->misfit_task_load = 0;
+		rq->misfit_reason = -1;
 		return;
 	}
 
@@ -5089,6 +5095,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 	 * task_h_load() returns 0.
 	 */
 	rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
+	rq->misfit_reason = reason;
 }
 
 #else /* CONFIG_SMP */
@@ -9111,7 +9118,7 @@ static int detach_tasks(struct lb_env *env)

 		case migrate_misfit:
 			/* This is not a misfit task */
-			if (!is_misfit_task(p, cpu_rq(env->src_cpu)))
+			if (!is_misfit_task(p, cpu_rq(env->src_cpu), NULL))
 				goto next;
 
 			env->imbalance = 0;
@@ -9426,6 +9433,7 @@ struct sg_lb_stats {
 	unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
 	unsigned int group_smt_balance; /* Task on busy SMT be moved */
 	unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
+	misfit_reason_t group_misfit_reason;
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -9904,6 +9912,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			/* Check for a misfit task on the cpu */
 			if (sgs->group_misfit_task_load < rq->misfit_task_load) {
 				sgs->group_misfit_task_load = rq->misfit_task_load;
+				sgs->group_misfit_reason = rq->misfit_reason;
 				*sg_status |= SG_OVERLOAD;
 			}
 		} else if ((env->idle != CPU_NOT_IDLE) &&
@@ -9969,6 +9978,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	 */
 	if ((env->sd->flags & SD_ASYM_CPUCAPACITY) &&
 	    (sgs->group_type == group_misfit_task) &&
+	    (sgs->group_misfit_reason == MISFIT_PERF) &&
 	    (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) ||
 	     sds->local_stat.group_type != group_has_spare))
 		return false;
@@ -10193,6 +10203,7 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,

 	for_each_cpu(i, sched_group_span(group)) {
 		struct rq *rq = cpu_rq(i);
+		misfit_reason_t reason;
 		unsigned int local;
 
 		sgs->group_load += cpu_load_without(rq, p);
@@ -10212,9 +10223,15 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,

 		/* Check if task fits in the CPU */
 		if (sd->flags & SD_ASYM_CPUCAPACITY &&
-		    sgs->group_misfit_task_load &&
-		    !is_misfit_task(p, rq))
-			sgs->group_misfit_task_load = 0;
+		    sgs->group_misfit_task_load) {
+			if (!is_misfit_task(p, rq, &reason)) {
+				sgs->group_misfit_task_load = 0;
+				sgs->group_misfit_reason = -1;
+			} else {
+				sgs->group_misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
+				sgs->group_misfit_reason = reason;
+			}
+		}
 
 	}

@@ -11008,6 +11025,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		 * average load.
 		 */
 		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+		    rq->misfit_reason == MISFIT_PERF &&
 		    !capacity_greater(capacity_of(env->dst_cpu), capacity) &&
 		    nr_running == 1)
 			continue;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e58a54bda77d..399b6526afab 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -962,6 +962,10 @@ struct balance_callback {
 	void (*func)(struct rq *rq);
 };
 
+typedef enum misfit_reason {
+	MISFIT_PERF,		/* Requires moving to a more performant CPU */
+} misfit_reason_t;
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -1168,6 +1172,10 @@ struct rq {
 	call_single_data_t	cfsb_csd;
 	struct list_head	cfsb_csd_list;
 #endif
+
+#ifdef CONFIG_SMP
+	misfit_reason_t		misfit_reason;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
--
2.34.1