[PATCH 2/6] sched/fair: Check whether active load balance is needed in busiest group

From: Tim Chen
Date: Thu May 04 2023 - 12:11:47 EST


From: Tim C Chen <tim.c.chen@xxxxxxxxxxxxxxx>

In the busiest group, we need to consider whether active load balance
to a local group is needed even when the busiest group is not
overloaded. One example is when the busiest group is an SMT group that
is fully busy and the destination group is a cluster group with an idle
CPU. Such a condition is considered by asym_active_balance() in load
balancing, but not when looking for the busiest group and computing the
load imbalance. Add this consideration in find_busiest_group() and
calculate_imbalance().

Reviewed-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 87317634fab2..bde962aa160a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9433,6 +9433,17 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_capacity;
}

+/* True when exactly one of the two groups is SMT; false if either is NULL */
+static inline bool asymmetric_groups(struct sched_group *sg1,
+ struct sched_group *sg2)
+{
+ if (!sg1 || !sg2)
+ return false;
+
+ return (sg1->flags & SD_SHARE_CPUCAPACITY) !=
+ (sg2->flags & SD_SHARE_CPUCAPACITY);
+}
+
/**
* update_sd_pick_busiest - return 1 on busiest group
* @env: The load balancing environment.
@@ -10079,6 +10090,31 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
update_idle_cpu_scan(env, sum_util);
}

+static inline bool asym_active_balance_busiest(struct lb_env *env, struct sd_lb_stats *sds)
+{
+ /*
+ * Don't pull when env->idle is CPU_NOT_IDLE or when the local
+ * group is not of type group_has_spare (no spare capacity).
+ *
+ * Skip groups that are not asymmetric (SMT vs. non-SMT): load
+ * imbalance between similar groups is handled by other code paths.
+ */
+ if (env->idle == CPU_NOT_IDLE ||
+ sds->local_stat.group_type != group_has_spare ||
+ !asymmetric_groups(sds->local, sds->busiest))
+ return false;
+
+ /*
+ * For an SMT busiest group, pull when it has two or more tasks
+ * (sum_h_nr_running > 1), i.e. tasks over-utilizing a core.
+ */
+ if (sds->busiest->flags & SD_SHARE_CPUCAPACITY &&
+ sds->busiest_stat.sum_h_nr_running > 1)
+ return true;
+
+ return false;
+}
+
/**
* calculate_imbalance - Calculate the amount of imbalance present within the
* groups of a given sched_domain during load balance.
@@ -10164,6 +10200,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
return;
}

+ if (asym_active_balance_busiest(env, sds)) {
+ env->migration_type = migrate_task;
+ env->imbalance = 1;
+ return;
+ }
+
if (busiest->group_weight == 1 || sds->prefer_sibling) {
unsigned int nr_diff = busiest->sum_nr_running;
/*
@@ -10371,6 +10413,9 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
*/
goto out_balanced;

+ if (asym_active_balance_busiest(env, &sds))
+ goto force_balance;
+
if (busiest->group_weight > 1 &&
local->idle_cpus <= (busiest->idle_cpus + 1))
/*
--
2.32.0