[PATCH 11/15] sched: Pass unlimited __cpu_power information toupper domain level groups

From: Andreas Herrmann
Date: Thu Aug 20 2009 - 09:42:17 EST



For performance reasons __cpu_power in a sched_group might be limited
such that the group can handle only one task. To correctly calculate
the capacity in upper domain level groups the unlimited power
information is required. This patch stores unlimited __cpu_power
information in sched_groups.orig_power and uses this when calculating
__cpu_power in upper domain level groups.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@xxxxxxx>
---
include/linux/sched.h | 8 +++++++-
kernel/sched.c | 36 ++++++++++++++++++++++++------------
2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c53bdd8..d230717 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -890,7 +890,13 @@ struct sched_group {
* (see include/linux/reciprocal_div.h)
*/
u32 reciprocal_cpu_power;
-
+ /*
+ * Backup of original power for this group.
+ * It is used to pass correct power information to upper
+ * domain level groups in case __cpu_power is limited for
+ * performance reasons.
+ */
+ unsigned int orig_power;
/*
* The CPUs this group covers.
*
diff --git a/kernel/sched.c b/kernel/sched.c
index 7a0d710..464b6ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8376,6 +8376,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)

sg_inc_cpu_power(sg, sd->groups->__cpu_power);
}
+ sg->orig_power = sg->__cpu_power;
sg = sg->next;
} while (sg != group_head);
}
@@ -8514,18 +8515,9 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
child = sd->child;

sd->groups->__cpu_power = 0;
-
- /*
- * For perf policy, if the groups in child domain share resources
- * (for example cores sharing some portions of the cache hierarchy
- * or SMT), then set this domain groups cpu_power such that each group
- * can handle only one task, when there are other idle groups in the
- * same sched domain.
- */
- if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
- (child->flags &
- (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
+ if (!child) {
sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE);
+ sd->groups->orig_power = sd->groups->__cpu_power;
return;
}

@@ -8534,9 +8526,29 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
*/
group = child->groups;
do {
- sg_inc_cpu_power(sd->groups, group->__cpu_power);
+ sg_inc_cpu_power(sd->groups, group->orig_power);
group = group->next;
} while (group != child->groups);
+ sd->groups->orig_power = sd->groups->__cpu_power;
+
+ /*
+ * For perf policy, if the groups in child domain share resources
+ * (for example cores sharing some portions of the cache hierarchy
+ * or SMT), then set this domain groups cpu_power such that each group
+ * can handle only one task, when there are other idle groups in the
+ * same sched domain.
+ * Note: Unmodified power information is kept in orig_power and
+ * can be used in higher domain levels to calculate
+ * and reflect the correct capacity of a sched_group.
+ * This is required for power_savings scheduling.
+ */
+ if (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
+ ((child->flags &
+ (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
+ sd->groups->__cpu_power = 0;
+ sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE);
+ }
+
}

/*
--
1.6.0.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/