Re: [PATCHv4 2/2] powerpc: implement arch_scale_smt_power for Power7

From: Michael Neuling
Date: Fri Feb 19 2010 - 06:01:25 EST


In message <1266573672.1806.70.camel@laptop> you wrote:
> On Fri, 2010-02-19 at 17:05 +1100, Michael Neuling wrote:
> > > include/linux/sched.h | 2 +-
> > > kernel/sched_fair.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++---
> > > 2 files changed, 58 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > > index 0eef87b..42fa5c6 100644
> > > --- a/include/linux/sched.h
> > > +++ b/include/linux/sched.h
> > > @@ -849,7 +849,7 @@ enum cpu_idle_type {
> > > #define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
> > > #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
> > > #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
> > > -
> > > +#define SD_ASYM_PACKING 0x0800
> >
> > Would we eventually add this to SD_SIBLING_INIT in a arch specific hook,
> > or is this ok to add it generically?
>
> I'd think we'd want to keep that limited to architectures that actually
> need it.

OK
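
(For the record, that is the pattern the patch below uses: a __weak generic
hook that adds no flags, with the arch supplying a strong definition when it
wants SD_ASYM_PACKING. A minimal userspace sketch of the weak-symbol
mechanism, not kernel code, everything here is illustrative only:

#include <stdio.h>

#define SD_ASYM_PACKING 0x0800

/* "Generic" default: adds no flags. A strong definition of the same
 * symbol linked into the binary silently wins at link time, which is
 * how an arch override takes effect. */
int __attribute__((weak)) arch_sd_asym_packing(void)
{
	return 0;
}

int main(void)
{
	/* With no arch override linked in, the weak default is used. */
	printf("extra sd flags: %#x\n", arch_sd_asym_packing());
	return 0;
}

This builds and runs as-is with gcc; linking in a second file that provides
a strong arch_sd_asym_packing() returning SD_ASYM_PACKING flips the output.)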

> >
> > > +static int update_sd_pick_busiest(struct sched_domain *sd,
> > > +				  struct sd_lb_stats *sds,
> > > +				  struct sched_group *sg,
> > > +				  struct sg_lb_stats *sgs)
> > > +{
> > > +	if (sgs->sum_nr_running > sgs->group_capacity)
> > > +		return 1;
> > > +
> > > +	if (sgs->group_imb)
> > > +		return 1;
> > > +
> > > +	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running) {
> > > +		if (!sds->busiest)
> > > +			return 1;
> > > +
> > > +		if (group_first_cpu(sds->busiest) < group_first_cpu(group))
> >
> > "group" => "sg" here? (I get a compile error otherwise)
>
> Oh, quite ;-)
>
> > > +static int check_asym_packing(struct sched_domain *sd,
> > > +			      struct sd_lb_stats *sds,
> > > +			      int cpu, unsigned long *imbalance)
> > > +{
> > > +	int i, cpu, busiest_cpu;
> >
> > Redefining cpu here. Looks like the cpu parameter is not really needed?
>
> Seems that way indeed, I went back and forth a few times on the actual
> implementation of this function (which started out life as a copy of
> check_power_save_busiest_group), it's amazing there were only these two
> compile glitches ;-)

:-)

Below are the cleanups plus the arch-specific bits. It doesn't change your
logic at all. Obviously the PPC arch bits would need to be split into a
separate patch.

Compiles and boots against linux-next.
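
For anyone skimming: the reason for the packing (check_asym_packing() below)
is that a partially idle POWER7 core gives better performance on its
lower-numbered threads, so even when group loads look balanced we still want
to pull work down to the lowest thread numbers. A standalone toy of just
that decision, assuming an SMT4 domain over cpus 0-3 (not kernel code, the
names are made up for the demo):

#include <stdio.h>

/* Mirror of the test in check_asym_packing(): i ends up as the 1-based
 * position of the busiest group's first cpu within the domain span; if
 * all running tasks fit at or below that position, report an imbalance
 * so the load gets packed onto lower-numbered threads. */
static int should_pack(int busiest_first_cpu, int total_nr_running)
{
	int i = 0, cpu;

	for (cpu = 0; cpu < 4; cpu++) {	/* stand-in for for_each_cpu() */
		i++;
		if (cpu == busiest_first_cpu)
			break;
	}
	return total_nr_running <= i;
}

int main(void)
{
	/* One task sitting on cpu 2: 1 <= 3, so pack it down -> prints 1 */
	printf("%d\n", should_pack(2, 1));
	/* Four tasks, busiest group starts at cpu 1: 4 > 2 -> prints 0 */
	printf("%d\n", should_pack(1, 4));
	return 0;
}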

Mikey
---
 arch/powerpc/include/asm/cputable.h |    3 +
 arch/powerpc/kernel/process.c       |    7 +++
 include/linux/sched.h               |    4 +-
 include/linux/topology.h            |    1 +
 kernel/sched_fair.c                 |   64 ++++++++++++++++++++++++++++++++--
 5 files changed, 74 insertions(+), 5 deletions(-)

Index: linux-next/arch/powerpc/include/asm/cputable.h
===================================================================
--- linux-next.orig/arch/powerpc/include/asm/cputable.h
+++ linux-next/arch/powerpc/include/asm/cputable.h
@@ -195,6 +195,7 @@ extern const char *powerpc_base_platform
 #define CPU_FTR_SAO			LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0080000000000000)
+#define CPU_FTR_ASYM_SMT4		LONG_ASM_CONST(0x0100000000000000)

#ifndef __ASSEMBLY__

@@ -409,7 +410,7 @@ extern const char *powerpc_base_platform
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO)
+	    CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT4)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
Index: linux-next/arch/powerpc/kernel/process.c
===================================================================
--- linux-next.orig/arch/powerpc/kernel/process.c
+++ linux-next/arch/powerpc/kernel/process.c
@@ -1265,3 +1265,10 @@ unsigned long randomize_et_dyn(unsigned

 	return ret;
 }
+
+int arch_sd_asym_packing(void)
+{
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT4))
+		return SD_ASYM_PACKING;
+	return 0;
+}
Index: linux-next/include/linux/sched.h
===================================================================
--- linux-next.orig/include/linux/sched.h
+++ linux-next/include/linux/sched.h
@@ -849,7 +849,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE	0x0100	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
-
+#define SD_ASYM_PACKING		0x0800	/* Asymmetric SMT packing */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */

enum powersavings_balance_level {
@@ -881,6 +881,8 @@ static inline int sd_balance_for_package
 	return SD_PREFER_SIBLING;
 }

+extern int arch_sd_asym_packing(void);
+
 /*
  * Optimise SD flags for power savings:
  * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
Index: linux-next/include/linux/topology.h
===================================================================
--- linux-next.orig/include/linux/topology.h
+++ linux-next/include/linux/topology.h
@@ -102,6 +102,7 @@ int arch_update_cpu_topology(void);
 			| 1*SD_SHARE_PKG_RESOURCES	\
 			| 0*SD_SERIALIZE		\
 			| 0*SD_PREFER_SIBLING		\
+			| arch_sd_asym_packing()	\
 			,				\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
Index: linux-next/kernel/sched_fair.c
===================================================================
--- linux-next.orig/kernel/sched_fair.c
+++ linux-next/kernel/sched_fair.c
@@ -2086,6 +2086,7 @@ struct sd_lb_stats {
 	struct sched_group *this;	/* Local group in this sd */
 	unsigned long total_load;	/* Total load of all groups in sd */
 	unsigned long total_pwr;	/* Total power of all groups in sd */
+	unsigned long total_nr_running; /* Total # of running tasks in sd */
 	unsigned long avg_load;		/* Average load across all groups in sd */

/** Statistics of this group */
@@ -2493,6 +2494,28 @@ static inline void update_sg_lb_stats(st
DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
}

+static int update_sd_pick_busiest(struct sched_domain *sd,
+				  struct sd_lb_stats *sds,
+				  struct sched_group *sg,
+				  struct sg_lb_stats *sgs)
+{
+	if (sgs->sum_nr_running > sgs->group_capacity)
+		return 1;
+
+	if (sgs->group_imb)
+		return 1;
+
+	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running) {
+		if (!sds->busiest)
+			return 1;
+
+		if (group_first_cpu(sds->busiest) < group_first_cpu(sg))
+			return 1;
+	}
+
+	return 0;
+}
+
/**
* update_sd_lb_stats - Update sched_group's statistics for load balancing.
* @sd: sched_domain whose statistics are to be updated.
@@ -2533,6 +2556,7 @@ static inline void update_sd_lb_stats(st

 		sds->total_load += sgs.group_load;
 		sds->total_pwr += group->cpu_power;
+		sds->total_nr_running += sgs.sum_nr_running;
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
@@ -2547,9 +2571,8 @@ static inline void update_sd_lb_stats(st
 			sds->this = group;
 			sds->this_nr_running = sgs.sum_nr_running;
 			sds->this_load_per_task = sgs.sum_weighted_load;
-		} else if (sgs.avg_load > sds->max_load &&
-			   (sgs.sum_nr_running > sgs.group_capacity ||
-			    sgs.group_imb)) {
+		} else if (sgs.avg_load >= sds->max_load &&
+			   update_sd_pick_busiest(sd, sds, group, &sgs)) {
 			sds->max_load = sgs.avg_load;
 			sds->busiest = group;
 			sds->busiest_nr_running = sgs.sum_nr_running;
@@ -2562,6 +2585,38 @@ static inline void update_sd_lb_stats(st
 	} while (group != sd->groups);
 }

+int __weak arch_sd_asym_packing(void)
+{
+	return 0;
+}
+
+static int check_asym_packing(struct sched_domain *sd,
+			      struct sd_lb_stats *sds,
+			      unsigned long *imbalance)
+{
+	int i, cpu, busiest_cpu;
+
+	if (!(sd->flags & SD_ASYM_PACKING))
+		return 0;
+
+	if (!sds->busiest)
+		return 0;
+
+	i = 0;
+	busiest_cpu = group_first_cpu(sds->busiest);
+	for_each_cpu(cpu, sched_domain_span(sd)) {
+		i++;
+		if (cpu == busiest_cpu)
+			break;
+	}
+
+	if (sds->total_nr_running > i)
+		return 0;
+
+	*imbalance = sds->max_load;
+	return 1;
+}
+
/**
* fix_small_imbalance - Calculate the minor imbalance that exists
* amongst the groups of a sched_domain, during
@@ -2761,6 +2816,9 @@ find_busiest_group(struct sched_domain *
 		return sds.busiest;
 
 out_balanced:
+	if (check_asym_packing(sd, &sds, imbalance))
+		return sds.busiest;
+
 	/*
 	 * There is no obvious imbalance. But check if we can do some balancing
 	 * to save power.