[tip:sched/core] x86: sched: Provide arch implementations using aperf/mperf

From: tip-bot for Peter Zijlstra
Date: Wed Sep 16 2009 - 06:24:08 EST


Commit-ID: 47fe38fcff0517e67d395c039d2e26d2de688a60
Gitweb: http://git.kernel.org/tip/47fe38fcff0517e67d395c039d2e26d2de688a60
Author: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
AuthorDate: Wed, 2 Sep 2009 13:49:18 +0200
Committer: Ingo Molnar <mingo@xxxxxxx>
CommitDate: Tue, 15 Sep 2009 16:51:27 +0200

x86: sched: Provide arch implementations using aperf/mperf

APERF/MPERF support for cpu_power.

APERF/MPERF is arch defined to be a relative scale of work capacity
per logical cpu, this is assumed to include SMT and Turbo mode.

APERF/MPERF are specified to both reset to 0 when either counter
wraps, which is highly inconvenient, since that'll give a blimp
when that happens. The manual specifies writing 0 to the counters
after each read, but that's 1) too expensive, and 2) destroys the
possibility of sharing these counters with other users, so we live
with the blimp - the other existing user does too.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@xxxxxxx>


---
arch/x86/kernel/cpu/Makefile | 2 +-
arch/x86/kernel/cpu/sched.c | 55 ++++++++++++++++++++++++++++++++++++++++++
include/linux/sched.h | 4 +++
3 files changed, 60 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c1f253d..8dd3063 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp)

obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o
+obj-y += vmware.o hypervisor.o sched.o

obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
new file mode 100644
index 0000000..6c00a8f
--- /dev/null
+++ b/arch/x86/kernel/cpu/sched.c
@@ -0,0 +1,55 @@
+#include <linux/sched.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_SMP
+
+static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+
+static unsigned long scale_aperfmperf(void)
+{
+ struct aperfmperf val, *old = &__get_cpu_var(old_perf);
+ unsigned long ratio, flags;
+
+ local_irq_save(flags);
+ get_aperfmperf(&val);
+ local_irq_restore(flags);
+
+ ratio = calc_aperfmperf_ratio(old, &val);
+ *old = val;
+
+ return ratio;
+}
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ /*
+ * do aperf/mperf on the cpu level because it includes things
+ * like turbo mode, which are relevant to full cores.
+ */
+ if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+ return scale_aperfmperf();
+
+ /*
+ * maybe have something cpufreq here
+ */
+
+ return default_scale_freq_power(sd, cpu);
+}
+
+unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+ /*
+ * aperf/mperf already includes the smt gain
+ */
+ if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+ return SCHED_LOAD_SCALE;
+
+ return default_scale_smt_power(sd, cpu);
+}
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c30bf3d..fc4c0f9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -992,6 +992,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
return 0;
}

+unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
+unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
+
#else /* CONFIG_SMP */

struct sched_domain_attr;
@@ -1003,6 +1006,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
}
#endif /* !CONFIG_SMP */

+
struct io_context; /* See blkdev.h */


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/