Re: [RFC V2 1/2] topology: Allow multiple entities to provide sched_freq_tick() callback

From: Viresh Kumar
Date: Fri Jan 15 2021 - 02:49:33 EST


On 13-01-21, 16:18, Ionela Voinescu wrote:
> On Tuesday 15 Dec 2020 at 16:46:35 (+0530), Viresh Kumar wrote:
> > +void topology_scale_freq_tick(void)
> > +{
> > + struct scale_freq_tick_data *sftd = *this_cpu_ptr(&sft_data);
> > +
> > + if (sftd)
> > + sftd->scale_freq();
> > +}
>
> What do you think about having a single topology function that handles
> all sources of invariance (cpufreq, arch counters, platform counters)?

I think keeping them separate is better, both of these are called from
scheduler's context (hotpath) and adding any more unnecessary
conditionals there should be rather avoided. We could have given a
though to merging them if they were going to share some code, but that
is not the case here clearly. They are quite different.

This is how it looks now:

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 3b8dca4eb08d..be6a53ba3e2d 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -17,15 +17,9 @@ int pcibus_to_node(struct pci_bus *bus);
#include <linux/arch_topology.h>

void update_freq_counters_refs(void);
-void topology_scale_freq_tick(void);

-#ifdef CONFIG_ARM64_AMU_EXTN
-/*
- * Replace task scheduler's default counter-based
- * frequency-invariance scale factor setting.
- */
+/* Replace task scheduler's default frequency-invariance scale factor setting */
#define arch_scale_freq_tick topology_scale_freq_tick
-#endif /* CONFIG_ARM64_AMU_EXTN */

/* Replace task scheduler's default frequency-invariant accounting */
#define arch_set_freq_scale topology_set_freq_scale
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index e08a4126453a..1e47dfd465f8 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -199,8 +199,44 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
return 0;
}

-static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
-#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
+static void amu_scale_freq_tick(void)
+{
+ u64 prev_core_cnt, prev_const_cnt;
+ u64 core_cnt, const_cnt, scale;
+
+ prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+ prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+ update_freq_counters_refs();
+
+ const_cnt = this_cpu_read(arch_const_cycles_prev);
+ core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+ if (unlikely(core_cnt <= prev_core_cnt ||
+ const_cnt <= prev_const_cnt))
+ return;
+
+ /*
+ * /\core arch_max_freq_scale
+ * scale = ------- * --------------------
+ * /\const SCHED_CAPACITY_SCALE
+ *
+ * See validate_cpu_freq_invariance_counters() for details on
+ * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+ */
+ scale = core_cnt - prev_core_cnt;
+ scale *= this_cpu_read(arch_max_freq_scale);
+ scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+ const_cnt - prev_const_cnt);
+
+ scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+ this_cpu_write(freq_scale, (unsigned long)scale);
+}
+
+static struct scale_freq_data amu_sfd = {
+ .source = SCALE_FREQ_SOURCE_ARCH,
+ .set_freq_scale = amu_scale_freq_tick,
+};

static void amu_fie_setup(const struct cpumask *cpus)
{
@@ -227,7 +263,7 @@ static void amu_fie_setup(const struct cpumask *cpus)
if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
return;

- static_branch_enable(&amu_fie_key);
+ topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);

pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
cpumask_pr_args(cpus));
@@ -283,53 +319,6 @@ static int __init init_amu_fie(void)
}
core_initcall(init_amu_fie);

-bool arch_freq_counters_available(const struct cpumask *cpus)
-{
- return amu_freq_invariant() &&
- cpumask_subset(cpus, amu_fie_cpus);
-}
-
-void topology_scale_freq_tick(void)
-{
- u64 prev_core_cnt, prev_const_cnt;
- u64 core_cnt, const_cnt, scale;
- int cpu = smp_processor_id();
-
- if (!amu_freq_invariant())
- return;
-
- if (!cpumask_test_cpu(cpu, amu_fie_cpus))
- return;
-
- prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
- prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
-
- update_freq_counters_refs();
-
- const_cnt = this_cpu_read(arch_const_cycles_prev);
- core_cnt = this_cpu_read(arch_core_cycles_prev);
-
- if (unlikely(core_cnt <= prev_core_cnt ||
- const_cnt <= prev_const_cnt))
- return;
-
- /*
- * /\core arch_max_freq_scale
- * scale = ------- * --------------------
- * /\const SCHED_CAPACITY_SCALE
- *
- * See validate_cpu_freq_invariance_counters() for details on
- * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
- */
- scale = core_cnt - prev_core_cnt;
- scale *= this_cpu_read(arch_max_freq_scale);
- scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
- const_cnt - prev_const_cnt);
-
- scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
- this_cpu_write(freq_scale, (unsigned long)scale);
-}
-
#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index de8587cc119e..e2115ea348dc 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -21,17 +21,65 @@
#include <linux/sched.h>
#include <linux/smp.h>

+static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
+static struct cpumask scale_freq_counters_mask;
+
+static bool supports_scale_freq_counters(const struct cpumask *cpus)
+{
+ return cpumask_subset(cpus, &scale_freq_counters_mask);
+}
+
bool topology_scale_freq_invariant(void)
{
return cpufreq_supports_freq_invariance() ||
- arch_freq_counters_available(cpu_online_mask);
+ supports_scale_freq_counters(cpu_online_mask);
+}
+
+void topology_set_scale_freq_source(struct scale_freq_data *data,
+ const struct cpumask *cpus)
+{
+ struct scale_freq_data *sfd;
+ int cpu;
+
+ for_each_cpu(cpu, cpus) {
+ sfd = per_cpu(sft_data, cpu);
+
+ /* Use ARCH provided counters whenever possible */
+ if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
+ per_cpu(sft_data, cpu) = data;
+ cpumask_set_cpu(cpu, &scale_freq_counters_mask);
+ }
+ }
}
+EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);

-__weak bool arch_freq_counters_available(const struct cpumask *cpus)
+void topology_clear_scale_freq_source(enum scale_freq_source source,
+ const struct cpumask *cpus)
{
- return false;
+ struct scale_freq_data *sfd;
+ int cpu;
+
+ for_each_cpu(cpu, cpus) {
+ sfd = per_cpu(sft_data, cpu);
+
+ if (sfd && sfd->source == source) {
+ per_cpu(sft_data, cpu) = NULL;
+ cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
+ }
+ }
}
+EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
+
+void topology_scale_freq_tick(void)
+{
+ struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
+
+ if (sfd)
+ sfd->set_freq_scale();
+}
+
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_SYMBOL_GPL(freq_scale);

void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
unsigned long max_freq)
@@ -47,7 +95,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
* want to update the scale factor with information from CPUFREQ.
* Instead the scale factor will be updated from arch_scale_freq_tick.
*/
- if (arch_freq_counters_available(cpus))
+ if (supports_scale_freq_counters(cpus))
return;

scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 0f6cd6b73a61..1bcbf8eff991 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -34,7 +34,18 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
unsigned long max_freq);
bool topology_scale_freq_invariant(void);

-bool arch_freq_counters_available(const struct cpumask *cpus);
+enum scale_freq_source {
+ SCALE_FREQ_SOURCE_ARCH,
+};
+
+struct scale_freq_data {
+ enum scale_freq_source source;
+ void (*set_freq_scale)(void);
+};
+
+void topology_scale_freq_tick(void);
+void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus);
+void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus);

DECLARE_PER_CPU(unsigned long, thermal_pressure);


--
viresh