Re: [PATCH v1 1/3] arm64: topology: Add arch_freq_get_on_cpu() support

From: lihuisong (C)
Date: Wed Mar 06 2024 - 22:02:50 EST


Hi Vanshidhar,

在 2024/3/1 0:25, Vanshidhar Konda 写道:
AMU counters are used by the Frequency Invariance Engine (FIE) to
estimate the CPU utilization during each tick. The delta of the AMU
counters between two ticks can also be used to estimate the average CPU
frequency of each core over the tick duration. Measure the AMU counters
during tick, compute the delta and store it. When the frequency of the
core is queried, use the stored delta to determine the frequency.

arch_freq_get_on_cpu() is used on x86 systems to estimate the frequency
of each CPU. It can be wired up on arm64 for the same functionality.

Signed-off-by: Vanshidhar Konda <vanshikonda@xxxxxxxxxxxxxxxxxxxxxx>
---
arch/arm64/kernel/topology.c | 114 +++++++++++++++++++++++++++++------
1 file changed, 96 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 1a2c72f3e7f8..db8d14525cf4 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -17,6 +17,8 @@
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
+#include <linux/seqlock_types.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
@@ -82,20 +84,54 @@ int __init parse_acpi_topology(void)
#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt
+struct amu_counters {
+ seqcount_t seq;
+ unsigned long last_update;
+ u64 core_cnt;
+ u64 const_cnt;
+ u64 delta_core_cnt;
+ u64 delta_const_cnt;
+};
+
/*
* Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
* the CPU capacity and its associated frequency have been correctly
* initialized.
*/
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
-static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
-static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) =
+ 1UL << (2 * SCHED_CAPACITY_SHIFT);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_counters, cpu_samples) = {
+ .seq = SEQCNT_ZERO(cpu_samples.seq)
+};
static cpumask_var_t amu_fie_cpus;
void update_freq_counters_refs(void)
{
- this_cpu_write(arch_core_cycles_prev, read_corecnt());
- this_cpu_write(arch_const_cycles_prev, read_constcnt());
+ struct amu_counters *cpu_sample = this_cpu_ptr(&cpu_samples);
+ u64 core_cnt, const_cnt, delta_core_cnt, delta_const_cnt;
+
+ const_cnt = read_constcnt();
+ core_cnt = read_corecnt();
+
+ if (unlikely(core_cnt < cpu_sample->core_cnt) ||
+ unlikely(const_cnt < cpu_sample->const_cnt)) {

The two counter registers might wrap around, so this check is not always correct, right?

If we are not going to consider this case, the warning below should be removed.

+ WARN(1, "AMU counter values should be monotonic.\n");
+ cpu_sample->delta_const_cnt = 0;
+ cpu_sample->delta_core_cnt = 0;
+ return;
+ }
+
+ delta_core_cnt = core_cnt - cpu_sample->core_cnt;
+ delta_const_cnt = const_cnt - cpu_sample->const_cnt;
+
+ cpu_sample->core_cnt = core_cnt;
+ cpu_sample->const_cnt = const_cnt;
+
+ raw_write_seqcount_begin(&cpu_sample->seq);
+ cpu_sample->last_update = jiffies;
+ cpu_sample->delta_const_cnt = delta_const_cnt;
+ cpu_sample->delta_core_cnt = delta_core_cnt;
+ raw_write_seqcount_end(&cpu_sample->seq);
}
<...>