Re: [PATCH v2] perf test x86: address multiplexing in rdpmc test

From: Peter Zijlstra
Date: Sun Mar 22 2020 - 06:19:02 EST


On Sat, Mar 21, 2020 at 10:37:10AM -0700, Ian Rogers wrote:

> +static u64 mmap_read_self(void *addr, u64 *running)
> {
> struct perf_event_mmap_page *pc = addr;
> u32 seq, idx, time_mult = 0, time_shift = 0;
> - u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
> + u64 count, cyc = 0, time_offset = 0, enabled, delta;
>
> do {
> seq = pc->lock;
> barrier();
>
> enabled = pc->time_enabled;
> - running = pc->time_running;
> -
> - if (enabled != running) {
> + *running = pc->time_running;
> +
> + if (*running == 0) {
> + /*
> + * Counter won't have a value as due to multiplexing the
> + * event wasn't scheduled.
> + */
> + return 0;
> + }

I still think adding code for an error case here is a bad idea. And only
passing running as an argument is inconsistent.

Also, then I had a look at what the compiler made of that function and
cried.

Here's something a little better. Much of it copied from linux/math64.h
and asm/div64.h.

---
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index 1ea916656a2d..386a6dacb21e 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -34,20 +34,98 @@ static u64 rdtsc(void)
return low | ((u64)high) << 32;
}

-static u64 mmap_read_self(void *addr)
+#ifdef __x86_64__
+static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
+{
+ u64 q;
+
+ asm ("mulq %2; divq %3" : "=a" (q)
+ : "a" (a), "rm" (b), "rm" (c)
+ : "rdx");
+
+ return q;
+}
+#define mul_u64_u64_div64 mul_u64_u64_div64
+#endif
+
+#ifdef __SIZEOF_INT128__
+
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
+{
+ return (u64)(((unsigned __int128)a * b) >> shift);
+}
+
+#ifndef mul_u64_u64_div64
+static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
+{
+ unsigned __int128 m = a;
+ m *= b;
+ return m / c;
+}
+#endif
+
+#else
+
+#ifdef __i386__
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+ u32 high, low;
+
+ asm ("mull %[b]" : "=a" (low), "=d" (high)
+ : [a] "a" (a), [b] "rm" (b) );
+
+ return low | ((u64)high) << 32;
+}
+#else
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+ return (u64)a * b;
+}
+#endif
+
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
+{
+ u32 ah, al;
+ u64 ret;
+
+ al = a;
+ ah = a >> 32;
+
+ ret = mul_u32_u32(al, mul) >> shift;
+ if (ah)
+ ret += mul_u32_u32(ah, mul) << (32 - shift);
+
+ return ret;
+}
+
+#ifndef mul_u64_u64_div64
+static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
+{
+ u64 quot, rem;
+
+ quot = a / c;
+ rem = a % c;
+
+ return qout * b + (rem * b) / c;
+}
+#endif
+
+#endif
+
+static u64 mmap_read_self(void *addr, u64 *enabled, u64 *running)
{
struct perf_event_mmap_page *pc = addr;
u32 seq, idx, time_mult = 0, time_shift = 0;
- u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+ u64 count, cyc = 0, time_offset = 0;

do {
seq = pc->lock;
barrier();

- enabled = pc->time_enabled;
- running = pc->time_running;
+ *enabled = pc->time_enabled;
+ *running = pc->time_running;

- if (enabled != running) {
+ if (*enabled != *running) {
cyc = rdtsc();
time_mult = pc->time_mult;
time_shift = pc->time_shift;
@@ -62,21 +140,13 @@ static u64 mmap_read_self(void *addr)
barrier();
} while (pc->lock != seq);

- if (enabled != running) {
- u64 quot, rem;
-
- quot = (cyc >> time_shift);
- rem = cyc & (((u64)1 << time_shift) - 1);
- delta = time_offset + quot * time_mult +
- ((rem * time_mult) >> time_shift);
-
- enabled += delta;
+ if (*enabled != *running) {
+ u64 delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
+ *enabled += delta;
if (idx)
- running += delta;
+ *running += delta;

- quot = count / running;
- rem = count % running;
- count = quot * enabled + (rem * enabled) / running;
+ count = mul_u64_u64_div64(count, *enabled, *running);
}

return count;
@@ -130,14 +200,18 @@ static int __test__rdpmc(void)
}

for (n = 0; n < 6; n++) {
- u64 stamp, now, delta;
+ u64 stamp, now, delta, enabled, running;

- stamp = mmap_read_self(addr);
+ stamp = mmap_read_self(addr, &enabled, &running);

for (i = 0; i < loops; i++)
tmp++;

- now = mmap_read_self(addr);
+ now = mmap_read_self(addr, &enabled, &running);
+
+ if (enabled && !running)
+ goto out_error;
+
loops *= 10;

delta = now - stamp;
@@ -155,6 +229,11 @@ static int __test__rdpmc(void)
return -1;

return 0;
+
+out_error:
+ close(fd);
+ pr_err("counter never ran; you loose\n");
+ return -1;
}

int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)