[PATCH] perf/x86: Use local64_try_cmpxchg

From: Uros Bizjak
Date: Thu Jul 06 2023 - 10:17:43 EST


Use local64_try_cmpxchg() instead of the open-coded
local64_cmpxchg(*ptr, old, new) == old comparison. The x86 CMPXCHG
instruction reports success in the ZF flag, so this change saves a compare
after the cmpxchg (and the related move instruction in front of it).

Also, when the cmpxchg fails, try_cmpxchg implicitly assigns the current
*ptr value to "old", so there is no need to re-read the value in the loop.

No functional change intended.

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Ian Rogers <irogers@xxxxxxxxxx>
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
---
arch/x86/events/amd/ibs.c | 4 ++--
arch/x86/events/core.c | 10 ++++------
arch/x86/events/intel/cstate.c | 10 ++++------
arch/x86/events/msr.c | 8 +++-----
4 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 371014802191..b54db2138c16 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
* count to the generic event atomically:
*/
prev_raw_count = local64_read(&hwc->prev_count);
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
+ if (!local64_try_cmpxchg(&hwc->prev_count,
+ &prev_raw_count, new_raw_count))
return 0;

/*
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9d248703cbdd..23c96420dea1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -129,13 +129,11 @@ u64 x86_perf_event_update(struct perf_event *event)
* exchange a new raw count - then add that new-prev delta
* count to the generic event atomically:
*/
-again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdpmcl(hwc->event_base_rdpmc, new_raw_count);
-
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
- goto again;
+ do {
+ rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ } while (!local64_try_cmpxchg(&hwc->prev_count,
+ &prev_raw_count, new_raw_count));

/*
* Now we have the new raw value and have updated the prev
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 835862c548cc..97035db68df2 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -365,13 +365,11 @@ static void cstate_pmu_event_update(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
u64 prev_raw_count, new_raw_count;

-again:
prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = cstate_pmu_read_counter(event);
-
- if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count)
- goto again;
+ do {
+ new_raw_count = cstate_pmu_read_counter(event);
+ } while (!local64_try_cmpxchg(&hwc->prev_count,
+ &prev_raw_count, new_raw_count));

local64_add(new_raw_count - prev_raw_count, &event->count);
}
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 0feaaa571303..61c59e0627d4 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -244,12 +244,10 @@ static void msr_event_update(struct perf_event *event)
s64 delta;

/* Careful, an NMI might modify the previous event value: */
-again:
prev = local64_read(&event->hw.prev_count);
- now = msr_read_counter(event);
-
- if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
- goto again;
+ do {
+ now = msr_read_counter(event);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, now));

delta = now - prev;
if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
--
2.41.0