[PATCH v2 11/13] x86/tsc: Provide sched_clock_noinstr()

From: Peter Zijlstra
Date: Fri May 19 2023 - 06:35:08 EST


In preparation for local_clock_noinstr(), a variant of local_clock()
that is safe to call from noinstr code (on the assumption that any
such code is already non-preemptible), provide sched_clock_noinstr(),
a noinstr variant of sched_clock().

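Specifically, preempt_enable_*() calls out to schedule(), which upsets
noinstr validation efforts. The noinstr paths therefore no longer
toggle preemption themselves, and sched_clock() becomes a wrapper that
disables preemption around sched_clock_noinstr(). The warnings being
addressed: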

vmlinux.o: warning: objtool: native_sched_clock+0x96: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section
vmlinux.o: warning: objtool: kvm_clock_read+0x22: call to preempt_schedule_notrace_thunk() leaves .noinstr.text section

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/hyperv_timer.h | 5 ++++
arch/x86/kernel/kvmclock.c | 4 +--
arch/x86/kernel/tsc.c | 38 +++++++++++++++++++++++--------
arch/x86/kvm/x86.c | 7 ++---
arch/x86/xen/time.c | 3 --
drivers/clocksource/hyperv_timer.c | 44 ++++++++++++++++++++++--------------
include/clocksource/hyperv_timer.h | 24 +++++++------------
7 files changed, 76 insertions(+), 49 deletions(-)

--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -71,7 +71,7 @@ static int kvm_set_wallclock(const struc
return -ENODEV;
}

-static noinstr u64 kvm_clock_read(void)
+static u64 kvm_clock_read(void)
{
u64 ret;

@@ -88,7 +88,7 @@ static u64 kvm_clock_get_cycles(struct c

static noinstr u64 kvm_sched_clock_read(void)
{
- return kvm_clock_read() - kvm_sched_clock_offset;
+ return pvclock_clocksource_read_nowd(this_cpu_pvti()) - kvm_sched_clock_offset;
}

static inline void kvm_sched_clock_init(bool stable)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -69,12 +69,10 @@ static int __init tsc_early_khz_setup(ch
}
early_param("tsc_early_khz", tsc_early_khz_setup);

-__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
+__always_inline void __cyc2ns_read(struct cyc2ns_data *data)
{
int seq, idx;

- preempt_disable_notrace();
-
do {
seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;
@@ -86,6 +84,12 @@ __always_inline void cyc2ns_read_begin(s
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}

+__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
+{
+ preempt_disable_notrace();
+ __cyc2ns_read(data);
+}
+
__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
@@ -115,18 +119,25 @@ __always_inline void cyc2ns_read_end(voi
* -johnstul@xxxxxxxxxx "math is hard, lets go shopping!"
*/

-static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;

- cyc2ns_read_begin(&data);
+ __cyc2ns_read(&data);

ns = data.cyc2ns_offset;
ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);

- cyc2ns_read_end();
+ return ns;
+}

+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ unsigned long long ns;
+ preempt_disable_notrace();
+ ns = __cycles_2_ns(cyc);
+ preempt_enable_notrace();
return ns;
}

@@ -223,7 +234,7 @@ noinstr u64 native_sched_clock(void)
u64 tsc_now = rdtsc();

/* return the value in ns */
- return cycles_2_ns(tsc_now);
+ return __cycles_2_ns(tsc_now);
}

/*
@@ -250,7 +261,7 @@ u64 native_sched_clock_from_tsc(u64 tsc)
/* We need to define a real function for sched_clock, to override the
weak default version */
#ifdef CONFIG_PARAVIRT
-noinstr u64 sched_clock(void)
+noinstr u64 sched_clock_noinstr(void)
{
return paravirt_sched_clock();
}
@@ -260,11 +271,20 @@ bool using_native_sched_clock(void)
return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
-u64 sched_clock(void) __attribute__((alias("native_sched_clock")));
+u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock")));

bool using_native_sched_clock(void) { return true; }
#endif

+notrace u64 sched_clock(void)
+{
+ u64 now;
+ preempt_disable_notrace();
+ now = sched_clock_noinstr();
+ preempt_enable_notrace();
+ return now;
+}
+
int check_tsc_unstable(void)
{
return tsc_unstable;
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -66,11 +66,10 @@ static noinstr u64 xen_sched_clock(void)
struct pvclock_vcpu_time_info *src;
u64 ret;

- preempt_disable_notrace();
src = &__this_cpu_read(xen_vcpu)->time;
ret = pvclock_clocksource_read_nowd(src);
ret -= xen_sched_clock_offset;
- preempt_enable_notrace();
+
return ret;
}