Re: [PATCH] sched: Support current clocksource handling in fallback sched_clock().

From: Paul Mundt
Date: Thu May 28 2009 - 07:09:31 EST


On Thu, May 28, 2009 at 11:34:41AM +0200, Peter Zijlstra wrote:
> That's a single assignment, vs two reads on use. Should we be worried
> about the SMP race where we observe two different sched_clocksource
> pointers on read?
>
>
> I would suggest we write it as:
>
> u64 __weak sched_clock(void)
> {
> struct clocksource *clock = ACCESS_ONCE(sched_clocksource);
>
> return cyc2ns(clock, clocksource_read(clock));
> }

Here's an updated version, also with an added sanity check in the
unregister path..

--

include/linux/clocksource.h | 4 +++-
kernel/sched_clock.c | 6 ++++--
kernel/time/clocksource.c | 13 +++++++++++++
kernel/time/jiffies.c | 2 +-
4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c56457c..2109940 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -202,7 +202,8 @@ struct clocksource {
#endif
};

-extern struct clocksource *clock; /* current clocksource */
+extern struct clocksource *clock; /* current clocksource */
+extern struct clocksource *sched_clocksource; /* sched_clock() clocksource */

/*
* Clock source flags bits::
@@ -212,6 +213,7 @@ extern struct clocksource *clock; /* current clocksource */

#define CLOCK_SOURCE_WATCHDOG 0x10
#define CLOCK_SOURCE_VALID_FOR_HRES 0x20
+#define CLOCK_SOURCE_USE_FOR_SCHED_CLOCK 0x40

/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e1d16c9..b91190e 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -30,6 +30,7 @@
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <linux/sched.h>
+#include <linux/clocksource.h>

/*
* Scheduler clock - returns current time in nanosec units.
@@ -38,8 +39,9 @@
*/
unsigned long long __attribute__((weak)) sched_clock(void)
{
- return (unsigned long long)(jiffies - INITIAL_JIFFIES)
- * (NSEC_PER_SEC / HZ);
+ struct clocksource *clock = ACCESS_ONCE(sched_clocksource);
+
+ return cyc2ns(clock, clocksource_read(clock));
}

static __read_mostly int sched_clock_running;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 80189f6..57b011a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -109,6 +109,7 @@ EXPORT_SYMBOL(timecounter_cyc2time);

/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;
+struct clocksource *sched_clocksource = &clocksource_jiffies;

/*[Clocksource internal variables]---------
* curr_clocksource:
@@ -362,6 +363,9 @@ static struct clocksource *select_clocksource(void)
if (next == curr_clocksource)
return NULL;

+ if (next->flags & CLOCK_SOURCE_USE_FOR_SCHED_CLOCK)
+ sched_clocksource = next;
+
return next;
}

@@ -437,10 +441,19 @@ void clocksource_unregister(struct clocksource *cs)
unsigned long flags;

spin_lock_irqsave(&clocksource_lock, flags);
+
+ if (sched_clocksource == cs) {
+ printk(KERN_WARNING "Refusing to unregister "
+ "scheduler clocksource %s", cs->name);
+ goto out;
+ }
+
list_del(&cs->list);
if (clocksource_override == cs)
clocksource_override = NULL;
next_clocksource = select_clocksource();
+
+out:
spin_unlock_irqrestore(&clocksource_lock, flags);
}

diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index c3f6c30..727d881 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -52,7 +52,7 @@

static cycle_t jiffies_read(struct clocksource *cs)
{
- return (cycle_t) jiffies;
+ return (cycle_t) (jiffies - INITIAL_JIFFIES);
}

struct clocksource clocksource_jiffies = {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/