Re: [PATCH 2/13] Time: Reduced NTP Rework (part 2)

From: Ingo Molnar
Date: Sat Nov 26 2005 - 09:52:49 EST



- clean up code impacted by timeofday-ntp-part2.patch
- fix ntp_leapsecond() return

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>

include/linux/timex.h | 12 -
kernel/time.c | 347 ++++++++++++++++++++++++++++----------------------
kernel/timer.c | 20 +-
3 files changed, 212 insertions(+), 167 deletions(-)

Index: linux/include/linux/timex.h
===================================================================
--- linux.orig/include/linux/timex.h
+++ linux/include/linux/timex.h
@@ -261,6 +261,7 @@ extern long pps_errcnt; /* calibration
extern long pps_stbcnt; /* stability limit exceeded */

extern seqlock_t ntp_lock;
+
/**
* ntp_clear - Clears the NTP state variables
*
@@ -269,13 +270,13 @@ extern seqlock_t ntp_lock;
static inline void ntp_clear(void)
{
unsigned long flags;
+
write_seqlock_irqsave(&ntp_lock, flags);
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
write_sequnlock_irqrestore(&ntp_lock, flags);
-
}

/**
@@ -289,21 +290,18 @@ static inline int ntp_synced(void)

/**
* ntp_get_ppm_adjustment - Returns Shifted PPM adjustment
- *
*/
-long ntp_get_ppm_adjustment(void);
+extern long ntp_get_ppm_adjustment(void);

/**
* ntp_advance - Advances the NTP state machine by interval_ns
- *
*/
-void ntp_advance(unsigned long interval_ns);
+extern void ntp_advance(unsigned long interval_ns);

/**
* ntp_leapsecond - NTP leapsecond processing code.
- *
*/
-int ntp_leapsecond(struct timespec now);
+extern int ntp_leapsecond(struct timespec now);


/* Required to safely shift negative values */
Index: linux/kernel/time.c
===================================================================
--- linux.orig/kernel/time.c
+++ linux/kernel/time.c
@@ -241,201 +241,246 @@ void __attribute__ ((weak)) notify_arch_
return;
}

-/* adjtimex mainly allows reading (and writing, if superuser) of
- * kernel time-keeping variables. used by xntpd.
+static inline int
+process_adj_offset(const struct timex *txc, const struct timespec now,
+ int result)
+{
+ long ltemp, mtemp;
+
+ /* note: txc values were checked earlier. */
+
+ if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
+ /* adjtime() is independent from ntp_adjtime() */
+ time_next_adjust = txc->offset;
+ if (time_next_adjust == 0)
+ time_adjust = 0;
+ return result;
+ }
+
+ if (!(time_status & (STA_PLL | STA_PPSTIME)))
+ return result;
+
+ if ((time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
+ (STA_PPSTIME | STA_PPSSIGNAL))
+ ltemp = pps_offset;
+ else
+ ltemp = txc->offset;
+
+ /* scale the phase adjustment and clamp to the operating range: */
+ if (ltemp > MAXPHASE)
+ time_offset = MAXPHASE << SHIFT_UPDATE;
+ else {
+ if (ltemp < -MAXPHASE)
+ time_offset = -(MAXPHASE << SHIFT_UPDATE);
+ else
+ time_offset = ltemp << SHIFT_UPDATE;
+ }
+
+ /*
+ * select whether the frequency is to be controlled
+ * and in which mode (PLL or FLL). Clamp to the operating
+ * range. Ugly multiply/divide should be replaced someday.
+ */
+ if ((time_status & STA_FREQHOLD) || (time_reftime == 0))
+ time_reftime = now.tv_sec;
+
+ mtemp = now.tv_sec - time_reftime;
+ time_reftime = now.tv_sec;
+
+ if (time_status & STA_FLL) { /* FLL mode: */
+ if (mtemp >= MINSEC) {
+ ltemp = (time_offset / mtemp) <<
+ (SHIFT_USEC - SHIFT_UPDATE);
+ time_freq += shift_right(ltemp, SHIFT_KH);
+ } else /* calibration interval too short (p. 12): */
+ result = TIME_ERROR;
+ } else { /* PLL mode: */
+ if (mtemp < MAXSEC) {
+ ltemp *= mtemp;
+ /* TODO: is 2*time_constant correct? --mingo */
+ time_freq += shift_right(ltemp, 2*time_constant +
+ SHIFT_KF - SHIFT_USEC);
+ } else /* calibration interval too long (p. 12) */
+ result = TIME_ERROR;
+ }
+
+ time_freq = min(time_freq, time_tolerance);
+ time_freq = max(time_freq, -time_tolerance);
+
+ return result;
+}
+
+static inline int
+process_input_params(const struct timex *txc, const struct timespec now,
+ int result)
+{
+ if (txc->modes & ADJ_STATUS)
+ /* only set allowed bits: */
+ time_status = (txc->status & ~STA_RONLY) |
+ (time_status & STA_RONLY);
+
+ if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
+ if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ)
+ return -EINVAL;
+ time_freq = txc->freq - pps_freq;
+ }
+
+ if (txc->modes & ADJ_MAXERROR) {
+ if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT)
+ return -EINVAL;
+ time_maxerror = txc->maxerror;
+ }
+
+ if (txc->modes & ADJ_ESTERROR) {
+ if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT)
+ return -EINVAL;
+ time_esterror = txc->esterror;
+ }
+
+ if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
+ /* NTP v4 uses values > 6 */
+ if (txc->constant < 0)
+ return -EINVAL;
+ time_constant = txc->constant;
+ }
+
+ if (txc->modes & ADJ_OFFSET)
+ result = process_adj_offset(txc, now, result);
+
+ if (txc->modes & ADJ_TICK) {
+ tick_usec = txc->tick;
+ tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
+ }
+
+ return result;
+}
+
+/**
+ * do_adjtimex - allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. Used by xntpd.
+ * @txc: time-adjustments settings structure
*/
int do_adjtimex(struct timex *txc)
{
- long ltemp, mtemp, save_adjust;
+ unsigned long flags, seq;
+ struct timespec now;
+ long save_adjust;
int result;
- unsigned long flags;
- struct timespec now_ts;
- unsigned long seq;
- /* In order to modify anything, you gotta be super-user! */
+
+ /* in order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;

- /* Now we validate the data before disabling interrupts */
-
+ /* now we validate the data before disabling interrupts: */
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
- /* singleshot must not be used with any other mode bits */
+ /* singleshot must not be used with any other mode bits: */
if (txc->modes != ADJ_OFFSET_SINGLESHOT)
return -EINVAL;

if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
- /* adjustment Offset limited to +- .512 seconds */
+ /* adjustment offset limited to +- .512 seconds: */
if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
return -EINVAL;

/* if the quartz is off by more than 10% something is VERY wrong ! */
if (txc->modes & ADJ_TICK)
- if (txc->tick < 900000/USER_HZ ||
- txc->tick > 1100000/USER_HZ)
+ if (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)
return -EINVAL;

- do { /* save off current xtime */
+ /*
+ * TODO: shouldnt we write-lock xtime_lock below, and then
+ * lock the ntp lock, and do the whole adjustment from under
+ * the xtime lock and the ntp lock? --mingo
+ */
+
+ /* save current xtime: */
+ do {
seq = read_seqbegin(&xtime_lock);
- now_ts = xtime;
+ now = xtime;
} while (read_seqretry(&xtime_lock, seq));

write_seqlock_irqsave(&ntp_lock, flags);

result = time_state; /* mostly `TIME_OK' */

- /* Save for later - semantics of adjtime is to return old value */
- save_adjust = time_next_adjust ? time_next_adjust : time_adjust;
+ /* save for later - semantics of adjtime is to return old value: */
+ if (time_next_adjust)
+ save_adjust = time_next_adjust;
+ else
+ save_adjust = time_adjust;

-#if 0 /* STA_CLOCKERR is never set yet */
- time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
-#endif
- /* If there are input parameters, then process them */
+ /* process input parameters: */
if (txc->modes)
- {
- if (txc->modes & ADJ_STATUS) /* only set allowed bits */
- time_status = (txc->status & ~STA_RONLY) |
- (time_status & STA_RONLY);
-
- if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
- if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
- result = -EINVAL;
- goto leave;
- }
- time_freq = txc->freq - pps_freq;
- }
-
- if (txc->modes & ADJ_MAXERROR) {
- if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
- result = -EINVAL;
- goto leave;
- }
- time_maxerror = txc->maxerror;
- }
+ result = process_input_params(txc, now, result);

- if (txc->modes & ADJ_ESTERROR) {
- if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
- result = -EINVAL;
- goto leave;
- }
- time_esterror = txc->esterror;
- }
-
- if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
- if (txc->constant < 0) { /* NTP v4 uses values > 6 */
- result = -EINVAL;
- goto leave;
- }
- time_constant = txc->constant;
- }
-
- if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
- if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
- /* adjtime() is independent from ntp_adjtime() */
- if ((time_next_adjust = txc->offset) == 0)
- time_adjust = 0;
- }
- else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
- ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
- (STA_PPSTIME | STA_PPSSIGNAL) ?
- pps_offset : txc->offset;
-
- /*
- * Scale the phase adjustment and
- * clamp to the operating range.
- */
- if (ltemp > MAXPHASE)
- time_offset = MAXPHASE << SHIFT_UPDATE;
- else if (ltemp < -MAXPHASE)
- time_offset = -(MAXPHASE << SHIFT_UPDATE);
- else
- time_offset = ltemp << SHIFT_UPDATE;
-
- /*
- * Select whether the frequency is to be controlled
- * and in which mode (PLL or FLL). Clamp to the operating
- * range. Ugly multiply/divide should be replaced someday.
- */
-
- if (time_status & STA_FREQHOLD || time_reftime == 0)
- time_reftime = now_ts.tv_sec;
- mtemp = now_ts.tv_sec - time_reftime;
- time_reftime = now_ts.tv_sec;
- if (time_status & STA_FLL) {
- if (mtemp >= MINSEC) {
- ltemp = (time_offset / mtemp) << (SHIFT_USEC -
- SHIFT_UPDATE);
- time_freq += shift_right(ltemp, SHIFT_KH);
- } else /* calibration interval too short (p. 12) */
- result = TIME_ERROR;
- } else { /* PLL mode */
- if (mtemp < MAXSEC) {
- ltemp *= mtemp;
- time_freq += shift_right(ltemp,(time_constant +
- time_constant +
- SHIFT_KF - SHIFT_USEC));
- } else /* calibration interval too long (p. 12) */
- result = TIME_ERROR;
- }
- time_freq = min(time_freq, time_tolerance);
- time_freq = max(time_freq, -time_tolerance);
- } /* STA_PLL || STA_PPSTIME */
- } /* txc->modes & ADJ_OFFSET */
- if (txc->modes & ADJ_TICK) {
- tick_usec = txc->tick;
- tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
- }
- } /* txc->modes */
-leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
- || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
- && (time_status & STA_PPSSIGNAL) == 0)
- /* p. 24, (b) */
- || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
- == (STA_PPSTIME|STA_PPSJITTER))
- /* p. 24, (c) */
- || ((time_status & STA_PPSFREQ) != 0
- && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
- /* p. 24, (d) */
+ if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
+ || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
+ && (time_status & STA_PPSSIGNAL) == 0)
+ /* p. 24, (b) */
+ || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
+ == (STA_PPSTIME|STA_PPSJITTER))
+ /* p. 24, (c) */
+ || ((time_status & STA_PPSFREQ) != 0
+ && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
+ /* p. 24, (d) */
result = TIME_ERROR;

if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
- txc->offset = save_adjust;
- else {
- txc->offset = shift_right(time_offset, SHIFT_UPDATE);
- }
- txc->freq = time_freq + pps_freq;
- txc->maxerror = time_maxerror;
- txc->esterror = time_esterror;
- txc->status = time_status;
- txc->constant = time_constant;
- txc->precision = time_precision;
- txc->tolerance = time_tolerance;
- txc->tick = tick_usec;
- txc->ppsfreq = pps_freq;
- txc->jitter = pps_jitter >> PPS_AVG;
- txc->shift = pps_shift;
- txc->stabil = pps_stabil;
- txc->jitcnt = pps_jitcnt;
- txc->calcnt = pps_calcnt;
- txc->errcnt = pps_errcnt;
- txc->stbcnt = pps_stbcnt;
+ txc->offset = save_adjust;
+ else
+ txc->offset = shift_right(time_offset, SHIFT_UPDATE);
+
+ txc->freq = time_freq + pps_freq;
+ txc->maxerror = time_maxerror;
+ txc->esterror = time_esterror;
+ txc->status = time_status;
+ txc->constant = time_constant;
+ txc->precision = time_precision;
+ txc->tolerance = time_tolerance;
+ /*
+ * TODO: shouldnt txc->time be filled in here, within ntp_lock and
+ * xtime_lock, to get an atomic snapshot of time state? --mingo
+ */
+ txc->tick = tick_usec;
+ txc->ppsfreq = pps_freq;
+ txc->jitter = pps_jitter >> PPS_AVG;
+ txc->shift = pps_shift;
+ txc->stabil = pps_stabil;
+ txc->jitcnt = pps_jitcnt;
+ txc->calcnt = pps_calcnt;
+ txc->errcnt = pps_errcnt;
+ txc->stbcnt = pps_stbcnt;
+
write_sequnlock_irqrestore(&ntp_lock, flags);
+
do_gettimeofday(&txc->time);
+
notify_arch_cmos_timer();
- return(result);
+
+ return result;
}

asmlinkage long sys_adjtimex(struct timex __user *txc_p)
{
- struct timex txc; /* Local copy of parameter */
+ struct timex txc;
int ret;

- /* Copy the user data space into the kernel copy
- * structure. But bear in mind that the structures
- * may change
+ /*
+ * copy the user data space into the kernel copy
+ * structure:
*/
- if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
return -EFAULT;
+
ret = do_adjtimex(&txc);
- return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+
+ /*
+ * copy the results back to userspace (even if there was an error):
+ */
+ if (copy_to_user(txc_p, &txc, sizeof(struct timex)))
+ ret = -EFAULT;
+
+ return ret;
}

inline struct timespec current_kernel_time(void)
Index: linux/kernel/timer.c
===================================================================
--- linux.orig/kernel/timer.c
+++ linux/kernel/timer.c
@@ -630,9 +630,9 @@ long offset_adj_ppm;
long tick_adj_ppm;
long singleshot_adj_ppm;

-#define MAX_SINGLESHOT_ADJ 500 /* (ppm) */
-#define SEC_PER_DAY 86400
-#define END_OF_DAY(x) (x + SEC_PER_DAY - (x % SEC_PER_DAY) - 1)
+#define MAX_SINGLESHOT_ADJ 500 /* (ppm) */
+#define SEC_PER_DAY 86400
+#define END_OF_DAY(x) ((x) + SEC_PER_DAY - ((x) % SEC_PER_DAY) - 1)

/* NTP lock, protects NTP state machine */
seqlock_t ntp_lock = SEQLOCK_UNLOCKED;
@@ -645,10 +645,8 @@ seqlock_t ntp_lock = SEQLOCK_UNLOCKED;
* should be added to the current time to properly
* adjust for leapseconds.
*/
-
int ntp_leapsecond(struct timespec now)
{
- unsigned long flags;
/*
* Leap second processing. If in leap-insert state at
* the end of the day, the system clock is set back one
@@ -656,9 +654,12 @@ int ntp_leapsecond(struct timespec now)
* set ahead one second.
*/
static time_t leaptime = 0;
+
+ unsigned long flags;
int ret = 0;

write_seqlock_irqsave(&ntp_lock, flags);
+
switch (time_state) {

case TIME_OK:
@@ -690,7 +691,7 @@ int ntp_leapsecond(struct timespec now)
break;

case TIME_OOP:
- /* Wait for the end of the leap second*/
+ /* Wait for the end of the leap second*/
if (now.tv_sec > (leaptime + 1))
time_state = TIME_WAIT;
time_state = TIME_WAIT;
@@ -703,7 +704,8 @@ int ntp_leapsecond(struct timespec now)
}

write_sequnlock_irqrestore(&ntp_lock, flags);
- return 0;
+
+ return ret;
}

/*
@@ -802,9 +804,9 @@ static void second_overflow(void)

offset_adj_ppm = shift_right(ltemp, SHIFT_UPDATE); /* ppm */

- /* first calculate usec/user_tick offset */
+ /* first calculate usec/user_tick offset: */
tick_adj_ppm = ((USEC_PER_SEC + USER_HZ/2)/USER_HZ) - tick_usec;
- /* multiply by user_hz to get usec/sec => ppm */
+ /* multiply by user_hz to get usec/sec => ppm: */
tick_adj_ppm *= USER_HZ;

/*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/