Re: gettimeofday running backwards on 2.4.20

From: john stultz (johnstul@us.ibm.com)
Date: Tue May 06 2003 - 15:33:41 EST


> Just recently my NAS benchmarks and MPI latency tests showed bizarre
> results, so I pulled out my test program and am seeing the same
> problems again. It seems that roughly 50 in 1 million calls go
> backwards, even with 2.4.20.
[snip]
> Interestingly, it only happens on the compute nodes with NFS root.
> The service node has booted from a local SCSI disk and is serving roughly
> 140 compute nodes without any timing bugs.

The 2.4 kernel still has problems with xtime_lock writer starvation, and it
is also unable to handle lost ticks. Do you see this problem if you run a
UP (uniprocessor) kernel?

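Give this patch (against 2.4.21-rc1) a whirl to see if you're getting
caught by xtime_lock starvation. It converts xtime_lock from an rwlock to
a plain spinlock, so a steady stream of gettimeofday() readers can no
longer keep the timer-interrupt writer from taking the lock.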

thanks
-john

diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c Tue May 6 13:25:40 2003
+++ b/arch/i386/kernel/time.c Tue May 6 13:25:40 2003
@@ -79,7 +79,7 @@
  */
 unsigned long fast_gettimeoffset_quotient;
 
-extern rwlock_t xtime_lock;
+extern spinlock_t xtime_lock;
 extern unsigned long wall_jiffies;
 
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
@@ -444,7 +444,7 @@
         unsigned long flags;
         unsigned long usec, sec;
 
- read_lock_irqsave(&xtime_lock, flags);
+ spin_lock_irqsave(&xtime_lock, flags);
         usec = do_gettimeoffset();
         {
                 unsigned long lost = jiffies - wall_jiffies;
@@ -453,7 +453,7 @@
         }
         sec = xtime.tv_sec;
         usec += xtime.tv_usec;
- read_unlock_irqrestore(&xtime_lock, flags);
+ spin_unlock_irqrestore(&xtime_lock, flags);
 
         while (usec >= 1000000) {
                 usec -= 1000000;
@@ -466,7 +466,7 @@
 
 void do_settimeofday(struct timeval *tv)
 {
- write_lock_irq(&xtime_lock);
+ spin_lock_irq(&xtime_lock);
         /*
          * This is revolting. We need to set "xtime" correctly. However, the
          * value in this location is the value at the most recent update of
@@ -486,7 +486,7 @@
         time_status |= STA_UNSYNC;
         time_maxerror = NTP_PHASE_LIMIT;
         time_esterror = NTP_PHASE_LIMIT;
- write_unlock_irq(&xtime_lock);
+ spin_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -652,7 +652,7 @@
          * the irq version of write_lock because as just said we have irq
          * locally disabled. -arca
          */
- write_lock(&xtime_lock);
+ spin_lock(&xtime_lock);
 
         if(use_cyclone)
                 mark_timeoffset_cyclone();
@@ -708,7 +708,7 @@
 
         do_timer_interrupt(irq, NULL, regs);
 
- write_unlock(&xtime_lock);
+ spin_unlock(&xtime_lock);
 
 }
 
diff -Nru a/kernel/time.c b/kernel/time.c
--- a/kernel/time.c Tue May 6 13:25:40 2003
+++ b/kernel/time.c Tue May 6 13:25:40 2003
@@ -38,7 +38,7 @@
 
 /* The xtime_lock is not only serializing the xtime read/writes but it's also
    serializing all accesses to the global NTP variables now. */
-extern rwlock_t xtime_lock;
+extern spinlock_t xtime_lock;
 
 #if !defined(__alpha__) && !defined(__ia64__)
 
@@ -79,7 +79,7 @@
                 return -EPERM;
         if (get_user(value, tptr))
                 return -EFAULT;
- write_lock_irq(&xtime_lock);
+ spin_lock_irq(&xtime_lock);
         vxtime_lock();
         xtime.tv_sec = value;
         xtime.tv_usec = 0;
@@ -88,7 +88,7 @@
         time_status |= STA_UNSYNC;
         time_maxerror = NTP_PHASE_LIMIT;
         time_esterror = NTP_PHASE_LIMIT;
- write_unlock_irq(&xtime_lock);
+ spin_unlock_irq(&xtime_lock);
         return 0;
 }
 
@@ -127,11 +127,11 @@
  */
 inline static void warp_clock(void)
 {
- write_lock_irq(&xtime_lock);
+ spin_lock_irq(&xtime_lock);
         vxtime_lock();
         xtime.tv_sec += sys_tz.tz_minuteswest * 60;
         vxtime_unlock();
- write_unlock_irq(&xtime_lock);
+ spin_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -235,7 +235,7 @@
                 if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
                         return -EINVAL;
 
- write_lock_irq(&xtime_lock);
+ spin_lock_irq(&xtime_lock);
         result = time_state; /* mostly `TIME_OK' */
 
         /* Save for later - semantics of adjtime is to return old value */
@@ -390,7 +390,7 @@
         txc->calcnt = pps_calcnt;
         txc->errcnt = pps_errcnt;
         txc->stbcnt = pps_stbcnt;
- write_unlock_irq(&xtime_lock);
+ spin_unlock_irq(&xtime_lock);
         do_gettimeofday(&txc->time);
         return(result);
 }
diff -Nru a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c Tue May 6 13:25:40 2003
+++ b/kernel/timer.c Tue May 6 13:25:40 2003
@@ -666,7 +666,7 @@
 /*
  * This spinlock protect us from races in SMP while playing with xtime. -arca
  */
-rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+spinlock_t xtime_lock = SPIN_LOCK_UNLOCKED;
 
 static inline void update_times(void)
 {
@@ -677,7 +677,7 @@
          * just know that the irqs are locally enabled and so we don't
          * need to save/restore the flags of the local CPU here. -arca
          */
- write_lock_irq(&xtime_lock);
+ spin_lock_irq(&xtime_lock);
         vxtime_lock();
 
         ticks = jiffies - wall_jiffies;
@@ -686,7 +686,7 @@
                 update_wall_time(ticks);
         }
         vxtime_unlock();
- write_unlock_irq(&xtime_lock);
+ spin_unlock_irq(&xtime_lock);
         calc_load(ticks);
 }
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Wed May 07 2003 - 22:00:28 EST