[PATCH] Fix for rtc.c non-atomic mess (try 2)

From: Cesar Eduardo Barros (cesarb@web4u.com.br)
Date: Sat Apr 22 2000 - 20:09:57 EST


Well, based in feedback, I remade the whole patch. Now I added code to avoid
the xchg() race (thanks to Manfred Spraul for hinting me on that one). Couldn't
avoid using spinlocks; sorry :-)

This time I didn't even try to compile it. Please tell me what I did wrong this
time :-)

--- linux-2.3.99-pre5/drivers/char/rtc.c Fri Apr 14 14:31:08 2000
+++ linux-rtcfix2/drivers/char/rtc.c Sat Apr 22 21:56:48 2000
@@ -40,6 +40,9 @@
  * 1.10b Andrew Morton: SMP lock fix
  */
 
+/* If someone thinks this is spinlocking too much, feel free to use a single
+ * global /dev/rtc driver spinlock plus the rtc_lock for the CMOS_* stuff */
+
 #define RTC_VERSION "1.10b"
 
 #define RTC_IRQ 8 /* Can't see this changing soon. */
@@ -124,8 +127,21 @@
 #define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */
 #define RTC_TIMER_ON 0x02 /* missed irq timer active */
 
-static atomic_t rtc_status = ATOMIC_INIT(0); /* bitmapped status byte. */
+/*
+ * rtc_status is never changed by rtc_interrupt, and ioctl/open/close is
+ * protected by the big kernel lock. However, ioctl can still disable the timer
+ * in rtc_status and then with del_timer after the interrupt has read
+ * rtc_status but before mod_timer is called, which would then reenable the
+ * timer (but you would need to have an awful timing before you'd trip on it)
+ */
+static spinlock_t rtc_irq_timer_lock = SPIN_LOCK_UNLOCKED;
+static unsigned long rtc_status = 0; /* bitmapped status byte. */
 static unsigned long rtc_freq = 0; /* Current periodic IRQ rate */
+/* Having rtc_lock double as rtc_irq_data_lock is evil */
+static spinlock_t rtc_irq_data_lock = SPIN_LOCK_UNLOCKED;
+/* Making this two variables instead of one might make the interrupt a bit
+ * faster (and exchange a spinlock (2 locked mem accesses) by two variable
+ * atomic updates (2 locked mem accesses)). Or maybe slower. Ideas? */
 static unsigned long rtc_irq_data = 0; /* our output to the world */
 
 /*
@@ -151,6 +167,8 @@
 
 static void rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
+ unsigned long rtc_data;
+
         /*
          * Can be an alarm interrupt, update complete interrupt,
          * or a periodic interrupt. We store the status in the
@@ -158,19 +176,29 @@
          * the last read in the remainder of rtc_irq_data.
          */
 
+ /* First, ack the interrupt */
         spin_lock (&rtc_lock);
+ rtc_data = CMOS_READ(RTC_INTR_FLAGS);
+ spin_unlock (&rtc_lock);
+
+ /* Second, avoid a bogus rtc_dropped_irq */
+ spin_lock (&rtc_irq_timer_lock);
+ if (rtc_status & RTC_TIMER_ON)
+ mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);
+ spin_unlock (&rtc_irq_timer_lock);
+
+ /* Third, update rtc_irq_data */
+ spin_lock (&rtc_irq_data_lock);
         rtc_irq_data += 0x100;
         rtc_irq_data &= ~0xff;
- rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0);
- spin_unlock (&rtc_lock);
+ rtc_irq_data |= rtc_data & 0xF0;
+ spin_unlock (&rtc_irq_data_lock);
 
+ /* Now do the rest of the actions */
         wake_up_interruptible(&rtc_wait);
 
         if (rtc_async_queue)
                 kill_fasync (rtc_async_queue, SIGIO, POLL_IN);
-
- if (atomic_read(&rtc_status) & RTC_TIMER_ON)
- mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);
 }
 #endif
 
@@ -192,6 +220,7 @@
         DECLARE_WAITQUEUE(wait, current);
         unsigned long data;
         ssize_t retval;
+ unsigned long flags;
         
         if (count < sizeof(unsigned long))
                 return -EINVAL;
@@ -200,7 +229,18 @@
 
         current->state = TASK_INTERRUPTIBLE;
                 
- while ((data = xchg(&rtc_irq_data, 0)) == 0) {
+ do {
+ /* First make it right. Then make it fast. Putting this whole
+ * block within the parentheses of a while would be too
+ * confusing. And no, xchg() is not the answer. */
+ spin_lock_irqsave (&rtc_irq_data_lock, flags);
+ data = rtc_irq_data;
+ rtc_irq_data = 0;
+ spin_unlock_irqrestore (&rtc_irq_data_lock, flags);
+
+ if (data != 0)
+ break;
+
                 if (file->f_flags & O_NONBLOCK) {
                         retval = -EAGAIN;
                         goto out;
@@ -210,7 +250,7 @@
                         goto out;
                 }
                 schedule();
- }
+ } while (1);
 
         retval = put_user(data, (unsigned long *)buf);
         if (!retval)
@@ -245,11 +285,12 @@
         case RTC_PIE_OFF: /* Mask periodic int. enab. bit */
         {
                 mask_rtc_irq_bit(RTC_PIE);
- if (atomic_read(&rtc_status) & RTC_TIMER_ON) {
+ spin_lock_irqsave (&rtc_irq_timer_lock, flags);
+ if (rtc_status & RTC_TIMER_ON) {
+ rtc_status &= ~RTC_TIMER_ON;
                         del_timer(&rtc_irq_timer);
- atomic_set(&rtc_status,
- atomic_read(&rtc_status) & ~RTC_TIMER_ON);
                 }
+ spin_unlock_irqrestore (&rtc_irq_timer_lock, flags);
                 return 0;
         }
         case RTC_PIE_ON: /* Allow periodic ints */
@@ -262,11 +303,13 @@
                 if ((rtc_freq > 64) && (!capable(CAP_SYS_RESOURCE)))
                         return -EACCES;
 
- if (!(atomic_read(&rtc_status) & RTC_TIMER_ON)) {
- atomic_set(&rtc_status,
- atomic_read(&rtc_status) | RTC_TIMER_ON);
+ /* No need to spinlock here if RTC_TIMER_ON is only enabled at
+ * the end. The mb() should avoid trigger-happy compilers */
+ if (!(rtc_status & RTC_TIMER_ON)) {
                         rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100;
                         add_timer(&rtc_irq_timer);
+ mb (); /* FIXME is this really needed? */
+ rtc_status |= RTC_TIMER_ON;
                 }
                 set_rtc_irq_bit(RTC_PIE);
                 return 0;
@@ -491,16 +534,18 @@
 {
         unsigned long flags;
 
- if(atomic_read(&rtc_status) & RTC_IS_OPEN)
+ /* If someday somebody decides to remove the kernel_lock on open and
+ * close and ioctl this is gonna get open to races again */
+ if(rtc_status & RTC_IS_OPEN)
                 return -EBUSY;
 
         MOD_INC_USE_COUNT;
 
- atomic_set(&rtc_status, atomic_read(&rtc_status) | RTC_IS_OPEN);
+ rtc_status |= RTC_IS_OPEN;
 
- spin_lock_irqsave (&rtc_lock, flags);
+ spin_lock_irqsave (&rtc_irq_data_lock, flags);
         rtc_irq_data = 0;
- spin_unlock_irqrestore (&rtc_lock, flags);
+ spin_unlock_irqrestore (&rtc_irq_data_lock, flags);
         return 0;
 }
 
@@ -530,10 +575,12 @@
         CMOS_READ(RTC_INTR_FLAGS);
         spin_unlock_irqrestore(&rtc_lock, flags);
 
- if (atomic_read(&rtc_status) & RTC_TIMER_ON) {
- atomic_set(&rtc_status, atomic_read(&rtc_status) & ~RTC_TIMER_ON);
+ spin_lock_irqsave (&rtc_irq_timer_lock, flags);
+ if (rtc_status & RTC_TIMER_ON) {
+ rtc_status &= ~RTC_TIMER_ON;
                 del_timer(&rtc_irq_timer);
         }
+ spin_unlock_irqrestore (&rtc_irq_timer_lock, flags);
 
         if (file->f_flags & FASYNC) {
                 rtc_fasync (-1, file, 0);
@@ -542,10 +589,11 @@
 #endif
         MOD_DEC_USE_COUNT;
 
- spin_lock_irqsave (&rtc_lock, flags);
+ /* FIXME: why is this outside the #ifndef __alpha__? */
+ spin_lock_irqsave (&rtc_irq_data_lock, flags);
         rtc_irq_data = 0;
- spin_unlock_irqrestore (&rtc_lock, flags);
- atomic_set(&rtc_status, atomic_read(&rtc_status) & ~RTC_IS_OPEN);
+ spin_unlock_irqrestore (&rtc_irq_data_lock, flags);
+ rtc_status &= ~RTC_IS_OPEN;
         return 0;
 }
 
@@ -556,9 +604,9 @@
 
         poll_wait(file, &rtc_wait, wait);
 
- spin_lock_irqsave (&rtc_lock, flags);
+ spin_lock_irqsave (&rtc_irq_data_lock, flags);
         l = rtc_irq_data;
- spin_unlock_irqrestore (&rtc_lock, flags);
+ spin_unlock_irqrestore (&rtc_irq_data_lock, flags);
 
         if (l != 0)
                 return POLLIN | POLLRDNORM;
@@ -698,9 +746,14 @@
 static void __exit rtc_exit (void)
 {
         /* interrupts and maybe timer disabled at this point by rtc_release */
+ /* FIXME: Maybe??? */
 
- if (atomic_read(&rtc_status) & RTC_TIMER_ON)
+#if 0
+ spin_lock_irqsave (&rtc_irq_timer_lock, flags);
+ if (rtc_status & RTC_TIMER_ON)
                 del_timer(&rtc_irq_timer);
+ spin_unlock_irqrestore (&rtc_irq_timer_lock, flags);
+#endif
 
         remove_proc_entry ("driver/rtc", NULL);
         misc_deregister(&rtc_dev);
@@ -734,16 +787,38 @@
 
 static void rtc_dropped_irq(unsigned long data)
 {
+ unsigned long rtc_data;
         unsigned long flags;
 
         printk(KERN_INFO "rtc: lost some interrupts at %ldHz.\n", rtc_freq);
- mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);
 
- spin_lock_irqsave(&rtc_lock, flags);
+ /* First, ack the (missed) interrupt */
+ spin_lock_irqsave (&rtc_lock, flags);
+ rtc_data = CMOS_READ(RTC_INTR_FLAGS);
+ spin_unlock_irqrestore (&rtc_lock, flags);
+
+ /* Second, avoid a bogus rtc_dropped_irq */
+ spin_lock_irqsave (&rtc_irq_timer_lock, flags);
+ /* Just in case someone disabled the timer from behind our back... */
+ if (rtc_status & RTC_TIMER_ON)
+ mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);
+ spin_unlock_irqrestore (&rtc_irq_timer_lock, flags);
+
+ /* Third, update rtc_irq_data */
+ /* FIXME: what happens if the real interrupt comes before we get here?
+ * The order of the status would get a bit missed up... Maybe I should
+ * ack within this block? */
+ spin_lock_irqsave(&rtc_irq_data_lock, flags);
         rtc_irq_data += ((rtc_freq/HZ)<<8);
         rtc_irq_data &= ~0xff;
- rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); /* restart */
- spin_unlock_irqrestore(&rtc_lock, flags);
+ rtc_irq_data |= rtc_data & 0xF0;
+ spin_unlock_irqrestore(&rtc_irq_data_lock, flags);
+
+ /* Now we have new data */
+ wake_up_interruptible(&rtc_wait);
+
+ if (rtc_async_queue)
+ kill_fasync (rtc_async_queue, SIGIO, POLL_IN);
 }
 #endif
 
@@ -955,8 +1030,12 @@
         val &= ~bit;
         CMOS_WRITE(val, RTC_CONTROL);
         CMOS_READ(RTC_INTR_FLAGS);
- rtc_irq_data = 0;
         spin_unlock_irqrestore(&rtc_lock, flags);
+
+ /* FIXME is doing this outside the rtc_lock ok? */
+ spin_lock_irqsave (&rtc_irq_data_lock, flags);
+ rtc_irq_data = 0;
+ spin_unlock_irqrestore (&rtc_irq_data_lock, flags);
 }
 
 static void set_rtc_irq_bit(unsigned char bit)
@@ -969,7 +1048,11 @@
         val |= bit;
         CMOS_WRITE(val, RTC_CONTROL);
         CMOS_READ(RTC_INTR_FLAGS);
- rtc_irq_data = 0;
         spin_unlock_irqrestore(&rtc_lock, flags);
+
+ /* FIXME is doing this outside the rtc_lock ok? */
+ spin_lock_irqsave (&rtc_irq_data_lock, flags);
+ rtc_irq_data = 0;
+ spin_unlock_irqrestore (&rtc_irq_data_lock, flags);
 }
 #endif

-- 
Cesar Eduardo Barros
cesarb@web4u.com.br
cesarb@dcc.ufrj.br

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Apr 23 2000 - 21:00:21 EST