Re: [BUG] Transmit-timed-outs with 2.4.0test 3c59x drivers

From: Andrew Morton (andrewm@uow.edu.au)
Date: Sat Aug 26 2000 - 00:57:35 EST


Aaron Lehmann wrote:
>
> I've been really enthusiastic about playing with 2.4.0-test*, but one
> problem that I've been experiencing has been making it unusable. After
> a few hours of uptime with normal workstation network activity (NFS,
> FTP, web), my 3c905 network interface goes down completely. When I
> switch to a virtual console, I get messages saying "NETDEV WATCHDOG:
> Transmit timed out".
> ...

Are you running on a busy, 10 Mbps hubbed LAN?

The driver has a problem recovering from 16 successive
collisions. This is fixed in 2.2, but the fix is still in private
testing for 2.4: David Hinds found some peculiarities in the CardBus
NICs' handling of this situation which complicated things.

It was quite an elusive problem until David showed that
running `ping -f -s 50000 victim' from a high-performance
NIC on another machine produces a shower of errors. This
test produces so many collisions that the driver actually
gets _legitimate_ tx timeouts: it can't send any packets
at all in a 400 millisecond period.

Please try this patch against 2.4.0-test7 and let me know
how it goes.

--- linux-2.4.0-test7/drivers/net/3c59x.c Thu Aug 24 21:07:19 2000
+++ linux-akpm/drivers/net/3c59x.c Fri Aug 25 21:14:12 2000
@@ -98,6 +98,14 @@
     - Added INVERT_LED_PWR, used it.
     - Backed out the extra_reset stuff
 
+ LK1.1.9 18 Aug 2000 andrewm
+ - Backed out the tx_reset_resume flags. It was a no-op.
+ - In vortex_error, don't reset the Tx on txReclaim errors
+ - In vortex_error, don't reset the Tx on maxCollisions errors.
+ Hence backed out all the DownListPtr logic here.
+ - In vortex_error, give cyclone cards a partial TxReset on
+ maxCollisions (David Hinds)
+
     - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details.
     - Also see Documentation/networking/vortex.txt
 */
@@ -183,7 +191,7 @@
 #include <linux/delay.h>
 
 static char version[] __devinitdata =
-"3c59x.c:LK1.1.8 13 Aug 2000 Donald Becker and others. http://www.scyld.com/network/vortex.html " "$Revision: 1.102.2.25 $\n";
+"3c59x.c:LK1.1.9 18 Aug 2000 Donald Becker and others. http://www.scyld.com/network/vortex.html " "$Revision: 1.102.2.31 $\n";
 
 MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
 MODULE_DESCRIPTION("3Com 3c59x/3c90x/3c575 series Vortex/Boomerang/Cyclone driver");
@@ -303,7 +311,7 @@
 enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8,
         EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */
         HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100,
- INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400 };
+ INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_BUG=0x800 };
 
 
 enum vortex_chips {
@@ -409,20 +417,20 @@
         {"3c556 10/100 Mini PCI Adapter",
          PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR, 128, },
         {"3c575 [Megahertz] 10/100 LAN CardBus",
- PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT|MAX_COLLISION_BUG, 128, },
         {"3c575 Boomerang CardBus",
- PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT|MAX_COLLISION_BUG, 128, },
 
         {"3CCFE575BT Cyclone CardBus",
- PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_LED_PWR, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_LED_PWR|MAX_COLLISION_BUG, 128, },
         {"3CCFE575CT Cyclone CardBus",
- PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|MAX_COLLISION_BUG, 128, },
         {"3CCFE656 Cyclone CardBus",
- PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|INVERT_LED_PWR, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|INVERT_LED_PWR|MAX_COLLISION_BUG, 128, },
         {"3CCFEM656B Cyclone+Winmodem CardBus",
- PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|INVERT_LED_PWR, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|INVERT_LED_PWR|MAX_COLLISION_BUG, 128, },
         {"3CCFE656C Tornado+Winmodem CardBus", /* From pcmcia-cs-3.1.5 */
- PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR, 128, },
+ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|MAX_COLLISION_BUG, 128, },
 
         {"3c450 HomePNA Tornado", /* AKPM: from Don's 0.99Q */
          PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY, 128, },
@@ -652,7 +660,6 @@
                 open:1,
                 must_free_region:1; /* Flag: if zero, Cardbus owns the I/O region */
         int drv_flags;
- int tx_reset_resume; /* Flag to retart timer after vortex_error handling */
         u16 status_enable;
         u16 intr_enable;
         u16 available_media; /* From Wn3_Options. */
@@ -1369,6 +1376,7 @@
 
         vortex_up(dev);
         vp->open = 1;
+ vp->tx_full = 0;
         return 0;
 
 out_free_irq:
@@ -1499,9 +1507,10 @@
         struct vortex_private *vp = (struct vortex_private *)dev->priv;
         long ioaddr = dev->base_addr;
 
- printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x.\n",
+ printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x. rc %d\n",
                    dev->name, inb(ioaddr + TxStatus),
- inw(ioaddr + EL3_STATUS));
+ inw(ioaddr + EL3_STATUS),
+ atomic_read(&dev->refcnt));
 
         /* Slight code bloat to be user friendly. */
         if ((inb(ioaddr + TxStatus) & 0x88) == 0x88)
@@ -1570,7 +1579,7 @@
 {
         struct vortex_private *vp = (struct vortex_private *)dev->priv;
         long ioaddr = dev->base_addr;
- int do_tx_reset = 0;
+ int do_tx_reset = 0, reset_mask = 0;
         unsigned char tx_status = 0;
 
         if (vortex_debug > 2) {
@@ -1589,10 +1598,14 @@
                 if (tx_status & 0x14) vp->stats.tx_fifo_errors++;
                 if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
                 outb(0, ioaddr + TxStatus);
- if (tx_status & 0x3a) /* TxReset after 16 collisions, despite what the manual says */
- do_tx_reset = 1; /* Also reset on reclaim errors */
- else /* Merely re-enable the transmitter. */
+ if (tx_status & 0x30) { /* txJabber or txUnderrun */
+ do_tx_reset = 1;
+ } else if ((tx_status & 0x08) && (vp->drv_flags & IS_CYCLONE)) { /* maxCollisions */
+ do_tx_reset = 1;
+ reset_mask = 0x0108; /* Reset interface logic, but not download logic */
+ } else { /* Merely re-enable the transmitter. */
                         outw(TxEnable, ioaddr + EL3_CMD);
+ }
         }
 
         if (status & RxEarly) { /* Rx early is unused. */
@@ -1644,40 +1657,11 @@
                 }
         }
 
- /*
- * Black magic. If we're resetting the transmitter, remember the current downlist
- * pointer and restore it afterwards. We can't usr cur_tx because that could
- * lag the actual hardware index.
- */
         if (do_tx_reset) {
- if (vp->full_bus_master_tx) {
- unsigned long old_down_list_ptr;
-
- wait_for_completion(dev, DownStall);
- old_down_list_ptr = inl(ioaddr + DownListPtr);
- wait_for_completion(dev, TxReset);
- outw(TxEnable, ioaddr + EL3_CMD);
-
- /* Restart DMA if necessary */
- outl(old_down_list_ptr, ioaddr + DownListPtr);
- if (vortex_debug > 2)
- printk(KERN_DEBUG "reset DMA to 0x%08x\n", inl(ioaddr + DownListPtr));
- outw(DownUnstall, ioaddr + EL3_CMD);
-
- /*
- * Here we make a single attempt to prevent a timeout by
- * restarting the timer if we think that the ISR has a good
- * chance of unjamming things.
- */
- if (vp->tx_reset_resume == 0 && vp->tx_full) {
- vp->tx_reset_resume = 1;
- dev->trans_start = jiffies;
- }
- } else {
- wait_for_completion(dev, TxReset);
- outw(TxEnable, ioaddr + EL3_CMD);
+ wait_for_completion(dev, TxReset|reset_mask);
+ outw(TxEnable, ioaddr + EL3_CMD);
+ if (!vp->full_bus_master_tx)
                         netif_wake_queue(dev);
- }
         }
 }
 
@@ -1785,7 +1769,6 @@
                 /* netif_start_queue (dev); */ /* AKPM: redundant? */
         }
         outw(DownUnstall, ioaddr + EL3_CMD);
- vp->tx_reset_resume = 0;
         spin_unlock_irqrestore(&vp->lock, flags);
         dev->trans_start = jiffies;
         return 0;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Thu Aug 31 2000 - 21:00:17 EST