[patch] (2/8) Add 802.3ad support to bonding

From: Shmulik Hen (hshmulik@intel.com)
Date: Thu Mar 20 2003 - 10:15:36 EST


This patch complements the latest drop of bonding from source-forge
(2.4.20-20030317) by incorporating the changes to bond_release_all() too.
It also fixes a hang when releasing a slave while outgoing traffic is
running, that looks like a deadlock between the BR_NETPROTO_LOCK,
dev->xmit_lock and the bond lock (happens on quad processor machines,
but KDB back trace wasn't clear enough).

This patch is against bonding 2.4.20-20030317.

diff -Nuarp linux-2.4.20-bonding-20030317/drivers/net/bonding.c linux-2.4.20-bonding-20030317-devel/drivers/net/bonding.c
--- linux-2.4.20-bonding-20030317/drivers/net/bonding.c 2003-03-18 17:03:24.000000000 +0200
+++ linux-2.4.20-bonding-20030317-devel/drivers/net/bonding.c 2003-03-18 17:03:24.000000000 +0200
@@ -286,6 +286,15 @@
  * checking slave and slave->dev (which only worked by accident).
  * - Misc code cleanup: get arp_send() prototype from header file,
  * add max_bonds to bonding.txt.
+ *
+ * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
+ * Shmulik Hen <shmulik.hen at intel dot com>
+ * - Make sure only bond_attach_slave() and bond_detach_slave() can
+ * manipulate the slave list, including slave_cnt, even when in
+ * bond_release_all().
+ * - Fixed hang in bond_release() while traffic is running.
+ * netdev_set_master() must not be called from within the bond lock.
+ *
  */
 
 #include <linux/config.h>
@@ -326,8 +335,8 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 
-#define DRV_VERSION "2.4.20-20030207"
-#define DRV_RELDATE "February 7, 2003"
+#define DRV_VERSION "2.4.20-20030317"
+#define DRV_RELDATE "March 17, 2003"
 #define DRV_NAME "bonding"
 #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
 
@@ -1469,16 +1478,14 @@ static int bond_release(struct net_devic
 
         bond = (struct bonding *) master->priv;
 
- write_lock_irqsave(&bond->lock, flags);
-
         /* master already enslaved, or slave not enslaved,
            or no slave for this master */
         if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) {
                 printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name);
- write_unlock_irqrestore(&bond->lock, flags);
                 return -EINVAL;
         }
 
+ write_lock_irqsave(&bond->lock, flags);
         bond->current_arp_slave = NULL;
         our_slave = (slave_t *)bond;
         old_current = bond->current_slave;
@@ -1497,38 +1504,7 @@ static int bond_release(struct net_devic
                         } else {
                                 printk(".\n");
                         }
-
- /* release the slave from its bond */
-
- if (multicast_mode == BOND_MULTICAST_ALL) {
- /* flush master's mc_list from slave */
- bond_mc_list_flush (slave, master);
-
- /* unset promiscuity level from slave */
- if (master->flags & IFF_PROMISC)
- dev_set_promiscuity(slave, -1);
-
- /* unset allmulti level from slave */
- if (master->flags & IFF_ALLMULTI)
- dev_set_allmulti(slave, -1);
- }
-
- netdev_set_master(slave, NULL);
-
- /* only restore its RUNNING flag if monitoring set it down */
- if (slave->flags & IFF_UP) {
- slave->flags |= IFF_RUNNING;
- }
-
- if (slave->flags & IFF_NOARP ||
- bond->current_slave != NULL) {
- dev_close(slave);
- our_slave->original_flags &= ~IFF_UP;
- }
-
- bond_restore_slave_flags(our_slave);
- kfree(our_slave);
-
+
                         if (bond->current_slave == NULL) {
                                 printk(KERN_INFO
                                         "%s: now running without any active interface !\n",
@@ -1539,16 +1515,51 @@ static int bond_release(struct net_devic
                                 bond->primary_slave = NULL;
                         }
 
- write_unlock_irqrestore(&bond->lock, flags);
- return 0; /* deletion OK */
+ break;
                 }
- }
 
- /* if we get here, it's because the device was not found */
+ }
         write_unlock_irqrestore(&bond->lock, flags);
+
+ if (our_slave == (slave_t *)bond) {
+ /* if we get here, it's because the device was not found */
+ printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name);
+ return -EINVAL;
+ }
+
+ /* undo settings and restore original values */
+
+ if (multicast_mode == BOND_MULTICAST_ALL) {
+ /* flush master's mc_list from slave */
+ bond_mc_list_flush (slave, master);
 
- printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name);
- return -EINVAL;
+ /* unset promiscuity level from slave */
+ if (master->flags & IFF_PROMISC)
+ dev_set_promiscuity(slave, -1);
+
+ /* unset allmulti level from slave */
+ if (master->flags & IFF_ALLMULTI)
+ dev_set_allmulti(slave, -1);
+ }
+
+ netdev_set_master(slave, NULL);
+
+ /* only restore its RUNNING flag if monitoring set it down */
+ if (slave->flags & IFF_UP) {
+ slave->flags |= IFF_RUNNING;
+ }
+
+ if (slave->flags & IFF_NOARP ||
+ bond->current_slave != NULL) {
+ dev_close(slave);
+ our_slave->original_flags &= ~IFF_UP;
+ }
+
+ bond_restore_slave_flags(our_slave);
+
+ kfree(our_slave);
+
+ return 0; /* deletion OK */
 }
 
 /*
@@ -1571,10 +1582,12 @@ static int bond_release_all(struct net_d
 
         bond = (struct bonding *) master->priv;
         bond->current_arp_slave = NULL;
+ bond->current_slave = NULL;
+ bond->primary_slave = NULL;
 
         while ((our_slave = bond->prev) != (slave_t *)bond) {
                 slave_dev = our_slave->dev;
- bond->prev = our_slave->prev;
+ bond_detach_slave(bond, our_slave);
 
                 if (multicast_mode == BOND_MULTICAST_ALL
                     || (multicast_mode == BOND_MULTICAST_ACTIVE
@@ -1604,10 +1617,6 @@ static int bond_release_all(struct net_d
                         dev_close(slave_dev);
         }
 
- bond->current_slave = NULL;
- bond->next = (slave_t *)bond;
- bond->slave_cnt = 0;
- bond->primary_slave = NULL;
         printk (KERN_INFO "%s: released all slaves\n", master->name);
 
         return 0;

-- 
| Shmulik Hen                                    |
| Israel Design Center (Jerusalem)               |
| LAN Access Division                            |
| Intel Communications Group, Intel corp.        |
|                                                |
| Anti-Spam: shmulik dot hen at intel dot com    |

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Mar 23 2003 - 22:00:30 EST