diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-20 17:43:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-20 17:43:29 -0700 |
commit | db6d8c7a4027b48d797b369a53f8470aaeed7063 (patch) | |
tree | e140c104a89abc2154e1f41a7db8ebecbb6fa0b4 /drivers/net/bonding/bond_main.c | |
parent | 3a533374283aea50eab3976d8a6d30532175f009 (diff) | |
parent | fb65a7c091529bfffb1262515252c0d0f6241c5c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (1232 commits)
iucv: Fix bad merging.
net_sched: Add size table for qdiscs
net_sched: Add accessor function for packet length for qdiscs
net_sched: Add qdisc_enqueue wrapper
highmem: Export totalhigh_pages.
ipv6 mcast: Omit redundant address family checks in ip6_mc_source().
net: Use standard structures for generic socket address structures.
ipv6 netns: Make several "global" sysctl variables namespace aware.
netns: Use net_eq() to compare net-namespaces for optimization.
ipv6: remove unused macros from net/ipv6.h
ipv6: remove unused parameter from ip6_ra_control
tcp: fix kernel panic with listening_get_next
tcp: Remove redundant checks when setting eff_sacks
tcp: options clean up
tcp: Fix MD5 signatures for non-linear skbs
sctp: Update sctp global memory limit allocations.
sctp: remove unnecessary byteshifting, calculate directly in big-endian
sctp: Allow only 1 listening socket with SO_REUSEADDR
sctp: Do not leak memory on multiple listen() calls
sctp: Support ipv6only AF_INET6 sockets.
...
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r-- | drivers/net/bonding/bond_main.c | 824 |
1 files changed, 505 insertions, 319 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 50a40e43315..9737c06045d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -88,6 +88,7 @@ #define BOND_LINK_ARP_INTERV 0 static int max_bonds = BOND_DEFAULT_MAX_BONDS; +static int num_grat_arp = 1; static int miimon = BOND_LINK_MON_INTERV; static int updelay = 0; static int downdelay = 0; @@ -99,11 +100,13 @@ static char *xmit_hash_policy = NULL; static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; static char *arp_validate = NULL; -static int fail_over_mac = 0; +static char *fail_over_mac = NULL; struct bond_params bonding_defaults; module_param(max_bonds, int, 0); MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); +module_param(num_grat_arp, int, 0644); +MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); module_param(miimon, int, 0); MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); module_param(updelay, int, 0); @@ -133,8 +136,8 @@ module_param_array(arp_ip_target, charp, NULL, 0); MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); module_param(arp_validate, charp, 0); MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); -module_param(fail_over_mac, int, 0); -MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); +module_param(fail_over_mac, charp, 0); +MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. none (default), active or follow"); /*----------------------------- Global variables ----------------------------*/ @@ -187,6 +190,13 @@ struct bond_parm_tbl arp_validate_tbl[] = { { NULL, -1}, }; +struct bond_parm_tbl fail_over_mac_tbl[] = { +{ "none", BOND_FOM_NONE}, +{ "active", BOND_FOM_ACTIVE}, +{ "follow", BOND_FOM_FOLLOW}, +{ NULL, -1}, +}; + /*-------------------------- Forward declarations ---------------------------*/ static void bond_send_gratuitous_arp(struct bonding *bond); @@ -261,14 +271,14 @@ static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) */ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) { - struct vlan_entry *vlan, *next; + struct vlan_entry *vlan; int res = -ENODEV; dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); write_lock_bh(&bond->lock); - list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { if (vlan->vlan_id == vlan_id) { list_del(&vlan->vlan_list); @@ -762,39 +772,49 @@ static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct /* * Push the promiscuity flag down to appropriate slaves */ -static void bond_set_promiscuity(struct bonding *bond, int inc) +static int bond_set_promiscuity(struct bonding *bond, int inc) { + int err = 0; if (USES_PRIMARY(bond->params.mode)) { /* write lock already acquired */ if (bond->curr_active_slave) { - dev_set_promiscuity(bond->curr_active_slave->dev, inc); + err = dev_set_promiscuity(bond->curr_active_slave->dev, + inc); } } else { struct slave *slave; int i; bond_for_each_slave(bond, slave, i) { - dev_set_promiscuity(slave->dev, inc); + err = dev_set_promiscuity(slave->dev, inc); + if (err) + return err; } } + return err; } /* * Push the allmulti flag down to all slaves */ -static void bond_set_allmulti(struct bonding *bond, int inc) +static int bond_set_allmulti(struct bonding *bond, int inc) { + int err = 0; if (USES_PRIMARY(bond->params.mode)) { /* write lock already acquired */ if (bond->curr_active_slave) { - dev_set_allmulti(bond->curr_active_slave->dev, inc); + err = dev_set_allmulti(bond->curr_active_slave->dev, + inc); } } else { struct slave *slave; int i; bond_for_each_slave(bond, slave, i) { - dev_set_allmulti(slave->dev, inc); + err = dev_set_allmulti(slave->dev, inc); + if (err) + return err; } } + return err; } /* @@ -955,6 +975,7 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct } if (new_active) { + /* FIXME: Signal errors upstream. */ if (bond->dev->flags & IFF_PROMISC) { dev_set_promiscuity(new_active->dev, 1); } @@ -970,6 +991,82 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct } } +/* + * bond_do_fail_over_mac + * + * Perform special MAC address swapping for fail_over_mac settings + * + * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + */ +static void bond_do_fail_over_mac(struct bonding *bond, + struct slave *new_active, + struct slave *old_active) +{ + u8 tmp_mac[ETH_ALEN]; + struct sockaddr saddr; + int rv; + + switch (bond->params.fail_over_mac) { + case BOND_FOM_ACTIVE: + if (new_active) + memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, + new_active->dev->addr_len); + break; + case BOND_FOM_FOLLOW: + /* + * if new_active && old_active, swap them + * if just old_active, do nothing (going to no active slave) + * if just new_active, set new_active to bond's MAC + */ + if (!new_active) + return; + + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + + if (old_active) { + memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); + memcpy(saddr.sa_data, old_active->dev->dev_addr, + ETH_ALEN); + saddr.sa_family = new_active->dev->type; + } else { + memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); + saddr.sa_family = bond->dev->type; + } + + rv = dev_set_mac_address(new_active->dev, &saddr); + if (rv) { + printk(KERN_ERR DRV_NAME + ": %s: Error %d setting MAC of slave %s\n", + bond->dev->name, -rv, new_active->dev->name); + goto out; + } + + if (!old_active) + goto out; + + memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); + saddr.sa_family = old_active->dev->type; + + rv = dev_set_mac_address(old_active->dev, &saddr); + if (rv) + printk(KERN_ERR DRV_NAME + ": %s: Error %d setting MAC of slave %s\n", + bond->dev->name, -rv, new_active->dev->name); +out: + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + break; + default: + printk(KERN_ERR DRV_NAME + ": %s: bond_do_fail_over_mac impossible: bad policy %d\n", + bond->dev->name, bond->params.fail_over_mac); + break; + } + +} + + /** * find_best_interface - select the best available slave to be the active one * @bond: our bonding struct @@ -1037,7 +1134,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond) * because it is apparently the best available slave we have, even though its * updelay hasn't timed out yet. * - * Warning: Caller must hold curr_slave_lock for writing. + * If new_active is not NULL, caller must hold bond->lock for read and + * curr_slave_lock for write_bh. */ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) { @@ -1048,6 +1146,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } if (new_active) { + new_active->jiffies = jiffies; + if (new_active->link == BOND_LINK_BACK) { if (USES_PRIMARY(bond->params.mode)) { printk(KERN_INFO DRV_NAME @@ -1059,7 +1159,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) new_active->delay = 0; new_active->link = BOND_LINK_UP; - new_active->jiffies = jiffies; if (bond->params.mode == BOND_MODE_8023AD) { bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1101,22 +1200,22 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) if (new_active) { bond_set_slave_active_flags(new_active); - } - /* when bonding does not set the slave MAC address, the bond MAC - * address is the one of the active slave. - */ - if (new_active && bond->params.fail_over_mac) - memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, - new_active->dev->addr_len); - if (bond->curr_active_slave && - test_bit(__LINK_STATE_LINKWATCH_PENDING, - &bond->curr_active_slave->dev->state)) { - dprintk("delaying gratuitous arp on %s\n", - bond->curr_active_slave->dev->name); - bond->send_grat_arp = 1; - } else + if (bond->params.fail_over_mac) + bond_do_fail_over_mac(bond, new_active, + old_active); + + bond->send_grat_arp = bond->params.num_grat_arp; bond_send_gratuitous_arp(bond); + + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + + netdev_bonding_change(bond->dev); + + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + } } } @@ -1129,7 +1228,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) * - The primary_slave has got its link back. * - A slave has got its link back and there's no old curr_active_slave. * - * Warning: Caller must hold curr_slave_lock for writing. + * Caller must hold bond->lock for read and curr_slave_lock for write_bh. */ void bond_select_active_slave(struct bonding *bond) { @@ -1376,14 +1475,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) printk(KERN_WARNING DRV_NAME ": %s: Warning: The first slave device " "specified does not support setting the MAC " - "address. Enabling the fail_over_mac option.", + "address. Setting fail_over_mac to active.", bond_dev->name); - bond->params.fail_over_mac = 1; - } else if (!bond->params.fail_over_mac) { + bond->params.fail_over_mac = BOND_FOM_ACTIVE; + } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { printk(KERN_ERR DRV_NAME ": %s: Error: The slave device specified " "does not support setting the MAC address, " - "but fail_over_mac is not enabled.\n" + "but fail_over_mac is not set to active.\n" , bond_dev->name); res = -EOPNOTSUPP; goto err_undo_flags; @@ -1456,20 +1555,24 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (!USES_PRIMARY(bond->params.mode)) { /* set promiscuity level to new slave */ if (bond_dev->flags & IFF_PROMISC) { - dev_set_promiscuity(slave_dev, 1); + res = dev_set_promiscuity(slave_dev, 1); + if (res) + goto err_close; } /* set allmulti level to new slave */ if (bond_dev->flags & IFF_ALLMULTI) { - dev_set_allmulti(slave_dev, 1); + res = dev_set_allmulti(slave_dev, 1); + if (res) + goto err_close; } - netif_tx_lock_bh(bond_dev); + netif_addr_lock_bh(bond_dev); /* upload master's mc_list to new slave */ for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); } - netif_tx_unlock_bh(bond_dev); + netif_addr_unlock_bh(bond_dev); } if (bond->params.mode == BOND_MODE_8023AD) { @@ -1490,6 +1593,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_compute_features(bond); + write_unlock_bh(&bond->lock); + + read_lock(&bond->lock); + new_slave->last_arp_rx = jiffies; if (bond->params.miimon && !bond->params.use_carrier) { @@ -1566,6 +1673,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + write_lock_bh(&bond->curr_slave_lock); + switch (bond->params.mode) { case BOND_MODE_ACTIVEBACKUP: bond_set_slave_inactive_flags(new_slave); @@ -1613,9 +1722,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) break; } /* switch(bond_mode) */ + write_unlock_bh(&bond->curr_slave_lock); + bond_set_carrier(bond); - write_unlock_bh(&bond->lock); + read_unlock(&bond->lock); res = bond_create_slave_symlinks(bond_dev, slave_dev); if (res) @@ -1639,6 +1750,10 @@ err_unset_master: err_restore_mac: if (!bond->params.fail_over_mac) { + /* XXX TODO - fom follow mode needs to change master's + * MAC if this slave's MAC is in use by the bond, or at + * least print a warning. + */ memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); addr.sa_family = slave_dev->type; dev_set_mac_address(slave_dev, &addr); @@ -1693,20 +1808,18 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) return -EINVAL; } - mac_addr_differ = memcmp(bond_dev->dev_addr, - slave->perm_hwaddr, - ETH_ALEN); - if (!mac_addr_differ && (bond->slave_cnt > 1)) { - printk(KERN_WARNING DRV_NAME - ": %s: Warning: the permanent HWaddr of %s - " - "%s - is still in use by %s. " - "Set the HWaddr of %s to a different address " - "to avoid conflicts.\n", - bond_dev->name, - slave_dev->name, - print_mac(mac, slave->perm_hwaddr), - bond_dev->name, - slave_dev->name); + if (!bond->params.fail_over_mac) { + mac_addr_differ = memcmp(bond_dev->dev_addr, slave->perm_hwaddr, + ETH_ALEN); + if (!mac_addr_differ && (bond->slave_cnt > 1)) + printk(KERN_WARNING DRV_NAME + ": %s: Warning: the permanent HWaddr of %s - " + "%s - is still in use by %s. " + "Set the HWaddr of %s to a different address " + "to avoid conflicts.\n", + bond_dev->name, slave_dev->name, + print_mac(mac, slave->perm_hwaddr), + bond_dev->name, slave_dev->name); } /* Inform AD package of unbinding of slave. */ @@ -1823,9 +1936,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } /* flush master's mc_list from slave */ - netif_tx_lock_bh(bond_dev); + netif_addr_lock_bh(bond_dev); bond_mc_list_flush(bond_dev, slave_dev); - netif_tx_unlock_bh(bond_dev); + netif_addr_unlock_bh(bond_dev); } netdev_set_master(slave_dev, NULL); @@ -1833,7 +1946,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) /* close slave before restoring its mac address */ dev_close(slave_dev); - if (!bond->params.fail_over_mac) { + if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { /* restore original ("permanent") mac address */ memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); addr.sa_family = slave_dev->type; @@ -1946,9 +2059,9 @@ static int bond_release_all(struct net_device *bond_dev) } /* flush master's mc_list from slave */ - netif_tx_lock_bh(bond_dev); + netif_addr_lock_bh(bond_dev); bond_mc_list_flush(bond_dev, slave_dev); - netif_tx_unlock_bh(bond_dev); + netif_addr_unlock_bh(bond_dev); } netdev_set_master(slave_dev, NULL); @@ -2136,17 +2249,6 @@ static int __bond_mii_monitor(struct bonding *bond, int have_locks) * program could monitor the link itself if needed. */ - if (bond->send_grat_arp) { - if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING, - &bond->curr_active_slave->dev->state)) - dprintk("Needs to send gratuitous arp but not yet\n"); - else { - dprintk("sending delayed gratuitous arp on on %s\n", - bond->curr_active_slave->dev->name); - bond_send_gratuitous_arp(bond); - bond->send_grat_arp = 0; - } - } read_lock(&bond->curr_slave_lock); oldcurrent = bond->curr_active_slave; read_unlock(&bond->curr_slave_lock); @@ -2387,6 +2489,13 @@ void bond_mii_monitor(struct work_struct *work) read_unlock(&bond->lock); return; } + + if (bond->send_grat_arp) { + read_lock(&bond->curr_slave_lock); + bond_send_gratuitous_arp(bond); + read_unlock(&bond->curr_slave_lock); + } + if (__bond_mii_monitor(bond, 0)) { read_unlock(&bond->lock); rtnl_lock(); @@ -2397,7 +2506,7 @@ void bond_mii_monitor(struct work_struct *work) read_lock(&bond->lock); } - delay = ((bond->params.miimon * HZ) / 1000) ? : 1; + delay = msecs_to_jiffies(bond->params.miimon); read_unlock(&bond->lock); queue_delayed_work(bond->wq, &bond->mii_work, delay); } @@ -2426,37 +2535,14 @@ out: return addr; } -static int bond_has_ip(struct bonding *bond) -{ - struct vlan_entry *vlan, *vlan_next; - - if (bond->master_ip) - return 1; - - if (list_empty(&bond->vlan_list)) - return 0; - - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { - if (vlan->vlan_ip) - return 1; - } - - return 0; -} - static int bond_has_this_ip(struct bonding *bond, __be32 ip) { - struct vlan_entry *vlan, *vlan_next; + struct vlan_entry *vlan; if (ip == bond->master_ip) return 1; - if (list_empty(&bond->vlan_list)) - return 0; - - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { if (ip == vlan->vlan_ip) return 1; } @@ -2498,7 +2584,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) { int i, vlan_id, rv; __be32 *targets = bond->params.arp_targets; - struct vlan_entry *vlan, *vlan_next; + struct vlan_entry *vlan; struct net_device *vlan_dev; struct flowi fl; struct rtable *rt; @@ -2545,8 +2631,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) } vlan_id = 0; - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); if (vlan_dev == rt->u.dst.dev) { vlan_id = vlan->vlan_id; @@ -2576,6 +2661,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) /* * Kick out a gratuitous ARP for an IP on the bonding master plus one * for each VLAN above us. + * + * Caller must hold curr_slave_lock for read or better */ static void bond_send_gratuitous_arp(struct bonding *bond) { @@ -2585,9 +2672,13 @@ static void bond_send_gratuitous_arp(struct bonding *bond) dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, slave ? slave->dev->name : "NULL"); - if (!slave) + + if (!slave || !bond->send_grat_arp || + test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return; + bond->send_grat_arp--; + if (bond->master_ip) { bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, bond->master_ip, 0); @@ -2707,7 +2798,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) read_lock(&bond->lock); - delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; + delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); if (bond->kill_timers) { goto out; @@ -2764,8 +2855,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * if we don't know our ip yet */ if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || - (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks) && - bond_has_ip(bond))) { + (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) { slave->link = BOND_LINK_DOWN; slave->state = BOND_STATE_BACKUP; @@ -2813,246 +2903,305 @@ out: } /* - * When using arp monitoring in active-backup mode, this function is - * called to determine if any backup slaves have went down or a new - * current slave needs to be found. - * The backup slaves never generate traffic, they are considered up by merely - * receiving traffic. If the current slave goes down, each backup slave will - * be given the opportunity to tx/rx an arp before being taken down - this - * prevents all slaves from being taken down due to the current slave not - * sending any traffic for the backups to receive. The arps are not necessarily - * necessary, any tx and rx traffic will keep the current slave up. While any - * rx traffic will keep the backup slaves up, the current slave is responsible - * for generating traffic to keep them up regardless of any other traffic they - * may have received. - * see loadbalance_arp_monitor for arp monitoring in load balancing mode + * Called to inspect slaves for active-backup mode ARP monitor link state + * changes. Sets new_link in slaves to specify what action should take + * place for the slave. Returns 0 if no changes are found, >0 if changes + * to link states must be committed. + * + * Called with bond->lock held for read. */ -void bond_activebackup_arp_mon(struct work_struct *work) +static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) { - struct bonding *bond = container_of(work, struct bonding, - arp_work.work); struct slave *slave; - int delta_in_ticks; - int i; + int i, commit = 0; - read_lock(&bond->lock); + bond_for_each_slave(bond, slave, i) { + slave->new_link = BOND_LINK_NOCHANGE; - delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; + if (slave->link != BOND_LINK_UP) { + if (time_before_eq(jiffies, slave_last_rx(bond, slave) + + delta_in_ticks)) { + slave->new_link = BOND_LINK_UP; + commit++; + } - if (bond->kill_timers) { - goto out; - } + continue; + } - if (bond->slave_cnt == 0) { - goto re_arm; + /* + * Give slaves 2*delta after being enslaved or made + * active. This avoids bouncing, as the last receive + * times need a full ARP monitor cycle to be updated. + */ + if (!time_after_eq(jiffies, slave->jiffies + + 2 * delta_in_ticks)) + continue; + + /* + * Backup slave is down if: + * - No current_arp_slave AND + * - more than 3*delta since last receive AND + * - the bond has an IP address + * + * Note: a non-null current_arp_slave indicates + * the curr_active_slave went down and we are + * searching for a new one; under this condition + * we only take the curr_active_slave down - this + * gives each slave a chance to tx/rx traffic + * before being taken out + */ + if (slave->state == BOND_STATE_BACKUP && + !bond->current_arp_slave && + time_after(jiffies, slave_last_rx(bond, slave) + + 3 * delta_in_ticks)) { + slave->new_link = BOND_LINK_DOWN; + commit++; + } + + /* + * Active slave is down if: + * - more than 2*delta since transmitting OR + * - (more than 2*delta since receive AND + * the bond has an IP address) + */ + if ((slave->state == BOND_STATE_ACTIVE) && + (time_after_eq(jiffies, slave->dev->trans_start + + 2 * delta_in_ticks) || + (time_after_eq(jiffies, slave_last_rx(bond, slave) + + 2 * delta_in_ticks)))) { + slave->new_link = BOND_LINK_DOWN; + commit++; + } } - /* determine if any slave has come up or any backup slave has - * gone down - * TODO: what about up/down delay in arp mode? it wasn't here before - * so it can wait + read_lock(&bond->curr_slave_lock); + + /* + * Trigger a commit if the primary option setting has changed. */ - bond_for_each_slave(bond, slave, i) { - if (slave->link != BOND_LINK_UP) { - if (time_before_eq(jiffies, - slave_last_rx(bond, slave) + delta_in_ticks)) { + if (bond->primary_slave && + (bond->primary_slave != bond->curr_active_slave) && + (bond->primary_slave->link == BOND_LINK_UP)) + commit++; - slave->link = BOND_LINK_UP; + read_unlock(&bond->curr_slave_lock); - write_lock_bh(&bond->curr_slave_lock); + return commit; +} - if ((!bond->curr_active_slave) && - time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) { - bond_change_active_slave(bond, slave); - bond->current_arp_slave = NULL; - } else if (bond->curr_active_slave != slave) { - /* this slave has just come up but we - * already have a current slave; this - * can also happen if bond_enslave adds - * a new slave that is up while we are - * searching for a new slave - */ - bond_set_slave_inactive_flags(slave); - bond->current_arp_slave = NULL; - } +/* + * Called to commit link state changes noted by inspection step of + * active-backup mode ARP monitor. + * + * Called with RTNL and bond->lock for read. + */ +static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) +{ + struct slave *slave; + int i; - bond_set_carrier(bond); + bond_for_each_slave(bond, slave, i) { + switch (slave->new_link) { + case BOND_LINK_NOCHANGE: + continue; - if (slave == bond->curr_active_slave) { - printk(KERN_INFO DRV_NAME - ": %s: %s is up and now the " - "active interface\n", - bond->dev->name, - slave->dev->name); - netif_carrier_on(bond->dev); - } else { - printk(KERN_INFO DRV_NAME - ": %s: backup interface %s is " - "now up\n", - bond->dev->name, - slave->dev->name); - } + case BOND_LINK_UP: + write_lock_bh(&bond->curr_slave_lock); - write_unlock_bh(&bond->curr_slave_lock); - } - } else { - read_lock(&bond->curr_slave_lock); + if (!bond->curr_active_slave && + time_before_eq(jiffies, slave->dev->trans_start + + delta_in_ticks)) { + slave->link = BOND_LINK_UP; + bond_change_active_slave(bond, slave); + bond->current_arp_slave = NULL; - if ((slave != bond->curr_active_slave) && - (!bond->current_arp_slave) && - (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks) && - bond_has_ip(bond))) { - /* a backup slave has gone down; three times - * the delta allows the current slave to be - * taken out before the backup slave. - * note: a non-null current_arp_slave indicates - * the curr_active_slave went down and we are - * searching for a new one; under this - * condition we only take the curr_active_slave - * down - this gives each slave a chance to - * tx/rx traffic before being taken out + printk(KERN_INFO DRV_NAME + ": %s: %s is up and now the " + "active interface\n", + bond->dev->name, slave->dev->name); + + } else if (bond->curr_active_slave != slave) { + /* this slave has just come up but we + * already have a current slave; this can + * also happen if bond_enslave adds a new + * slave that is up while we are searching + * for a new slave */ + slave->link = BOND_LINK_UP; + bond_set_slave_inactive_flags(slave); + bond->current_arp_slave = NULL; - read_unlock(&bond->curr_slave_lock); + printk(KERN_INFO DRV_NAME + ": %s: backup interface %s is now up\n", + bond->dev->name, slave->dev->name); + } - slave->link = BOND_LINK_DOWN; + write_unlock_bh(&bond->curr_slave_lock); - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } + break; + + case BOND_LINK_DOWN: + if (slave->link_failure_count < UINT_MAX) + slave->link_failure_count++; + + slave->link = BOND_LINK_DOWN; + + if (slave == bond->curr_active_slave) { + printk(KERN_INFO DRV_NAME + ": %s: link status down for active " + "interface %s, disabling it\n", + bond->dev->name, slave->dev->name); bond_set_slave_inactive_flags(slave); + write_lock_bh(&bond->curr_slave_lock); + + bond_select_active_slave(bond); + if (bond->curr_active_slave) + bond->curr_active_slave->jiffies = + jiffies; + + write_unlock_bh(&bond->curr_slave_lock); + + bond->current_arp_slave = NULL; + + } else if (slave->state == BOND_STATE_BACKUP) { printk(KERN_INFO DRV_NAME ": %s: backup interface %s is now down\n", - bond->dev->name, - slave->dev->name); - } else { - read_unlock(&bond->curr_slave_lock); + bond->dev->name, slave->dev->name); + + bond_set_slave_inactive_flags(slave); } + break; + + default: + printk(KERN_ERR DRV_NAME + ": %s: impossible: new_link %d on slave %s\n", + bond->dev->name, slave->new_link, + slave->dev->name); } } - read_lock(&bond->curr_slave_lock); - slave = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); - - if (slave) { - /* if we have sent traffic in the past 2*arp_intervals but - * haven't xmit and rx traffic in that time interval, select - * a different slave. slave->jiffies is only updated when - * a slave first becomes the curr_active_slave - not necessarily - * after every arp; this ensures the slave has a full 2*delta - * before being taken out. if a primary is being used, check - * if it is up and needs to take over as the curr_active_slave - */ - if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || - (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks) && - bond_has_ip(bond))) && - time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) { + /* + * No race with changes to primary via sysfs, as we hold rtnl. + */ + if (bond->primary_slave && + (bond->primary_slave != bond->curr_active_slave) && + (bond->primary_slave->link == BOND_LINK_UP)) { + write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, bond->primary_slave); + write_unlock_bh(&bond->curr_slave_lock); + } - slave->link = BOND_LINK_DOWN; + bond_set_carrier(bond); +} - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } +/* + * Send ARP probes for active-backup mode ARP monitor. + * + * Called with bond->lock held for read. + */ +static void bond_ab_arp_probe(struct bonding *bond) +{ + struct slave *slave; + int i; - printk(KERN_INFO DRV_NAME - ": %s: link status down for active interface " - "%s, disabling it\n", - bond->dev->name, - slave->dev->name); + read_lock(&bond->curr_slave_lock); - write_lock_bh(&bond->curr_slave_lock); + if (bond->current_arp_slave && bond->curr_active_slave) + printk("PROBE: c_arp %s && cas %s BAD\n", + bond->current_arp_slave->dev->name, + bond->curr_active_slave->dev->name); - bond_select_active_slave(bond); - slave = bond->curr_active_slave; + if (bond->curr_active_slave) { + bond_arp_send_all(bond, bond->curr_active_slave); + read_unlock(&bond->curr_slave_lock); + return; + } - write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->curr_slave_lock); - bond->current_arp_slave = slave; + /* if we don't have a curr_active_slave, search for the next available + * backup slave from the current_arp_slave and make it the candidate + * for becoming the curr_active_slave + */ - if (slave) { - slave->jiffies = jiffies; - } - } else if ((bond->primary_slave) && - (bond->primary_slave != slave) && - (bond->primary_slave->link == BOND_LINK_UP)) { - /* at this point, slave is the curr_active_slave */ - printk(KERN_INFO DRV_NAME - ": %s: changing from interface %s to primary " - "interface %s\n", - bond->dev->name, - slave->dev->name, - bond->primary_slave->dev->name); + if (!bond->current_arp_slave) { + bond->current_arp_slave = bond->first_slave; + if (!bond->current_arp_slave) + return; + } - /* primary is up so switch to it */ - write_lock_bh(&bond->curr_slave_lock); - bond_change_active_slave(bond, bond->primary_slave); - write_unlock_bh(&bond->curr_slave_lock); + bond_set_slave_inactive_flags(bond->current_arp_slave); - slave = bond->primary_slave; + /* search for next candidate */ + bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { + if (IS_UP(slave->dev)) { + slave->link = BOND_LINK_BACK; + bond_set_slave_active_flags(slave); + bond_arp_send_all(bond, slave); slave->jiffies = jiffies; - } else { - bond->current_arp_slave = NULL; + bond->current_arp_slave = slave; + break; } - /* the current slave must tx an arp to ensure backup slaves - * rx traffic + /* if the link state is up at this point, we + * mark it down - this can happen if we have + * simultaneous link failures and + * reselect_active_interface doesn't make this + * one the current slave so it is still marked + * up when it is actually down */ - if (slave && bond_has_ip(bond)) { - bond_arp_send_all(bond, slave); + if (slave->link == BOND_LINK_UP) { + slave->link = BOND_LINK_DOWN; + if (slave->link_failure_count < UINT_MAX) + slave->link_failure_count++; + + bond_set_slave_inactive_flags(slave); + + printk(KERN_INFO DRV_NAME + ": %s: backup interface %s is now down.\n", + bond->dev->name, slave->dev->name); } } +} - /* if we don't have a curr_active_slave, search for the next available - * backup slave from the current_arp_slave and make it the candidate - * for becoming the curr_active_slave - */ - if (!slave) { - if (!bond->current_arp_slave) { - bond->current_arp_slave = bond->first_slave; - } +void bond_activebackup_arp_mon(struct work_struct *work) +{ + struct bonding *bond = container_of(work, struct bonding, + arp_work.work); + int delta_in_ticks; - if (bond->current_arp_slave) { - bond_set_slave_inactive_flags(bond->current_arp_slave); + read_lock(&bond->lock); - /* search for next candidate */ - bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { - if (IS_UP(slave->dev)) { - slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(slave); - bond_arp_send_all(bond, slave); - slave->jiffies = jiffies; - bond->current_arp_slave = slave; - break; - } + if (bond->kill_timers) + goto out; - /* if the link state is up at this point, we - * mark it down - this can happen if we have - * simultaneous link failures and - * reselect_active_interface doesn't make this - * one the current slave so it is still marked - * up when it is actually down - */ - if (slave->link == BOND_LINK_UP) { - slave->link = BOND_LINK_DOWN; - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } + delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); - bond_set_slave_inactive_flags(slave); + if (bond->slave_cnt == 0) + goto re_arm; - printk(KERN_INFO DRV_NAME - ": %s: backup interface %s is " - "now down.\n", - bond->dev->name, - slave->dev->name); - } - } - } + if (bond->send_grat_arp) { + read_lock(&bond->curr_slave_lock); + bond_send_gratuitous_arp(bond); + read_unlock(&bond->curr_slave_lock); } + if (bond_ab_arp_inspect(bond, delta_in_ticks)) { + read_unlock(&bond->lock); + rtnl_lock(); + read_lock(&bond->lock); + + bond_ab_arp_commit(bond, delta_in_ticks); + + read_unlock(&bond->lock); + rtnl_unlock(); + read_lock(&bond->lock); + } + + bond_ab_arp_probe(bond); + re_arm: if (bond->params.arp_interval) { queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); @@ -3128,7 +3277,8 @@ static void bond_info_show_master(struct seq_file *seq) if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac) - seq_printf(seq, " (fail_over_mac)"); + seq_printf(seq, " (fail_over_mac %s)", + fail_over_mac_tbl[bond->params.fail_over_mac].modename); seq_printf(seq, "\n"); @@ -3500,13 +3650,13 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, { struct in_ifaddr *ifa = ptr; struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; - struct bonding *bond, *bond_next; - struct vlan_entry *vlan, *vlan_next; + struct bonding *bond; + struct vlan_entry *vlan; if (dev_net(ifa->ifa_dev->dev) != &init_net) return NOTIFY_DONE; - list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bond_dev_list, bond_list) { if (bond->dev == event_dev) { switch (event) { case NETDEV_UP: @@ -3520,11 +3670,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, } } - if (list_empty(&bond->vlan_list)) - continue; - - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); if (vlan_dev == event_dev) { switch (event) { @@ -3716,6 +3862,7 @@ static int bond_close(struct net_device *bond_dev) write_lock_bh(&bond->lock); + bond->send_grat_arp = 0; /* signal timers not to re-arm */ bond->kill_timers = 1; @@ -3933,6 +4080,10 @@ static void bond_set_multicast_list(struct net_device *bond_dev) * Do promisc before checking multicast_mode */ if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { + /* + * FIXME: Need to handle the error when one of the multi-slaves + * encounters error. + */ bond_set_promiscuity(bond, 1); } @@ -3942,6 +4093,10 @@ static void bond_set_multicast_list(struct net_device *bond_dev) /* set allmulti flag to slaves */ if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { + /* + * FIXME: Need to handle the error when one of the multi-slaves + * encounters error. + */ bond_set_allmulti(bond, 1); } @@ -4060,10 +4215,10 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); /* - * If fail_over_mac is enabled, do nothing and return success. - * Returning an error causes ifenslave to fail. + * If fail_over_mac is set to active, do nothing and return + * success. Returning an error causes ifenslave to fail. */ - if (bond->params.fail_over_mac) + if (bond->params.fail_over_mac == BOND_FOM_ACTIVE) return 0; if (!is_valid_ether_addr(sa->sa_data)) { @@ -4518,9 +4673,9 @@ static void bond_free_all(void) struct net_device *bond_dev = bond->dev; bond_work_cancel_all(bond); - netif_tx_lock_bh(bond_dev); + netif_addr_lock_bh(bond_dev); bond_mc_list_destroy(bond); - netif_tx_unlock_bh(bond_dev); + netif_addr_unlock_bh(bond_dev); /* Release the bonded slaves */ bond_release_all(bond_dev); bond_destroy(bond); @@ -4568,7 +4723,7 @@ int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { - int arp_validate_value; + int arp_validate_value, fail_over_mac_value; /* * Convert string parameters. @@ -4618,11 +4773,11 @@ static int bond_check_params(struct bond_params *params) } } - if (max_bonds < 1 || max_bonds > INT_MAX) { + if (max_bonds < 0 || max_bonds > INT_MAX) { printk(KERN_WARNING DRV_NAME ": Warning: max_bonds (%d) not in range %d-%d, so it " "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", - max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); + max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); max_bonds = BOND_DEFAULT_MAX_BONDS; } @@ -4658,6 +4813,13 @@ static int bond_check_params(struct bond_params *params) use_carrier = 1; } + if (num_grat_arp < 0 || num_grat_arp > 255) { + printk(KERN_WARNING DRV_NAME + ": Warning: num_grat_arp (%d) not in range 0-255 so it " + "was reset to 1 \n", num_grat_arp); + num_grat_arp = 1; + } + /* reset values for 802.3ad */ if (bond_mode == BOND_MODE_8023AD) { if (!miimon) { @@ -4814,7 +4976,7 @@ static int bond_check_params(struct bond_params *params) printk("\n"); - } else { + } else if (max_bonds) { /* miimon and arp_interval not set, we need one so things * work as expected, see bonding.txt for details */ @@ -4836,15 +4998,29 @@ static int bond_check_params(struct bond_params *params) primary = NULL; } - if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) - printk(KERN_WARNING DRV_NAME - ": Warning: fail_over_mac only affects " - "active-backup mode.\n"); + if (fail_over_mac) { + fail_over_mac_value = bond_parse_parm(fail_over_mac, + fail_over_mac_tbl); + if (fail_over_mac_value == -1) { + printk(KERN_ERR DRV_NAME + ": Error: invalid fail_over_mac \"%s\"\n", + arp_validate == NULL ? "NULL" : arp_validate); + return -EINVAL; + } + + if (bond_mode != BOND_MODE_ACTIVEBACKUP) + printk(KERN_WARNING DRV_NAME + ": Warning: fail_over_mac only affects " + "active-backup mode.\n"); + } else { + fail_over_mac_value = BOND_FOM_NONE; + } /* fill params struct with the proper values */ params->mode = bond_mode; params->xmit_policy = xmit_hashtype; params->miimon = miimon; + params->num_grat_arp = num_grat_arp; params->arp_interval = arp_interval; params->arp_validate = arp_validate_value; params->updelay = updelay; @@ -4852,7 +5028,7 @@ static int bond_check_params(struct bond_params *params) params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; - params->fail_over_mac = fail_over_mac; + params->fail_over_mac = fail_over_mac_value; if (primary) { strncpy(params->primary, primary, IFNAMSIZ); @@ -4866,15 +5042,28 @@ static int bond_check_params(struct bond_params *params) static struct lock_class_key bonding_netdev_xmit_lock_key; +static void bond_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &bonding_netdev_xmit_lock_key); +} + +static void bond_set_lockdep_class(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); +} + /* Create a new bond based on the specified name and bonding parameters. * If name is NULL, obtain a suitable "bond%d" name for us. * Caller must NOT hold rtnl_lock; we need to release it here before we * set up our sysfs entries. */ -int bond_create(char *name, struct bond_params *params, struct bonding **newbond) +int bond_create(char *name, struct bond_params *params) { struct net_device *bond_dev; - struct bonding *bond, *nxt; + struct bonding *bond; int res; rtnl_lock(); @@ -4882,7 +5071,7 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond /* Check to see if the bond already exists. */ if (name) { - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) + list_for_each_entry(bond, &bond_dev_list, bond_list) if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) { printk(KERN_ERR DRV_NAME ": cannot add bond %s; it already exists\n", @@ -4923,10 +5112,7 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond goto out_bond; } - lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key); - - if (newbond) - *newbond = bond_dev->priv; + bond_set_lockdep_class(bond_dev); netif_carrier_off(bond_dev); @@ -4957,7 +5143,7 @@ static int __init bonding_init(void) { int i; int res; - struct bonding *bond, *nxt; + struct bonding *bond; printk(KERN_INFO "%s", version); @@ -4973,7 +5159,7 @@ static int __init bonding_init(void) init_rwsem(&bonding_rwsem); for (i = 0; i < max_bonds; i++) { - res = bond_create(NULL, &bonding_defaults, NULL); + res = bond_create(NULL, &bonding_defaults); if (res) goto err; } @@ -4987,7 +5173,7 @@ static int __init bonding_init(void) goto out; err: - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bond_dev_list, bond_list) { bond_work_cancel_all(bond); destroy_workqueue(bond->wq); } |