diff options
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r-- | drivers/net/bonding/bond_main.c | 635 |
1 files changed, 314 insertions, 321 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6191b551a0e..4c08018d733 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -79,7 +79,6 @@ #include <net/pkt_sched.h> #include <linux/rculist.h> #include <net/flow_keys.h> -#include <linux/reciprocal_div.h> #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -87,13 +86,11 @@ /*---------------------------- Module parameters ----------------------------*/ /* monitor all links that often (in milliseconds). <=0 disables monitoring */ -#define BOND_LINK_MON_INTERV 0 -#define BOND_LINK_ARP_INTERV 0 static int max_bonds = BOND_DEFAULT_MAX_BONDS; static int tx_queues = BOND_DEFAULT_TX_QUEUES; static int num_peer_notif = 1; -static int miimon = BOND_LINK_MON_INTERV; +static int miimon; static int updelay; static int downdelay; static int use_carrier = 1; @@ -104,7 +101,7 @@ static char *lacp_rate; static int min_links; static char *ad_select; static char *xmit_hash_policy; -static int arp_interval = BOND_LINK_ARP_INTERV; +static int arp_interval; static char *arp_ip_target[BOND_MAX_ARP_TARGETS]; static char *arp_validate; static char *arp_all_targets; @@ -113,6 +110,7 @@ static int all_slaves_active; static struct bond_params bonding_defaults; static int resend_igmp = BOND_DEFAULT_RESEND_IGMP; static int packets_per_slave = 1; +static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; module_param(max_bonds, int, 0); MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); @@ -189,6 +187,10 @@ module_param(packets_per_slave, int, 0); MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " "mode; 0 for a random slave, 1 packet per " "slave (default), >1 packets per slave."); +module_param(lp_interval, uint, 0); +MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " + "the bonding driver sends learning packets to " + "each slaves peer switch. 
The default is 1."); /*----------------------------- Global variables ----------------------------*/ @@ -204,67 +206,6 @@ static int bond_mode = BOND_MODE_ROUNDROBIN; static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; static int lacp_fast; -const struct bond_parm_tbl bond_lacp_tbl[] = { -{ "slow", AD_LACP_SLOW}, -{ "fast", AD_LACP_FAST}, -{ NULL, -1}, -}; - -const struct bond_parm_tbl bond_mode_tbl[] = { -{ "balance-rr", BOND_MODE_ROUNDROBIN}, -{ "active-backup", BOND_MODE_ACTIVEBACKUP}, -{ "balance-xor", BOND_MODE_XOR}, -{ "broadcast", BOND_MODE_BROADCAST}, -{ "802.3ad", BOND_MODE_8023AD}, -{ "balance-tlb", BOND_MODE_TLB}, -{ "balance-alb", BOND_MODE_ALB}, -{ NULL, -1}, -}; - -const struct bond_parm_tbl xmit_hashtype_tbl[] = { -{ "layer2", BOND_XMIT_POLICY_LAYER2}, -{ "layer3+4", BOND_XMIT_POLICY_LAYER34}, -{ "layer2+3", BOND_XMIT_POLICY_LAYER23}, -{ "encap2+3", BOND_XMIT_POLICY_ENCAP23}, -{ "encap3+4", BOND_XMIT_POLICY_ENCAP34}, -{ NULL, -1}, -}; - -const struct bond_parm_tbl arp_all_targets_tbl[] = { -{ "any", BOND_ARP_TARGETS_ANY}, -{ "all", BOND_ARP_TARGETS_ALL}, -{ NULL, -1}, -}; - -const struct bond_parm_tbl arp_validate_tbl[] = { -{ "none", BOND_ARP_VALIDATE_NONE}, -{ "active", BOND_ARP_VALIDATE_ACTIVE}, -{ "backup", BOND_ARP_VALIDATE_BACKUP}, -{ "all", BOND_ARP_VALIDATE_ALL}, -{ NULL, -1}, -}; - -const struct bond_parm_tbl fail_over_mac_tbl[] = { -{ "none", BOND_FOM_NONE}, -{ "active", BOND_FOM_ACTIVE}, -{ "follow", BOND_FOM_FOLLOW}, -{ NULL, -1}, -}; - -const struct bond_parm_tbl pri_reselect_tbl[] = { -{ "always", BOND_PRI_RESELECT_ALWAYS}, -{ "better", BOND_PRI_RESELECT_BETTER}, -{ "failure", BOND_PRI_RESELECT_FAILURE}, -{ NULL, -1}, -}; - -struct bond_parm_tbl ad_select_tbl[] = { -{ "stable", BOND_AD_STABLE}, -{ "bandwidth", BOND_AD_BANDWIDTH}, -{ "count", BOND_AD_COUNT}, -{ NULL, -1}, -}; - /*-------------------------- Forward declarations ---------------------------*/ static int bond_init(struct net_device *bond_dev); @@ -299,7 +240,7 @@ const char 
*bond_mode_name(int mode) * @skb: hw accel VLAN tagged skb to transmit * @slave_dev: slave that is supposed to xmit this skbuff */ -int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, +void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) { skb->dev = slave_dev; @@ -312,8 +253,6 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); else dev_queue_xmit(skb); - - return 0; } /* @@ -463,6 +402,22 @@ static void bond_update_speed_duplex(struct slave *slave) return; } +const char *bond_slave_link_status(s8 link) +{ + switch (link) { + case BOND_LINK_UP: + return "up"; + case BOND_LINK_FAIL: + return "going down"; + case BOND_LINK_DOWN: + return "down"; + case BOND_LINK_BACK: + return "going back"; + default: + return "unknown"; + } +} + /* * if <dev> supports MII link status reporting, check its link status. * @@ -591,33 +546,22 @@ static int bond_set_allmulti(struct bonding *bond, int inc) * device and retransmit an IGMP JOIN request to the current active * slave. 
*/ -static void bond_resend_igmp_join_requests(struct bonding *bond) +static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) { + struct bonding *bond = container_of(work, struct bonding, + mcast_work.work); + if (!rtnl_trylock()) { queue_delayed_work(bond->wq, &bond->mcast_work, 1); return; } call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); - rtnl_unlock(); - /* We use curr_slave_lock to protect against concurrent access to - * igmp_retrans from multiple running instances of this function and - * bond_change_active_slave - */ - write_lock_bh(&bond->curr_slave_lock); if (bond->igmp_retrans > 1) { bond->igmp_retrans--; queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); } - write_unlock_bh(&bond->curr_slave_lock); -} - -static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) -{ - struct bonding *bond = container_of(work, struct bonding, - mcast_work.work); - - bond_resend_igmp_join_requests(bond); + rtnl_unlock(); } /* Flush bond's hardware addresses from slave @@ -697,14 +641,12 @@ static void bond_set_dev_addr(struct net_device *bond_dev, * * Perform special MAC address swapping for fail_over_mac settings * - * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + * Called with RTNL, curr_slave_lock for write_bh. 
*/ static void bond_do_fail_over_mac(struct bonding *bond, struct slave *new_active, struct slave *old_active) __releases(&bond->curr_slave_lock) - __releases(&bond->lock) - __acquires(&bond->lock) __acquires(&bond->curr_slave_lock) { u8 tmp_mac[ETH_ALEN]; @@ -715,9 +657,7 @@ static void bond_do_fail_over_mac(struct bonding *bond, case BOND_FOM_ACTIVE: if (new_active) { write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); bond_set_dev_addr(bond->dev, new_active->dev); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); } break; @@ -731,7 +671,6 @@ static void bond_do_fail_over_mac(struct bonding *bond, return; write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); if (old_active) { memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); @@ -761,7 +700,6 @@ static void bond_do_fail_over_mac(struct bonding *bond, pr_err("%s: Error %d setting MAC of slave %s\n", bond->dev->name, -rv, new_active->dev->name); out: - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); break; default: @@ -821,7 +759,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond) static bool bond_should_notify_peers(struct bonding *bond) { - struct slave *slave = bond->curr_active_slave; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + rcu_read_unlock(); pr_debug("bond_should_notify_peers: bond %s slave %s\n", bond->dev->name, slave ? slave->dev->name : "NULL"); @@ -846,8 +788,7 @@ static bool bond_should_notify_peers(struct bonding *bond) * because it is apparently the best available slave we have, even though its * updelay hasn't timed out yet. * - * If new_active is not NULL, caller must hold bond->lock for read and - * curr_slave_lock for write_bh. + * If new_active is not NULL, caller must hold curr_slave_lock for write_bh. 
*/ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) { @@ -916,14 +857,12 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); if (should_notify_peers) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); } } @@ -949,7 +888,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) * - The primary_slave has got its link back. * - A slave has got its link back and there's no old curr_active_slave. * - * Caller must hold bond->lock for read and curr_slave_lock for write_bh. + * Caller must hold curr_slave_lock for write_bh. */ void bond_select_active_slave(struct bonding *bond) { @@ -1589,16 +1528,20 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_unregister; } + res = bond_sysfs_slave_add(new_slave); + if (res) { + pr_debug("Error %d calling bond_sysfs_slave_add\n", res); + goto err_upper_unlink; + } + bond->slave_cnt++; bond_compute_features(bond); bond_set_carrier(bond); if (USES_PRIMARY(bond->params.mode)) { - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); } pr_info("%s: enslaving %s as a%s interface with a%s link.\n", @@ -1610,6 +1553,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return 0; /* Undo stages on error */ +err_upper_unlink: + bond_upper_dev_unlink(bond_dev, slave_dev); + err_unregister: netdev_rx_handler_unregister(slave_dev); @@ -1618,19 +1564,13 @@ err_detach: bond_hw_addr_flush(bond_dev, slave_dev); vlan_vids_del_by_dev(slave_dev, bond_dev); - write_lock_bh(&bond->lock); if (bond->primary_slave == new_slave) bond->primary_slave = NULL; if (bond->curr_active_slave == new_slave) { - 
bond_change_active_slave(bond, NULL); - write_unlock_bh(&bond->lock); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); - } else { - write_unlock_bh(&bond->lock); } slave_disable_netpoll(new_slave); @@ -1658,7 +1598,7 @@ err_free: err_undo_flags: /* Enslave of first slave has failed and we need to fix master's mac */ if (!bond_has_slaves(bond) && - ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) + ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr)) eth_hw_addr_random(bond_dev); return res; @@ -1695,23 +1635,21 @@ static int __bond_release_one(struct net_device *bond_dev, } block_netpoll_tx(); - write_lock_bh(&bond->lock); slave = bond_get_slave_by_dev(bond, slave_dev); if (!slave) { /* not a slave of this bond */ pr_info("%s: %s not enslaved\n", bond_dev->name, slave_dev->name); - write_unlock_bh(&bond->lock); unblock_netpoll_tx(); return -EINVAL; } - write_unlock_bh(&bond->lock); - /* release the slave from its bond */ bond->slave_cnt--; + bond_sysfs_slave_del(slave); + bond_upper_dev_unlink(bond_dev, slave_dev); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. @@ -1720,12 +1658,10 @@ static int __bond_release_one(struct net_device *bond_dev, write_lock_bh(&bond->lock); /* Inform AD package of unbinding of slave. 
*/ - if (bond->params.mode == BOND_MODE_8023AD) { - /* must be called before the slave is - * detached from the list - */ + if (bond->params.mode == BOND_MODE_8023AD) bond_3ad_unbind_slave(slave); - } + + write_unlock_bh(&bond->lock); pr_info("%s: releasing %s interface %s\n", bond_dev->name, @@ -1737,7 +1673,7 @@ static int __bond_release_one(struct net_device *bond_dev, bond->current_arp_slave = NULL; if (!all && !bond->params.fail_over_mac) { - if (ether_addr_equal(bond_dev->dev_addr, slave->perm_hwaddr) && + if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && bond_has_slaves(bond)) pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", bond_dev->name, slave_dev->name, @@ -1748,8 +1684,11 @@ static int __bond_release_one(struct net_device *bond_dev, if (bond->primary_slave == slave) bond->primary_slave = NULL; - if (oldcurrent == slave) + if (oldcurrent == slave) { + write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, NULL); + write_unlock_bh(&bond->curr_slave_lock); + } if (bond_is_lb(bond)) { /* Must be called only after the slave has been @@ -1757,9 +1696,7 @@ static int __bond_release_one(struct net_device *bond_dev, * has been cleared (if our_slave == old_current), * but before a new active slave is selected. */ - write_unlock_bh(&bond->lock); bond_alb_deinit_slave(bond, slave); - write_lock_bh(&bond->lock); } if (all) { @@ -1770,15 +1707,11 @@ static int __bond_release_one(struct net_device *bond_dev, * is no concern that another slave add/remove event * will interfere. 
*/ - write_unlock_bh(&bond->lock); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); - write_lock_bh(&bond->lock); } if (!bond_has_slaves(bond)) { @@ -1793,7 +1726,6 @@ static int __bond_release_one(struct net_device *bond_dev, } } - write_unlock_bh(&bond->lock); unblock_netpoll_tx(); synchronize_rcu(); @@ -1928,7 +1860,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !bond->curr_active_slave ? true : false; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2119,48 +2051,42 @@ do_failover: * an acquisition of appropriate locks followed by a commit phase to * implement whatever link state changes are indicated. */ -void bond_mii_monitor(struct work_struct *work) +static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); bool should_notify_peers = false; unsigned long delay; - read_lock(&bond->lock); - delay = msecs_to_jiffies(bond->params.miimon); if (!bond_has_slaves(bond)) goto re_arm; + rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); if (bond_miimon_inspect(bond)) { - read_unlock(&bond->lock); + rcu_read_unlock(); /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { - read_lock(&bond->lock); delay = 1; should_notify_peers = false; goto re_arm; } - read_lock(&bond->lock); - bond_miimon_commit(bond); - read_unlock(&bond->lock); rtnl_unlock(); /* might sleep, hold no other locks */ - read_lock(&bond->lock); - } + } else + rcu_read_unlock(); re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay); - read_unlock(&bond->lock); - if (should_notify_peers) { if (!rtnl_trylock()) return; @@ -2414,20 +2340,20 @@ static bool 
bond_time_in_interval(struct bonding *bond, unsigned long last_act, * arp is transmitted to generate traffic. see activebackup_arp_monitor for * arp monitoring in active backup mode. */ -void bond_loadbalance_arp_mon(struct work_struct *work) +static void bond_loadbalance_arp_mon(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, arp_work.work); struct slave *slave, *oldcurrent; struct list_head *iter; - int do_failover = 0; - - read_lock(&bond->lock); + int do_failover = 0, slave_state_changed = 0; if (!bond_has_slaves(bond)) goto re_arm; - oldcurrent = bond->curr_active_slave; + rcu_read_lock(); + + oldcurrent = ACCESS_ONCE(bond->curr_active_slave); /* see if any of the previous devices are up now (i.e. they have * xmt and rcv traffic). the curr_active_slave does not come into * the picture unless it is null. also, slave->jiffies is not needed @@ -2436,7 +2362,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * TODO: what about up/down delay in arp mode? it wasn't here before * so it can wait */ - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); if (slave->link != BOND_LINK_UP) { @@ -2444,7 +2370,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) bond_time_in_interval(bond, slave->dev->last_rx, 1)) { slave->link = BOND_LINK_UP; - bond_set_active_slave(slave); + slave_state_changed = 1; /* primary_slave has no meaning in round-robin * mode. 
the window of a slave being up and @@ -2473,7 +2399,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) !bond_time_in_interval(bond, slave->dev->last_rx, 2)) { slave->link = BOND_LINK_DOWN; - bond_set_backup_slave(slave); + slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2498,22 +2424,33 @@ void bond_loadbalance_arp_mon(struct work_struct *work) bond_arp_send_all(bond, slave); } - if (do_failover) { - block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); + rcu_read_unlock(); - bond_select_active_slave(bond); + if (do_failover || slave_state_changed) { + if (!rtnl_trylock()) + goto re_arm; - write_unlock_bh(&bond->curr_slave_lock); - unblock_netpoll_tx(); + if (slave_state_changed) { + bond_slave_state_change(bond); + } else if (do_failover) { + /* the bond_select_active_slave must hold RTNL + * and curr_slave_lock for write. + */ + block_netpoll_tx(); + write_lock_bh(&bond->curr_slave_lock); + + bond_select_active_slave(bond); + + write_unlock_bh(&bond->curr_slave_lock); + unblock_netpoll_tx(); + } + rtnl_unlock(); } re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, msecs_to_jiffies(bond->params.arp_interval)); - - read_unlock(&bond->lock); } /* @@ -2522,7 +2459,7 @@ re_arm: * place for the slave. Returns 0 if no changes are found, >0 if changes * to link states must be committed. * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold. */ static int bond_ab_arp_inspect(struct bonding *bond) { @@ -2531,7 +2468,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) struct slave *slave; int commit = 0; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; last_rx = slave_last_rx(bond, slave); @@ -2593,7 +2530,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) * Called to commit link state changes noted by inspection step of * active-backup mode ARP monitor. 
* - * Called with RTNL and bond->lock for read. + * Called with RTNL hold. */ static void bond_ab_arp_commit(struct bonding *bond) { @@ -2667,42 +2604,49 @@ do_failover: /* * Send ARP probes for active-backup mode ARP monitor. - * - * Called with bond->lock held for read. */ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond) { - struct slave *slave, *before = NULL, *new_slave = NULL; + struct slave *slave, *before = NULL, *new_slave = NULL, + *curr_arp_slave, *curr_active_slave; struct list_head *iter; bool found = false; - read_lock(&bond->curr_slave_lock); + rcu_read_lock(); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); + curr_active_slave = rcu_dereference(bond->curr_active_slave); - if (bond->current_arp_slave && bond->curr_active_slave) + if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", - bond->current_arp_slave->dev->name, - bond->curr_active_slave->dev->name); + curr_arp_slave->dev->name, + curr_active_slave->dev->name); - if (bond->curr_active_slave) { - bond_arp_send_all(bond, bond->curr_active_slave); - read_unlock(&bond->curr_slave_lock); - return; + if (curr_active_slave) { + bond_arp_send_all(bond, curr_active_slave); + rcu_read_unlock(); + return true; } - - read_unlock(&bond->curr_slave_lock); + rcu_read_unlock(); /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave */ - if (!bond->current_arp_slave) { - bond->current_arp_slave = bond_first_slave(bond); - if (!bond->current_arp_slave) - return; + if (!rtnl_trylock()) + return false; + /* curr_arp_slave might have gone away */ + curr_arp_slave = ACCESS_ONCE(bond->current_arp_slave); + + if (!curr_arp_slave) { + curr_arp_slave = bond_first_slave(bond); + if (!curr_arp_slave) { + rtnl_unlock(); + return true; + } } - bond_set_slave_inactive_flags(bond->current_arp_slave); + 
bond_set_slave_inactive_flags(curr_arp_slave); bond_for_each_slave(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) @@ -2727,68 +2671,67 @@ static void bond_ab_arp_probe(struct bonding *bond) pr_info("%s: backup interface %s is now down.\n", bond->dev->name, slave->dev->name); } - if (slave == bond->current_arp_slave) + if (slave == curr_arp_slave) found = true; } if (!new_slave && before) new_slave = before; - if (!new_slave) - return; + if (!new_slave) { + rtnl_unlock(); + return true; + } new_slave->link = BOND_LINK_BACK; bond_set_slave_active_flags(new_slave); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; - bond->current_arp_slave = new_slave; + rcu_assign_pointer(bond->current_arp_slave, new_slave); + rtnl_unlock(); + return true; } -void bond_activebackup_arp_mon(struct work_struct *work) +static void bond_activebackup_arp_mon(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, arp_work.work); - bool should_notify_peers = false; + bool should_notify_peers = false, should_commit = false; int delta_in_ticks; - read_lock(&bond->lock); - delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); if (!bond_has_slaves(bond)) goto re_arm; + rcu_read_lock(); should_notify_peers = bond_should_notify_peers(bond); + should_commit = bond_ab_arp_inspect(bond); + rcu_read_unlock(); - if (bond_ab_arp_inspect(bond)) { - read_unlock(&bond->lock); - + if (should_commit) { /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { - read_lock(&bond->lock); delta_in_ticks = 1; should_notify_peers = false; goto re_arm; } - read_lock(&bond->lock); - bond_ab_arp_commit(bond); - - read_unlock(&bond->lock); rtnl_unlock(); - read_lock(&bond->lock); } - bond_ab_arp_probe(bond); + if (!bond_ab_arp_probe(bond)) { + /* rtnl locking failed, re-arm */ + delta_in_ticks = 1; + should_notify_peers = false; + } re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, 
delta_in_ticks); - read_unlock(&bond->lock); - if (should_notify_peers) { if (!rtnl_trylock()) return; @@ -2896,9 +2839,27 @@ static int bond_slave_netdev_event(unsigned long event, */ break; case NETDEV_CHANGENAME: - /* - * TODO: handle changing the primary's name - */ + /* we don't care if we don't have primary set */ + if (!USES_PRIMARY(bond->params.mode) || + !bond->params.primary[0]) + break; + + if (slave == bond->primary_slave) { + /* slave's name changed - he's no longer primary */ + bond->primary_slave = NULL; + } else if (!strcmp(slave_dev->name, bond->params.primary)) { + /* we have a new primary slave */ + bond->primary_slave = slave; + } else { /* we didn't change primary - exit */ + break; + } + + pr_info("%s: Primary slave changed to %s, reselecting active slave.\n", + bond->dev->name, bond->primary_slave ? slave_dev->name : + "none"); + write_lock_bh(&bond->curr_slave_lock); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); break; case NETDEV_FEAT_CHANGE: bond_compute_features(bond); @@ -3178,6 +3139,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd struct ifslave k_sinfo; struct ifslave __user *u_sinfo = NULL; struct mii_ioctl_data *mii = NULL; + struct bond_opt_value newval; struct net *net; int res = 0; @@ -3249,37 +3211,35 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - slave_dev = dev_get_by_name(net, ifr->ifr_slave); + slave_dev = __dev_get_by_name(net, ifr->ifr_slave); pr_debug("slave_dev=%p:\n", slave_dev); if (!slave_dev) - res = -ENODEV; - else { - pr_debug("slave_dev->name=%s:\n", slave_dev->name); - switch (cmd) { - case BOND_ENSLAVE_OLD: - case SIOCBONDENSLAVE: - res = bond_enslave(bond_dev, slave_dev); - break; - case BOND_RELEASE_OLD: - case SIOCBONDRELEASE: - res = bond_release(bond_dev, slave_dev); - break; - case BOND_SETHWADDR_OLD: - case SIOCBONDSETHWADDR: - 
bond_set_dev_addr(bond_dev, slave_dev); - res = 0; - break; - case BOND_CHANGE_ACTIVE_OLD: - case SIOCBONDCHANGEACTIVE: - res = bond_option_active_slave_set(bond, slave_dev); - break; - default: - res = -EOPNOTSUPP; - } + return -ENODEV; - dev_put(slave_dev); + pr_debug("slave_dev->name=%s:\n", slave_dev->name); + switch (cmd) { + case BOND_ENSLAVE_OLD: + case SIOCBONDENSLAVE: + res = bond_enslave(bond_dev, slave_dev); + break; + case BOND_RELEASE_OLD: + case SIOCBONDRELEASE: + res = bond_release(bond_dev, slave_dev); + break; + case BOND_SETHWADDR_OLD: + case SIOCBONDSETHWADDR: + bond_set_dev_addr(bond_dev, slave_dev); + res = 0; + break; + case BOND_CHANGE_ACTIVE_OLD: + case SIOCBONDCHANGEACTIVE: + bond_opt_initstr(&newval, slave_dev->name); + res = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); + break; + default: + res = -EOPNOTSUPP; } return res; @@ -3550,7 +3510,7 @@ unwind: * it fails, it tries to find the first available slave for transmission. * The skb is consumed in all cases, thus the function is void. 
*/ -void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) { struct list_head *iter; struct slave *slave; @@ -3590,8 +3550,9 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) */ static u32 bond_rr_gen_slave_id(struct bonding *bond) { - int packets_per_slave = bond->params.packets_per_slave; u32 slave_id; + struct reciprocal_value reciprocal_packets_per_slave; + int packets_per_slave = bond->params.packets_per_slave; switch (packets_per_slave) { case 0: @@ -3601,8 +3562,10 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) slave_id = bond->rr_tx_counter; break; default: + reciprocal_packets_per_slave = + bond->params.reciprocal_packets_per_slave; slave_id = reciprocal_divide(bond->rr_tx_counter, - packets_per_slave); + reciprocal_packets_per_slave); break; } bond->rr_tx_counter++; @@ -3707,28 +3670,24 @@ static inline int bond_slave_override(struct bonding *bond, struct sk_buff *skb) { struct slave *slave = NULL; - struct slave *check_slave; struct list_head *iter; - int res = 1; if (!skb->queue_mapping) return 1; /* Find out if any slaves have the same mapping as this skb. */ - bond_for_each_slave_rcu(bond, check_slave, iter) { - if (check_slave->queue_id == skb->queue_mapping) { - slave = check_slave; + bond_for_each_slave_rcu(bond, slave, iter) { + if (slave->queue_id == skb->queue_mapping) { + if (slave_can_tx(slave)) { + bond_dev_queue_xmit(bond, skb, slave->dev); + return 0; + } + /* If the slave isn't UP, use default transmit policy. */ break; } } - /* If the slave isn't UP, use default transmit policy. 
*/ - if (slave && slave->queue_id && IS_UP(slave->dev) && - (slave->link == BOND_LINK_UP)) { - res = bond_dev_queue_xmit(bond, skb, slave->dev); - } - - return res; + return 1; } @@ -3941,6 +3900,9 @@ void bond_setup(struct net_device *bond_dev) * capable */ + /* Don't allow bond devices to change network namespaces. */ + bond_dev->features |= NETIF_F_NETNS_LOCAL; + bond_dev->hw_features = BOND_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | @@ -3974,6 +3936,29 @@ static void bond_uninit(struct net_device *bond_dev) /*------------------------- Module initialization ---------------------------*/ +int bond_parm_tbl_lookup(int mode, const struct bond_parm_tbl *tbl) +{ + int i; + + for (i = 0; tbl[i].modename; i++) + if (mode == tbl[i].mode) + return tbl[i].mode; + + return -1; +} + +static int bond_parm_tbl_lookup_name(const char *modename, + const struct bond_parm_tbl *tbl) +{ + int i; + + for (i = 0; tbl[i].modename; i++) + if (strcmp(modename, tbl[i].modename) == 0) + return tbl[i].mode; + + return -1; +} + /* * Convert string input module parms. Accept either the * number of the mode or its string name. 
A bit complicated because @@ -3982,27 +3967,17 @@ static void bond_uninit(struct net_device *bond_dev) */ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) { - int modeint = -1, i, rv; - char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; + int modeint; + char *p, modestr[BOND_MAX_MODENAME_LEN + 1]; for (p = (char *)buf; *p; p++) if (!(isdigit(*p) || isspace(*p))) break; - if (*p) - rv = sscanf(buf, "%20s", modestr); - else - rv = sscanf(buf, "%d", &modeint); - - if (!rv) - return -1; - - for (i = 0; tbl[i].modename; i++) { - if (modeint == tbl[i].mode) - return tbl[i].mode; - if (strcmp(modestr, tbl[i].modename) == 0) - return tbl[i].mode; - } + if (*p && sscanf(buf, "%20s", modestr) != 0) + return bond_parm_tbl_lookup_name(modestr, tbl); + else if (sscanf(buf, "%d", &modeint) != 0) + return bond_parm_tbl_lookup(modeint, tbl); return -1; } @@ -4010,18 +3985,20 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; + struct bond_opt_value newval, *valptr; int arp_all_targets_value; /* * Convert string parameters. */ if (mode) { - bond_mode = bond_parse_parm(mode, bond_mode_tbl); - if (bond_mode == -1) { - pr_err("Error: Invalid bonding mode \"%s\"\n", - mode == NULL ? 
"NULL" : mode); + bond_opt_initstr(&newval, mode); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); + if (!valptr) { + pr_err("Error: Invalid bonding mode \"%s\"\n", mode); return -EINVAL; } + bond_mode = valptr->value; } if (xmit_hash_policy) { @@ -4030,14 +4007,15 @@ static int bond_check_params(struct bond_params *params) pr_info("xmit_hash_policy param is irrelevant in mode %s\n", bond_mode_name(bond_mode)); } else { - xmit_hashtype = bond_parse_parm(xmit_hash_policy, - xmit_hashtype_tbl); - if (xmit_hashtype == -1) { + bond_opt_initstr(&newval, xmit_hash_policy); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH), + &newval); + if (!valptr) { pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", - xmit_hash_policy == NULL ? "NULL" : xmit_hash_policy); return -EINVAL; } + xmit_hashtype = valptr->value; } } @@ -4046,26 +4024,29 @@ static int bond_check_params(struct bond_params *params) pr_info("lacp_rate param is irrelevant in mode %s\n", bond_mode_name(bond_mode)); } else { - lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); - if (lacp_fast == -1) { + bond_opt_initstr(&newval, lacp_rate); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE), + &newval); + if (!valptr) { pr_err("Error: Invalid lacp rate \"%s\"\n", - lacp_rate == NULL ? "NULL" : lacp_rate); + lacp_rate); return -EINVAL; } + lacp_fast = valptr->value; } } if (ad_select) { - params->ad_select = bond_parse_parm(ad_select, ad_select_tbl); - if (params->ad_select == -1) { - pr_err("Error: Invalid ad_select \"%s\"\n", - ad_select == NULL ? 
"NULL" : ad_select); + bond_opt_initstr(&newval, ad_select); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), + &newval); + if (!valptr) { + pr_err("Error: Invalid ad_select \"%s\"\n", ad_select); return -EINVAL; } - - if (bond_mode != BOND_MODE_8023AD) { + params->ad_select = valptr->value; + if (bond_mode != BOND_MODE_8023AD) pr_warning("ad_select param only affects 802.3ad mode\n"); - } } else { params->ad_select = BOND_AD_STABLE; } @@ -4077,9 +4058,9 @@ static int bond_check_params(struct bond_params *params) } if (miimon < 0) { - pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n", - miimon, INT_MAX, BOND_LINK_MON_INTERV); - miimon = BOND_LINK_MON_INTERV; + pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n", + miimon, INT_MAX); + miimon = 0; } if (updelay < 0) { @@ -4106,8 +4087,8 @@ static int bond_check_params(struct bond_params *params) num_peer_notif = 1; } - /* reset values for 802.3ad */ - if (bond_mode == BOND_MODE_8023AD) { + /* reset values for 802.3ad/TLB/ALB */ + if (BOND_NO_USES_ARP(bond_mode)) { if (!miimon) { pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); pr_warning("Forcing miimon to 100msec\n"); @@ -4136,22 +4117,13 @@ static int bond_check_params(struct bond_params *params) resend_igmp = BOND_DEFAULT_RESEND_IGMP; } - if (packets_per_slave < 0 || packets_per_slave > USHRT_MAX) { + bond_opt_initval(&newval, packets_per_slave); + if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) { pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n", packets_per_slave, USHRT_MAX); packets_per_slave = 1; } - /* reset values for TLB/ALB */ - if ((bond_mode == BOND_MODE_TLB) || - (bond_mode == BOND_MODE_ALB)) { - if (!miimon) { - pr_warning("Warning: miimon must be specified, otherwise bonding will not 
detect link failure and link speed which are essential for TLB/ALB load balancing\n"); - pr_warning("Forcing miimon to 100msec\n"); - miimon = BOND_DEFAULT_MIIMON; - } - } - if (bond_mode == BOND_MODE_ALB) { pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n", updelay); @@ -4191,9 +4163,9 @@ static int bond_check_params(struct bond_params *params) } if (arp_interval < 0) { - pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n", - arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); - arp_interval = BOND_LINK_ARP_INTERV; + pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to 0\n", + arp_interval, INT_MAX); + arp_interval = 0; } for (arp_ip_count = 0, i = 0; @@ -4232,35 +4204,40 @@ static int bond_check_params(struct bond_params *params) return -EINVAL; } - arp_validate_value = bond_parse_parm(arp_validate, - arp_validate_tbl); - if (arp_validate_value == -1) { + bond_opt_initstr(&newval, arp_validate); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE), + &newval); + if (!valptr) { pr_err("Error: invalid arp_validate \"%s\"\n", - arp_validate == NULL ? 
"NULL" : arp_validate); + arp_validate); return -EINVAL; } - } else + arp_validate_value = valptr->value; + } else { arp_validate_value = 0; + } arp_all_targets_value = 0; if (arp_all_targets) { - arp_all_targets_value = bond_parse_parm(arp_all_targets, - arp_all_targets_tbl); - - if (arp_all_targets_value == -1) { + bond_opt_initstr(&newval, arp_all_targets); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), + &newval); + if (!valptr) { pr_err("Error: invalid arp_all_targets_value \"%s\"\n", arp_all_targets); arp_all_targets_value = 0; + } else { + arp_all_targets_value = valptr->value; } } if (miimon) { pr_info("MII link monitoring set to %d ms\n", miimon); } else if (arp_interval) { + valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, + arp_validate_value); pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", - arp_interval, - arp_validate_tbl[arp_validate_value].modename, - arp_ip_count); + arp_interval, valptr->string, arp_ip_count); for (i = 0; i < arp_ip_count; i++) pr_info(" %s", arp_ip_target[i]); @@ -4284,33 +4261,41 @@ static int bond_check_params(struct bond_params *params) } if (primary && primary_reselect) { - primary_reselect_value = bond_parse_parm(primary_reselect, - pri_reselect_tbl); - if (primary_reselect_value == -1) { + bond_opt_initstr(&newval, primary_reselect); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT), + &newval); + if (!valptr) { pr_err("Error: Invalid primary_reselect \"%s\"\n", - primary_reselect == - NULL ? 
"NULL" : primary_reselect); + primary_reselect); return -EINVAL; } + primary_reselect_value = valptr->value; } else { primary_reselect_value = BOND_PRI_RESELECT_ALWAYS; } if (fail_over_mac) { - fail_over_mac_value = bond_parse_parm(fail_over_mac, - fail_over_mac_tbl); - if (fail_over_mac_value == -1) { + bond_opt_initstr(&newval, fail_over_mac); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC), + &newval); + if (!valptr) { pr_err("Error: invalid fail_over_mac \"%s\"\n", - arp_validate == NULL ? "NULL" : arp_validate); + fail_over_mac); return -EINVAL; } - + fail_over_mac_value = valptr->value; if (bond_mode != BOND_MODE_ACTIVEBACKUP) pr_warning("Warning: fail_over_mac only affects active-backup mode.\n"); } else { fail_over_mac_value = BOND_FOM_NONE; } + if (lp_interval == 0) { + pr_warning("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", + INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); + lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; + } + /* fill params struct with the proper values */ params->mode = bond_mode; params->xmit_policy = xmit_hashtype; @@ -4330,11 +4315,19 @@ static int bond_check_params(struct bond_params *params) params->all_slaves_active = all_slaves_active; params->resend_igmp = resend_igmp; params->min_links = min_links; - params->lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; - if (packets_per_slave > 1) - params->packets_per_slave = reciprocal_value(packets_per_slave); - else - params->packets_per_slave = packets_per_slave; + params->lp_interval = lp_interval; + params->packets_per_slave = packets_per_slave; + if (packets_per_slave > 0) { + params->reciprocal_packets_per_slave = + reciprocal_value(packets_per_slave); + } else { + /* reciprocal_packets_per_slave is unused if + * packets_per_slave is 0 or 1, just initialize it + */ + params->reciprocal_packets_per_slave = + (struct reciprocal_value) { 0 }; + } + if (primary) { strncpy(params->primary, primary, IFNAMSIZ); params->primary[IFNAMSIZ - 1] = 0; |