summaryrefslogtreecommitdiffstats
path: root/drivers/net/bonding/bond_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r--drivers/net/bonding/bond_main.c390
1 files changed, 349 insertions, 41 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 269a5e40734..2c930da90a8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -475,7 +475,18 @@
* Solution is to move call to dev_remove_pack outside of the
* spinlock.
* Set version to 2.6.1.
- *
+ * 2005/06/05 - Jay Vosburgh <fubar@us.ibm.com>
+ * - Support for generating gratuitous ARPs in active-backup mode.
+ * Includes support for VLAN tagging all bonding-generated ARPs
+ * as needed. Set version to 2.6.2.
+ * 2005/06/08 - Jason Gabler <jygabler at lbl dot gov>
+ * - alternate hashing policy support for mode 2
+ * * Added kernel parameter "xmit_hash_policy" to allow the selection
+ * of different hashing policies for mode 2. The original mode 2
+ * policy is the default, now found in xmit_hash_policy_layer2().
+ * * Added xmit_hash_policy_layer34()
+ * - Modified by Jay Vosburgh <fubar@us.ibm.com> to also support mode 4.
+ * Set version to 2.6.3.
*/
//#define BONDING_DEBUG 1
@@ -490,7 +501,10 @@
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
+#include <net/ip.h>
#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -519,6 +533,7 @@
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
+#include <net/route.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
@@ -537,6 +552,7 @@ static int use_carrier = 1;
static char *mode = NULL;
static char *primary = NULL;
static char *lacp_rate = NULL;
+static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
@@ -556,6 +572,8 @@ module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)");
+module_param(xmit_hash_policy, charp, 0);
+MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method : 0 for layer 2 (default), 1 for layer 3+4");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
@@ -574,8 +592,8 @@ static struct proc_dir_entry *bond_proc_dir = NULL;
static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
static int arp_ip_count = 0;
-static u32 my_ip = 0;
static int bond_mode = BOND_MODE_ROUNDROBIN;
+static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
static int lacp_fast = 0;
static int app_abi_ver = 0;
static int orig_app_abi_ver = -1; /* This is used to save the first ABI version
@@ -585,7 +603,6 @@ static int orig_app_abi_ver = -1; /* This is used to save the first ABI version
* command comes from an application using
* another ABI version.
*/
-
struct bond_parm_tbl {
char *modename;
int mode;
@@ -608,9 +625,16 @@ static struct bond_parm_tbl bond_mode_tbl[] = {
{ NULL, -1},
};
+static struct bond_parm_tbl xmit_hashtype_tbl[] = {
+{ "layer2", BOND_XMIT_POLICY_LAYER2},
+{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
+{ NULL, -1},
+};
+
/*-------------------------- Forward declarations ---------------------------*/
-static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode);
+static inline void bond_set_mode_ops(struct bonding *bond, int mode);
+static void bond_send_gratuitous_arp(struct bonding *bond);
/*---------------------------- General routines -----------------------------*/
@@ -659,6 +683,7 @@ static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
INIT_LIST_HEAD(&vlan->vlan_list);
vlan->vlan_id = vlan_id;
+ vlan->vlan_ip = 0;
write_lock_bh(&bond->lock);
@@ -1468,16 +1493,6 @@ static void bond_change_active_slave(struct bonding *bond, struct slave *new_act
}
}
- if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
- if (old_active) {
- bond_set_slave_inactive_flags(old_active);
- }
-
- if (new_active) {
- bond_set_slave_active_flags(new_active);
- }
- }
-
if (USES_PRIMARY(bond->params.mode)) {
bond_mc_swap(bond, new_active, old_active);
}
@@ -1488,6 +1503,17 @@ static void bond_change_active_slave(struct bonding *bond, struct slave *new_act
} else {
bond->curr_active_slave = new_active;
}
+
+ if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
+ if (old_active) {
+ bond_set_slave_inactive_flags(old_active);
+ }
+
+ if (new_active) {
+ bond_set_slave_active_flags(new_active);
+ }
+ bond_send_gratuitous_arp(bond);
+ }
}
/**
@@ -2694,15 +2720,180 @@ out:
read_unlock(&bond->lock);
}
+
+static u32 bond_glean_dev_ip(struct net_device *dev)
+{
+ struct in_device *idev;
+ struct in_ifaddr *ifa;
+ u32 addr = 0;
+
+ if (!dev)
+ return 0;
+
+ rcu_read_lock();
+ idev = __in_dev_get(dev);
+ if (!idev)
+ goto out;
+
+ ifa = idev->ifa_list;
+ if (!ifa)
+ goto out;
+
+ addr = ifa->ifa_local;
+out:
+ rcu_read_unlock();
+ return addr;
+}
+
+static int bond_has_ip(struct bonding *bond)
+{
+ struct vlan_entry *vlan, *vlan_next;
+
+ if (bond->master_ip)
+ return 1;
+
+ if (list_empty(&bond->vlan_list))
+ return 0;
+
+ list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
+ vlan_list) {
+ if (vlan->vlan_ip)
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * We go to the (large) trouble of VLAN tagging ARP frames because
+ * switches in VLAN mode (especially if ports are configured as
+ * "native" to a VLAN) might not pass non-tagged frames.
+ */
+static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip, u32 src_ip, unsigned short vlan_id)
+{
+ struct sk_buff *skb;
+
+ dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op,
+ slave_dev->name, dest_ip, src_ip, vlan_id);
+
+ skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
+ NULL, slave_dev->dev_addr, NULL);
+
+ if (!skb) {
+ printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n");
+ return;
+ }
+ if (vlan_id) {
+ skb = vlan_put_tag(skb, vlan_id);
+ if (!skb) {
+ printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n");
+ return;
+ }
+ }
+ arp_xmit(skb);
+}
+
+
static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
{
- int i;
+ int i, vlan_id, rv;
u32 *targets = bond->params.arp_targets;
+ struct vlan_entry *vlan, *vlan_next;
+ struct net_device *vlan_dev;
+ struct flowi fl;
+ struct rtable *rt;
for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
- arp_send(ARPOP_REQUEST, ETH_P_ARP, targets[i], slave->dev,
- my_ip, NULL, slave->dev->dev_addr,
- NULL);
+ dprintk("basa: target %x\n", targets[i]);
+ if (list_empty(&bond->vlan_list)) {
+ dprintk("basa: empty vlan: arp_send\n");
+ bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+ bond->master_ip, 0);
+ continue;
+ }
+
+ /*
+ * If VLANs are configured, we do a route lookup to
+ * determine which VLAN interface would be used, so we
+ * can tag the ARP with the proper VLAN tag.
+ */
+ memset(&fl, 0, sizeof(fl));
+ fl.fl4_dst = targets[i];
+ fl.fl4_tos = RTO_ONLINK;
+
+ rv = ip_route_output_key(&rt, &fl);
+ if (rv) {
+ if (net_ratelimit()) {
+ printk(KERN_WARNING DRV_NAME
+ ": %s: no route to arp_ip_target %u.%u.%u.%u\n",
+ bond->dev->name, NIPQUAD(fl.fl4_dst));
+ }
+ continue;
+ }
+
+ /*
+ * This target is not on a VLAN
+ */
+ if (rt->u.dst.dev == bond->dev) {
+ dprintk("basa: rtdev == bond->dev: arp_send\n");
+ bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+ bond->master_ip, 0);
+ continue;
+ }
+
+ vlan_id = 0;
+ list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
+ vlan_list) {
+ vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+ if (vlan_dev == rt->u.dst.dev) {
+ vlan_id = vlan->vlan_id;
+ dprintk("basa: vlan match on %s %d\n",
+ vlan_dev->name, vlan_id);
+ break;
+ }
+ }
+
+ if (vlan_id) {
+ bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+ vlan->vlan_ip, vlan_id);
+ continue;
+ }
+
+ if (net_ratelimit()) {
+ printk(KERN_WARNING DRV_NAME
+ ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n",
+ bond->dev->name, NIPQUAD(fl.fl4_dst),
+ rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
+ }
+ }
+}
+
+/*
+ * Kick out a gratuitous ARP for an IP on the bonding master plus one
+ * for each VLAN above us.
+ */
+static void bond_send_gratuitous_arp(struct bonding *bond)
+{
+ struct slave *slave = bond->curr_active_slave;
+ struct vlan_entry *vlan;
+ struct net_device *vlan_dev;
+
+ dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name,
+ slave ? slave->dev->name : "NULL");
+ if (!slave)
+ return;
+
+ if (bond->master_ip) {
+ bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
+ bond->master_ip, 0);
+ }
+
+ list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+ vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+ if (vlan->vlan_ip) {
+ bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
+ vlan->vlan_ip, vlan->vlan_id);
+ }
}
}
@@ -2781,7 +2972,7 @@ static void bond_loadbalance_arp_mon(struct net_device *bond_dev)
*/
if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
(((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
- my_ip)) {
+ bond_has_ip(bond))) {
slave->link = BOND_LINK_DOWN;
slave->state = BOND_STATE_BACKUP;
@@ -2920,7 +3111,7 @@ static void bond_activebackup_arp_mon(struct net_device *bond_dev)
if ((slave != bond->curr_active_slave) &&
(!bond->current_arp_slave) &&
(((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
- my_ip)) {
+ bond_has_ip(bond))) {
/* a backup slave has gone down; three times
* the delta allows the current slave to be
* taken out before the backup slave.
@@ -2966,8 +3157,8 @@ static void bond_activebackup_arp_mon(struct net_device *bond_dev)
* if it is up and needs to take over as the curr_active_slave
*/
if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
- (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
- my_ip)) &&
+ (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
+ bond_has_ip(bond))) &&
((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {
slave->link = BOND_LINK_DOWN;
@@ -3019,7 +3210,7 @@ static void bond_activebackup_arp_mon(struct net_device *bond_dev)
/* the current slave must tx an arp to ensure backup slaves
* rx traffic
*/
- if (slave && my_ip) {
+ if (slave && bond_has_ip(bond)) {
bond_arp_send_all(bond, slave);
}
}
@@ -3471,10 +3662,67 @@ static int bond_netdev_event(struct notifier_block *this, unsigned long event, v
return NOTIFY_DONE;
}
+/*
+ * bond_inetaddr_event: handle inetaddr notifier chain events.
+ *
+ * We keep track of device IPs primarily to use as source addresses in
+ * ARP monitor probes (rather than spewing out broadcasts all the time).
+ *
+ * We track one IP for the main device (if it has one), plus one per VLAN.
+ */
+static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
+ struct bonding *bond, *bond_next;
+ struct vlan_entry *vlan, *vlan_next;
+
+ list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) {
+ if (bond->dev == event_dev) {
+ switch (event) {
+ case NETDEV_UP:
+ bond->master_ip = ifa->ifa_local;
+ return NOTIFY_OK;
+ case NETDEV_DOWN:
+ bond->master_ip = bond_glean_dev_ip(bond->dev);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+ }
+
+ if (list_empty(&bond->vlan_list))
+ continue;
+
+ list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
+ vlan_list) {
+ vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+ if (vlan_dev == event_dev) {
+ switch (event) {
+ case NETDEV_UP:
+ vlan->vlan_ip = ifa->ifa_local;
+ return NOTIFY_OK;
+ case NETDEV_DOWN:
+ vlan->vlan_ip =
+ bond_glean_dev_ip(vlan_dev);
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+ }
+ }
+ }
+ return NOTIFY_DONE;
+}
+
static struct notifier_block bond_netdev_notifier = {
.notifier_call = bond_netdev_event,
};
+static struct notifier_block bond_inetaddr_notifier = {
+ .notifier_call = bond_inetaddr_event,
+};
+
/*-------------------------- Packet type handling ---------------------------*/
/* register to receive lacpdus on a bond */
@@ -3496,6 +3744,46 @@ static void bond_unregister_lacpdu(struct bonding *bond)
dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
}
+/*---------------------------- Hashing Policies -----------------------------*/
+
+/*
+ * Hash for the the output device based upon layer 3 and layer 4 data. If
+ * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
+ * altogether not IP, mimic bond_xmit_hash_policy_l2()
+ */
+static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
+ struct net_device *bond_dev, int count)
+{
+ struct ethhdr *data = (struct ethhdr *)skb->data;
+ struct iphdr *iph = skb->nh.iph;
+ u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
+ int layer4_xor = 0;
+
+ if (skb->protocol == __constant_htons(ETH_P_IP)) {
+ if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) &&
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP)) {
+ layer4_xor = htons((*layer4hdr ^ *(layer4hdr + 1)));
+ }
+ return (layer4_xor ^
+ ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
+
+ }
+
+ return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
+}
+
+/*
+ * Hash for the output device based upon layer 2 data
+ */
+static int bond_xmit_hash_policy_l2(struct sk_buff *skb,
+ struct net_device *bond_dev, int count)
+{
+ struct ethhdr *data = (struct ethhdr *)skb->data;
+
+ return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
+}
+
/*-------------------------- Device entry points ----------------------------*/
static int bond_open(struct net_device *bond_dev)
@@ -4060,17 +4348,6 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
struct bonding *bond = bond_dev->priv;
int res = 1;
- /* if we are sending arp packets, try to at least
- identify our own ip address */
- if (bond->params.arp_interval && !my_ip &&
- (skb->protocol == __constant_htons(ETH_P_ARP))) {
- char *the_ip = (char *)skb->data +
- sizeof(struct ethhdr) +
- sizeof(struct arphdr) +
- ETH_ALEN;
- memcpy(&my_ip, the_ip, 4);
- }
-
read_lock(&bond->lock);
read_lock(&bond->curr_slave_lock);
@@ -4093,14 +4370,13 @@ out:
}
/*
- * in XOR mode, we determine the output device by performing xor on
- * the source and destination hw adresses. If this device is not
- * enabled, find the next slave following this xor slave.
+ * In bond_xmit_xor() , we determine the output device by using a pre-
+ * determined xmit_hash_policy(), If the selected device is not enabled,
+ * find the next active slave.
*/
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = bond_dev->priv;
- struct ethhdr *data = (struct ethhdr *)skb->data;
struct slave *slave, *start_at;
int slave_no;
int i;
@@ -4112,7 +4388,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
goto out;
}
- slave_no = (data->h_dest[5]^bond_dev->dev_addr[5]) % bond->slave_cnt;
+ slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt);
bond_for_each_slave(bond, slave, i) {
slave_no--;
@@ -4208,8 +4484,10 @@ out:
/*
* set bond mode specific net device operations
*/
-static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode)
+static inline void bond_set_mode_ops(struct bonding *bond, int mode)
{
+ struct net_device *bond_dev = bond->dev;
+
switch (mode) {
case BOND_MODE_ROUNDROBIN:
bond_dev->hard_start_xmit = bond_xmit_roundrobin;
@@ -4219,12 +4497,20 @@ static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode)
break;
case BOND_MODE_XOR:
bond_dev->hard_start_xmit = bond_xmit_xor;
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
+ bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
+ else
+ bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
break;
case BOND_MODE_BROADCAST:
bond_dev->hard_start_xmit = bond_xmit_broadcast;
break;
case BOND_MODE_8023AD:
bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
+ bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
+ else
+ bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
break;
case BOND_MODE_TLB:
case BOND_MODE_ALB:
@@ -4273,7 +4559,7 @@ static int __init bond_init(struct net_device *bond_dev, struct bond_params *par
bond_dev->change_mtu = bond_change_mtu;
bond_dev->set_mac_address = bond_set_mac_address;
- bond_set_mode_ops(bond_dev, bond->params.mode);
+ bond_set_mode_ops(bond, bond->params.mode);
bond_dev->destructor = free_netdev;
@@ -4384,6 +4670,25 @@ static int bond_check_params(struct bond_params *params)
}
}
+ if (xmit_hash_policy) {
+ if ((bond_mode != BOND_MODE_XOR) &&
+ (bond_mode != BOND_MODE_8023AD)) {
+ printk(KERN_INFO DRV_NAME
+ ": xor_mode param is irrelevant in mode %s\n",
+ bond_mode_name(bond_mode));
+ } else {
+ xmit_hashtype = bond_parse_parm(xmit_hash_policy,
+ xmit_hashtype_tbl);
+ if (xmit_hashtype == -1) {
+ printk(KERN_ERR DRV_NAME
+ ": Error: Invalid xmit_hash_policy \"%s\"\n",
+ xmit_hash_policy == NULL ? "NULL" :
+ xmit_hash_policy);
+ return -EINVAL;
+ }
+ }
+ }
+
if (lacp_rate) {
if (bond_mode != BOND_MODE_8023AD) {
printk(KERN_INFO DRV_NAME
@@ -4595,6 +4900,7 @@ static int bond_check_params(struct bond_params *params)
/* fill params struct with the proper values */
params->mode = bond_mode;
+ params->xmit_policy = xmit_hashtype;
params->miimon = miimon;
params->arp_interval = arp_interval;
params->updelay = updelay;
@@ -4669,6 +4975,7 @@ static int __init bonding_init(void)
rtnl_unlock();
register_netdevice_notifier(&bond_netdev_notifier);
+ register_inetaddr_notifier(&bond_inetaddr_notifier);
return 0;
@@ -4684,6 +4991,7 @@ out_err:
static void __exit bonding_exit(void)
{
unregister_netdevice_notifier(&bond_netdev_notifier);
+ unregister_inetaddr_notifier(&bond_inetaddr_notifier);
rtnl_lock();
bond_free_all();