From 98b90f26651f9d84cfbb0221c9a3d9863c5bea69 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 27 Jan 2014 14:37:31 +0100 Subject: bonding: RCUify bond_ab_arp_probe Currently bond_ab_arp_probe() is always called under rcu_read_lock(), however to work with curr_active_slave we're still holding the curr_slave_lock. To remove that curr_slave_lock - rcu_dereference the bond's curr_active_slave and use it further - so that we're sure the slave won't go away, and we don't care if it will change in the meanwhile. CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a7db819bca9..27e6fddb220 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2605,25 +2605,21 @@ do_failover: static void bond_ab_arp_probe(struct bonding *bond) { struct slave *slave, *before = NULL, *new_slave = NULL, - *curr_arp_slave = rcu_dereference(bond->current_arp_slave); + *curr_arp_slave = rcu_dereference(bond->current_arp_slave), + *curr_active_slave = rcu_dereference(bond->curr_active_slave); struct list_head *iter; bool found = false; - read_lock(&bond->curr_slave_lock); - - if (curr_arp_slave && bond->curr_active_slave) + if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", curr_arp_slave->dev->name, - bond->curr_active_slave->dev->name); + curr_active_slave->dev->name); - if (bond->curr_active_slave) { - bond_arp_send_all(bond, bond->curr_active_slave); - read_unlock(&bond->curr_slave_lock); + if (curr_active_slave) { + bond_arp_send_all(bond, curr_active_slave); return; } - read_unlock(&bond->curr_slave_lock); - /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the 
candidate * for becoming the curr_active_slave -- cgit v1.2.3-70-g09d2 From f2ebd477f141bc09b10fb8deb612a4d9b8999bba Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 27 Jan 2014 14:37:32 +0100 Subject: bonding: restructure locking of bond_ab_arp_probe() Currently we're calling it from under RCU context, however we're using some functions that require rtnl to be held. Fix this by restructuring the locking - don't call it under any locks, acquire rcu_read_lock() if we're sending _only_ (i.e. we have the active slave present), and use rtnl locking otherwise - if we need to modify (in)active flags of a slave. CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 57 ++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 21 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 27e6fddb220..dd75615d85f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2599,17 +2599,18 @@ do_failover: /* * Send ARP probes for active-backup mode ARP monitor. - * - * Called with rcu_read_lock hold. 
*/ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond) { struct slave *slave, *before = NULL, *new_slave = NULL, - *curr_arp_slave = rcu_dereference(bond->current_arp_slave), - *curr_active_slave = rcu_dereference(bond->curr_active_slave); + *curr_arp_slave, *curr_active_slave; struct list_head *iter; bool found = false; + rcu_read_lock(); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); + curr_active_slave = rcu_dereference(bond->curr_active_slave); + if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", curr_arp_slave->dev->name, @@ -2617,23 +2618,32 @@ static void bond_ab_arp_probe(struct bonding *bond) if (curr_active_slave) { bond_arp_send_all(bond, curr_active_slave); - return; + rcu_read_unlock(); + return true; } + rcu_read_unlock(); /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave */ + if (!rtnl_trylock()) + return false; + /* curr_arp_slave might have gone away */ + curr_arp_slave = ACCESS_ONCE(bond->current_arp_slave); + if (!curr_arp_slave) { - curr_arp_slave = bond_first_slave_rcu(bond); - if (!curr_arp_slave) - return; + curr_arp_slave = bond_first_slave(bond); + if (!curr_arp_slave) { + rtnl_unlock(); + return true; + } } bond_set_slave_inactive_flags(curr_arp_slave); - bond_for_each_slave_rcu(bond, slave, iter) { + bond_for_each_slave(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) before = slave; @@ -2663,21 +2673,26 @@ static void bond_ab_arp_probe(struct bonding *bond) if (!new_slave && before) new_slave = before; - if (!new_slave) - return; + if (!new_slave) { + rtnl_unlock(); + return true; + } new_slave->link = BOND_LINK_BACK; bond_set_slave_active_flags(new_slave); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; rcu_assign_pointer(bond->current_arp_slave, new_slave); + rtnl_unlock(); + + 
return true; } static void bond_activebackup_arp_mon(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, arp_work.work); - bool should_notify_peers = false; + bool should_notify_peers = false, should_commit = false; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -2686,12 +2701,11 @@ static void bond_activebackup_arp_mon(struct work_struct *work) goto re_arm; rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); + should_commit = bond_ab_arp_inspect(bond); + rcu_read_unlock(); - if (bond_ab_arp_inspect(bond)) { - rcu_read_unlock(); - + if (should_commit) { /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; @@ -2700,13 +2714,14 @@ static void bond_activebackup_arp_mon(struct work_struct *work) } bond_ab_arp_commit(bond); - rtnl_unlock(); - rcu_read_lock(); } - bond_ab_arp_probe(bond); - rcu_read_unlock(); + if (!bond_ab_arp_probe(bond)) { + /* rtnl locking failed, re-arm */ + delta_in_ticks = 1; + should_notify_peers = false; + } re_arm: if (bond->params.arp_interval) -- cgit v1.2.3-70-g09d2 From 6fde8f037e604e05df1529e4689041715d6d55d2 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Tue, 28 Jan 2014 11:48:53 +0800 Subject: bonding: fix locking in bond_loadbalance_arp_mon() Commit 1d3ee88ae0d605629bf369 (bonding: add netlink attributes to slave link dev) added rtmsg_ifinfo() to bond_set_active_slave() and bond_set_backup_slave(), so these two functions need to be called under the RTNL lock, but bond_loadbalance_arp_mon() only calls them under RCU, so a warning message is triggered. Fix this by adding a new function, bond_slave_state_change(), which resets each slave's state after the slave link check: remove the bond_set_xxx_slave() calls from the loop and only record slave_state_changed, then call the new function to set all slaves to their new state under RTNL later. 
Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 29 +++++++++++++++++------------ drivers/net/bonding/bonding.h | 13 +++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'drivers/net/bonding/bond_main.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index dd75615d85f..4c08018d733 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2346,7 +2346,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) arp_work.work); struct slave *slave, *oldcurrent; struct list_head *iter; - int do_failover = 0; + int do_failover = 0, slave_state_changed = 0; if (!bond_has_slaves(bond)) goto re_arm; @@ -2370,7 +2370,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) bond_time_in_interval(bond, slave->dev->last_rx, 1)) { slave->link = BOND_LINK_UP; - bond_set_active_slave(slave); + slave_state_changed = 1; /* primary_slave has no meaning in round-robin * mode. the window of a slave being up and @@ -2399,7 +2399,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) !bond_time_in_interval(bond, slave->dev->last_rx, 2)) { slave->link = BOND_LINK_DOWN; - bond_set_backup_slave(slave); + slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2426,19 +2426,24 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) rcu_read_unlock(); - if (do_failover) { - /* the bond_select_active_slave must hold RTNL - * and curr_slave_lock for write. - */ + if (do_failover || slave_state_changed) { if (!rtnl_trylock()) goto re_arm; - block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); - bond_select_active_slave(bond); + if (slave_state_changed) { + bond_slave_state_change(bond); + } else if (do_failover) { + /* the bond_select_active_slave must hold RTNL + * and curr_slave_lock for write. 
+ */ + block_netpoll_tx(); + write_lock_bh(&bond->curr_slave_lock); - write_unlock_bh(&bond->curr_slave_lock); - unblock_netpoll_tx(); + bond_select_active_slave(bond); + + write_unlock_bh(&bond->curr_slave_lock); + unblock_netpoll_tx(); + } rtnl_unlock(); } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 1a9062f4e0d..86ccfb9f71c 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -303,6 +303,19 @@ static inline void bond_set_backup_slave(struct slave *slave) } } +static inline void bond_slave_state_change(struct bonding *bond) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, iter) { + if (tmp->link == BOND_LINK_UP) + bond_set_active_slave(tmp); + else if (tmp->link == BOND_LINK_DOWN) + bond_set_backup_slave(tmp); + } +} + static inline int bond_slave_state(struct slave *slave) { return slave->backup; -- cgit v1.2.3-70-g09d2