diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 07:55:01 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 07:55:01 -0800 |
commit | d7fc02c7bae7b1cf69269992cf880a43a350cdaa (patch) | |
tree | a43d56fa72913a1cc98a0bbebe054d08581b3a7c /net/ipv4/inet_timewait_sock.c | |
parent | ee1262dbc65ce0b6234a915d8432171e8d77f518 (diff) | |
parent | 28b4d5cc17c20786848cdc07b7ea237a309776bb (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1815 commits)
mac80211: fix reorder buffer release
iwmc3200wifi: Enable wimax core through module parameter
iwmc3200wifi: Add wifi-wimax coexistence mode as a module parameter
iwmc3200wifi: Coex table command does not expect a response
iwmc3200wifi: Update wiwi priority table
iwlwifi: driver version track kernel version
iwlwifi: indicate uCode type when fail dump error/event log
iwl3945: remove duplicated event logging code
b43: fix two warnings
ipw2100: fix rebooting hang with driver loaded
cfg80211: indent regulatory messages with spaces
iwmc3200wifi: fix NULL pointer dereference in pmkid update
mac80211: Fix TX status reporting for injected data frames
ath9k: enable 2GHz band only if the device supports it
airo: Fix integer overflow warning
rt2x00: Fix padding bug on L2PAD devices.
WE: Fix set events not propagated
b43legacy: avoid PPC fault during resume
b43: avoid PPC fault during resume
tcp: fix a timewait refcnt race
...
Fix up conflicts due to sysctl cleanups (dead sysctl_check code and
CTL_UNNUMBERED removed) in
kernel/sysctl_check.c
net/ipv4/sysctl_net_ipv4.c
net/ipv6/addrconf.c
net/sctp/sysctl.c
Diffstat (limited to 'net/ipv4/inet_timewait_sock.c')
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 112 |
1 files changed, 76 insertions, 36 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 13f0781f35c..0fdf45e4c90 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,22 +14,33 @@ #include <net/inet_timewait_sock.h> #include <net/ip.h> + +/* + * unhash a timewait socket from established hash + * lock must be hold by caller + */ +int inet_twsk_unhash(struct inet_timewait_sock *tw) +{ + if (hlist_nulls_unhashed(&tw->tw_node)) + return 0; + + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + return 1; +} + /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; + int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spin_lock(lock); - if (hlist_nulls_unhashed(&tw->tw_node)) { - spin_unlock(lock); - return; - } - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); + refcnt = inet_twsk_unhash(tw); spin_unlock(lock); /* Disassociate with bind bucket. */ @@ -37,9 +48,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + if (tb) { + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + refcnt++; + } spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { @@ -47,7 +61,10 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - inet_twsk_put(tw); + while (refcnt) { + inet_twsk_put(tw); + refcnt--; + } } static noinline void inet_twsk_free(struct inet_timewait_sock *tw) @@ -86,7 +103,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; @@ -101,13 +118,22 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * Should be done before removing sk from established chain * because readers are lockless and search established first. */ - atomic_inc(&tw->tw_refcnt); inet_twsk_add_node_rcu(tw, &ehead->twchain); /* Step 3: Remove SK from established hash. */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + /* + * Notes : + * - We initially set tw_refcnt to 0 in inet_twsk_alloc() + * - We add one reference for the bhash link + * - We add one reference for the ehash link + * - We want this refcnt update done before allowing other + * threads to find this tw in ehash chain. + */ + atomic_add(1 + 1 + 1, &tw->tw_refcnt); + spin_unlock(lock); } @@ -124,14 +150,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat kmemcheck_annotate_bitfield(tw, flags); /* Give us an identity. */ - tw->tw_daddr = inet->daddr; - tw->tw_rcv_saddr = inet->rcv_saddr; + tw->tw_daddr = inet->inet_daddr; + tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; - tw->tw_num = inet->num; + tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; - tw->tw_sport = inet->sport; - tw->tw_dport = inet->dport; + tw->tw_sport = inet->inet_sport; + tw->tw_dport = inet->inet_dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; @@ -139,7 +165,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); - atomic_set(&tw->tw_refcnt, 1); + /* + * Because we use RCU lookups, we should not set tw_refcnt + * to a non null value before everything is setup for this + * timewait socket. + */ + atomic_set(&tw->tw_refcnt, 0); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); } @@ -421,37 +452,46 @@ out: EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { struct inet_timewait_sock *tw; struct sock *sk; struct hlist_nulls_node *node; - int h; + unsigned int slot; - local_bh_disable(); - for (h = 0; h < (hashinfo->ehash_size); h++) { - struct inet_ehash_bucket *head = - inet_ehash_bucket(hashinfo, h); - spinlock_t *lock = inet_ehash_lockp(hashinfo, h); + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + rcu_read_lock(); restart: - spin_lock(lock); - sk_nulls_for_each(sk, node, &head->twchain) { - + sk_nulls_for_each_rcu(sk, node, &head->twchain) { tw = inet_twsk(sk); - if (!net_eq(twsk_net(tw), net) || - tw->tw_family != family) + if ((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count)) + continue; + + if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) continue; - atomic_inc(&tw->tw_refcnt); - spin_unlock(lock); + if (unlikely((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); - - goto restart; + goto restart_rcu; } - spin_unlock(lock); + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); } - local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_purge); |