From 1cb561f83793191cf86a2db3948d28f5f42df9ff Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Mon, 29 Mar 2010 11:00:20 -0700 Subject: mac80211: Handle mesh action frames in ieee80211_rx_h_action This fixes the problem introduced in commit 8404080568613d93ad7cf0a16dfb68 which broke mesh peer link establishment. changes: v2 Added missing break (Johannes) v3 Broke original patch into two (Johannes) Signed-off-by: Javier Cardona Cc: stable@kernel.org Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mesh.c | 3 --- net/mac80211/rx.c | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 61080c5fad5..7a6bebce7f2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -749,9 +749,6 @@ ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_ACTION: - if (skb->len < IEEE80211_MIN_ACTION_SIZE) - return RX_DROP_MONITOR; - /* fall through */ case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: skb_queue_tail(&ifmsh->skb_queue, skb); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b5c48de81d8..13fcd2d17c6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1973,6 +1973,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; + case MESH_PLINK_CATEGORY: + case MESH_PATH_SEL_CATEGORY: + if (ieee80211_vif_is_mesh(&sdata->vif)) + return ieee80211_mesh_rx_mgmt(sdata, rx->skb); + break; } /* -- cgit v1.2.3-70-g09d2 From 0379185b6c0d1e8252023698cf1091da92a3dc03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Apr 2010 11:18:42 +0200 Subject: mac80211: annotate station rcu dereferences The new RCU lockdep support warns about these in some contexts -- make it aware of the locks used to protect all this. Different locks are used in different contexts which unfortunately means we can't get perfect checking. Also remove rcu_dereference() from two places that don't actually dereference the pointers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 4 ++-- net/mac80211/sta_info.c | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 06c33b68d8e..b887e484ae0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -225,11 +225,11 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: sdata->vif.bss_conf.enable_beacon = - !!rcu_dereference(sdata->u.ap.beacon); + !!sdata->u.ap.beacon; break; case NL80211_IFTYPE_ADHOC: sdata->vif.bss_conf.enable_beacon = - !!rcu_dereference(sdata->u.ibss.presp); + !!sdata->u.ibss.presp; break; case NL80211_IFTYPE_MESH_POINT: sdata->vif.bss_conf.enable_beacon = true; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 56422d89435..fb12cec4d33 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -93,12 +93,18 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); while (sta) { if (sta->sdata == sdata && memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; - sta = rcu_dereference(sta->hnext); + sta = rcu_dereference_check(sta->hnext, + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); } return sta; } @@ -113,13 +119,19 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); while (sta) { if ((sta->sdata == sdata || sta->sdata->bss == sdata->bss) && memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; - sta = rcu_dereference(sta->hnext); + sta = rcu_dereference_check(sta->hnext, + rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); } return sta; } -- cgit v1.2.3-70-g09d2 From fd218cf9557b9bf7061365a8fe7020a56d3f767c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Apr 2010 21:20:47 -0700 Subject: bridge: Fix IGMP3 report parsing The IGMP3 report parsing is looking at the wrong address for group records. This patch fixes it. Reported-by: Banyeer Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6980625537c..f29ada827a6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -723,7 +723,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, if (!pskb_may_pull(skb, len)) return -EINVAL; - grec = (void *)(skb->data + len); + grec = (void *)(skb->data + len - sizeof(*grec)); group = grec->grec_mca; type = grec->grec_type; -- cgit v1.2.3-70-g09d2 From f5eb917b861828da18dc28854308068c66d1449a Mon Sep 17 00:00:00 2001 From: John Hughes Date: Wed, 7 Apr 2010 21:29:25 -0700 Subject: x25: Patch to fix bug 15678 - x25 accesses fields beyond end of packet. Here is a patch to stop X.25 examining fields beyond the end of the packet. For example, when a simple CALL ACCEPTED was received: 10 10 0f x25_parse_facilities was attempting to decode the FACILITIES field, but this packet contains no facilities field. Signed-off-by: John Hughes Signed-off-by: David S. Miller --- include/net/x25.h | 4 ++++ net/x25/af_x25.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- net/x25/x25_facilities.c | 12 +++++++++++- net/x25/x25_in.c | 15 +++++++++++---- 4 files changed, 72 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/x25.h b/include/net/x25.h index 9baa07dc7d1..33f67fb7858 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -182,6 +182,10 @@ extern int sysctl_x25_clear_request_timeout; extern int sysctl_x25_ack_holdback_timeout; extern int sysctl_x25_forward; +extern int x25_parse_address_block(struct sk_buff *skb, + struct x25_address *called_addr, + struct x25_address *calling_addr); + extern int x25_addr_ntoa(unsigned char *, struct x25_address *, struct x25_address *); extern int x25_addr_aton(unsigned char *, struct x25_address *, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 9796f3ed1ed..fe26c01ef3e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -82,6 +82,41 @@ struct compat_x25_subscrip_struct { }; #endif + +int x25_parse_address_block(struct sk_buff *skb, + struct x25_address *called_addr, + struct x25_address *calling_addr) +{ + unsigned char len; + int needed; + int rc; + + if (skb->len < 1) { + /* packet has no address block */ + rc = 0; + goto empty; + } + + len = *skb->data; + needed = 1 + (len >> 4) + (len & 0x0f); + + if (skb->len < needed) { + /* packet is too short to hold the addresses it claims + to hold */ + rc = -1; + goto empty; + } + + return x25_addr_ntoa(skb->data, called_addr, calling_addr); + +empty: + *called_addr->x25_addr = 0; + *calling_addr->x25_addr = 0; + + return rc; +} + + int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr, struct x25_address *calling_addr) { @@ -921,16 +956,26 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, /* * Extract the X.25 addresses and convert them to ASCII strings, * and remove them. + * + * Address block is mandatory in call request packets */ - addr_len = x25_addr_ntoa(skb->data, &source_addr, &dest_addr); + addr_len = x25_parse_address_block(skb, &source_addr, &dest_addr); + if (addr_len <= 0) + goto out_clear_request; skb_pull(skb, addr_len); /* * Get the length of the facilities, skip past them for the moment * get the call user data because this is needed to determine * the correct listener + * + * Facilities length is mandatory in call request packets */ + if (skb->len < 1) + goto out_clear_request; len = skb->data[0] + 1; + if (skb->len < len) + goto out_clear_request; skb_pull(skb,len); /* diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index a21f6646eb3..a2765c6b1f1 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -35,7 +35,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) { unsigned char *p = skb->data; - unsigned int len = *p++; + unsigned int len; *vc_fac_mask = 0; @@ -50,6 +50,14 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, memset(dte_facs->called_ae, '\0', sizeof(dte_facs->called_ae)); memset(dte_facs->calling_ae, '\0', sizeof(dte_facs->calling_ae)); + if (skb->len < 1) + return 0; + + len = *p++; + + if (len >= skb->len) + return -1; + while (len > 0) { switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: @@ -247,6 +255,8 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memcpy(new, ours, sizeof(*new)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); + if (len < 0) + return len; /* * They want reverse charging, we won't accept it. diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 96d92278354..b39072f3a29 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -89,6 +89,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype) { struct x25_address source_addr, dest_addr; + int len; switch (frametype) { case X25_CALL_ACCEPTED: { @@ -106,11 +107,17 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp * Parse the data in the frame. */ skb_pull(skb, X25_STD_MIN_LEN); - skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr)); - skb_pull(skb, - x25_parse_facilities(skb, &x25->facilities, + + len = x25_parse_address_block(skb, &source_addr, + &dest_addr); + if (len > 0) + skb_pull(skb, len); + + len = x25_parse_facilities(skb, &x25->facilities, &x25->dte_facilities, - &x25->vc_facil_mask)); + &x25->vc_facil_mask); + if (len > 0) + skb_pull(skb, len); /* * Copy any Call User Data. */ -- cgit v1.2.3-70-g09d2 From ddd0451fc8dbf94446c81500ff0dcee06c4057cb Mon Sep 17 00:00:00 2001 From: John Hughes Date: Sun, 4 Apr 2010 06:48:10 +0000 Subject: x.25 attempts to negotiate invalid throughput The current X.25 code has some bugs in throughput negotiation: 1. It does negotiation in all cases, usually there is no need 2. It incorrectly attempts to negotiate the throughput class in one direction only. There are separate throughput classes for input and output and if either is negotiated both mist be negotiates. This is bug https://bugzilla.kernel.org/show_bug.cgi?id=15681 This bug was first reported by Daniel Ferenci to the linux-x25 mailing list on 6/8/2004, but is still present. The current (2.6.34) x.25 code doesn't seem to know that the X.25 throughput facility includes two values, one for the required throughput outbound, one for inbound. This causes it to attempt to negotiate throughput 0x0A, which is throughput 9600 inbound and the illegal value "0" for inbound throughput. Because of this some X.25 devices (e.g. Cisco 1600) refuse to connect to Linux X.25. The following patch fixes this behaviour. Unless the user specifies a required throughput it does not attempt to negotiate. If the user does not specify a throughput it accepts the suggestion of the remote X.25 system. If the user requests a throughput then it validates both the input and output throughputs and correctly negotiates them with the remote end. Signed-off-by: John Hughes Tested-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 20 ++++++++++++++++---- net/x25/x25_facilities.c | 15 ++++++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index fe26c01ef3e..8ed51c926c5 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -588,7 +588,8 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE; x25->facilities.pacsize_in = X25_DEFAULT_PACKET_SIZE; x25->facilities.pacsize_out = X25_DEFAULT_PACKET_SIZE; - x25->facilities.throughput = X25_DEFAULT_THROUGHPUT; + x25->facilities.throughput = 0; /* by default don't negotiate + throughput */ x25->facilities.reverse = X25_DEFAULT_REVERSE; x25->dte_facilities.calling_len = 0; x25->dte_facilities.called_len = 0; @@ -1459,9 +1460,20 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (facilities.winsize_in < 1 || facilities.winsize_in > 127) break; - if (facilities.throughput < 0x03 || - facilities.throughput > 0xDD) - break; + if (facilities.throughput) { + int out = facilities.throughput & 0xf0; + int in = facilities.throughput & 0x0f; + if (!out) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT << 4; + else if (out < 0x30 || out > 0xD0) + break; + if (!in) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT; + else if (in < 0x03 || in > 0x0D) + break; + } if (facilities.reverse && (facilities.reverse & 0x81) != 0x81) break; diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index a2765c6b1f1..771bab00754 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -269,9 +269,18 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, new->reverse = theirs.reverse; if (theirs.throughput) { - if (theirs.throughput < ours->throughput) { - SOCK_DEBUG(sk, "X.25: throughput negotiated down\n"); - new->throughput = theirs.throughput; + int theirs_in = theirs.throughput & 0x0f; + int theirs_out = theirs.throughput & 0xf0; + int ours_in = ours->throughput & 0x0f; + int ours_out = ours->throughput & 0xf0; + if (!ours_in || theirs_in < ours_in) { + SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n"); + new->throughput = (new->throughput & 0xf0) | theirs_in; + } + if (!ours_out || theirs_out < ours_out) { + SOCK_DEBUG(sk, + "X.25: outbound throughput negotiated\n"); + new->throughput = (new->throughput & 0x0f) | theirs_out; } } -- cgit v1.2.3-70-g09d2 From 1223c67c0938d2df309fde618bd82c87c8c1af04 Mon Sep 17 00:00:00 2001 From: "Jorge Boncompte [DTI2]" Date: Thu, 8 Apr 2010 04:56:48 +0000 Subject: udp: fix for unicast RX path optimization Commits 5051ebd275de672b807c28d93002c2fb0514a3c9 and 5051ebd275de672b807c28d93002c2fb0514a3c9 ("ipv[46]: udp: optimize unicast RX path") broke some programs. After upgrading a L2TP server to 2.6.33 it started to fail, tunnels going up an down, after the 10th tunnel came up. My modified rp-l2tp uses a global unconnected socket bound to (INADDR_ANY, 1701) and one connected socket per tunnel after parameter negotiation. After ten sockets were open and due to mixed parameters to udp[46]_lib_lookup2() kernel started to drop packets. Signed-off-by: Jorge Boncompte [DTI2] Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7af756d0f93..24272c4cfbc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -471,8 +471,8 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, if (hslot->count < hslot2->count) goto begin; - result = udp4_lib_lookup2(net, INADDR_ANY, sport, - daddr, hnum, dif, + result = udp4_lib_lookup2(net, saddr, sport, + INADDR_ANY, hnum, dif, hslot2, slot2); } rcu_read_unlock(); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3c0c9c755c9..787e480cc09 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -258,8 +258,8 @@ static struct sock *__udp6_lib_lookup(struct net *net, if (hslot->count < hslot2->count) goto begin; - result = udp6_lib_lookup2(net, &in6addr_any, sport, - daddr, hnum, dif, + result = udp6_lib_lookup2(net, saddr, sport, + &in6addr_any, hnum, dif, hslot2, slot2); } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 2626419ad5be1a054d350786b684b41d23de1538 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 Apr 2010 11:32:30 -0700 Subject: tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb Back in commit 04a0551c87363f100b04d28d7a15a632b70e18e7 ("loopback: Drop obsolete ip_summed setting") we stopped setting CHECKSUM_UNNECESSARY in the loopback xmit. This is because such a setting was a lie since it implies that the checksum field of the packet is properly filled in. Instead what happens normally is that CHECKSUM_PARTIAL is set and skb->csum is calculated as needed. But this was only happening for TCP data packets (via the skb->ip_summed assignment done in tcp_sendmsg()). It doesn't happen for non-data packets like ACKs etc. Fix this by setting skb->ip_summed in the common non-data packet constructor. It already is setting skb->csum to zero. But this reminds us that we still have things like ip_output.c's ip_dev_loopback_xmit() which sets skb->ip_summed to the value CHECKSUM_UNNECESSARY, which Herbert's patch teaches us is not valid. So we'll have to address that at some point too. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f181b78f238..00afbb0c7e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -349,6 +349,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->flags = flags; -- cgit v1.2.3-70-g09d2 From ae4e8d63b5619d4d95f1d2bfa2b836caa6e62d06 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 11 Apr 2010 02:40:49 -0700 Subject: Revert "tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb" This reverts commit 2626419ad5be1a054d350786b684b41d23de1538. It causes regressions for people with IGB cards. Connection requests don't complete etc. The true cause of the issue is still not known, but we should sort this out in net-next-2.6 not net-2.6 Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 00afbb0c7e5..f181b78f238 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -349,7 +349,6 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->flags = flags; -- cgit v1.2.3-70-g09d2 From 4ffa87012efd7b664762b579213d4663560ef4a3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Apr 2010 23:47:31 +0000 Subject: can: avoids a false warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point optlen == sizeof(sfilter) but some compilers are dumb. Reported-by: Németh Márton Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 3a7dffb6519..da99cf153b3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -445,7 +445,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; } } else if (count == 1) { - if (copy_from_user(&sfilter, optval, optlen)) + if (copy_from_user(&sfilter, optval, sizeof(sfilter))) return -EFAULT; } -- cgit v1.2.3-70-g09d2 From 4eaa0e3c869acd5dbc7c2e3818a9ae9cbf221d27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Apr 2010 16:13:29 -0700 Subject: fib: suppress lockdep-RCU false positive in FIB trie. Followup of commit 634a4b20 Allow tnode_get_child_rcu() to be called either under rcu_read_lock() protection or with RTNL held. Signed-off-by: Eric Dumazet Signed-off-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 59a838795e3..c98f115fb0f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -209,7 +209,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { struct node *ret = tnode_get_child(tn, i); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline int tnode_child_length(const struct tnode *tn) -- cgit v1.2.3-70-g09d2 From 8728c544a9cbdcb0034aa5c45706c5f953f030ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Apr 2010 21:18:17 +0000 Subject: net: dev_pick_tx() fix When dev_pick_tx() caches tx queue_index on a socket, we must check socket dst_entry matches skb one, or risk a crash later, as reported by Denys Fedorysychenko, if old packets are in flight during a route change, involving devices with different number of queues. Bug introduced by commit a4ee3ce3 (net: Use sk_tx_queue_mapping for connected sockets) Reported-by: Denys Fedorysychenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1c8a0ce473a..92584bfef09 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1989,8 +1989,12 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); - if (sk && sk->sk_dst_cache) - sk_tx_queue_set(sk, queue_index); + if (sk) { + struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + } } } -- cgit v1.2.3-70-g09d2 From e30b38c298b55e09456d3ccbc1df2f3e2e8dc6e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Apr 2010 09:13:03 +0000 Subject: ip: Fix ip_dev_loopback_xmit() Eric Paris got following trace with a linux-next kernel [ 14.203970] BUG: using smp_processor_id() in preemptible [00000000] code: avahi-daemon/2093 [ 14.204025] caller is netif_rx+0xfa/0x110 [ 14.204035] Call Trace: [ 14.204064] [] debug_smp_processor_id+0x105/0x110 [ 14.204070] [] netif_rx+0xfa/0x110 [ 14.204090] [] ip_dev_loopback_xmit+0x71/0xa0 [ 14.204095] [] ip_mc_output+0x192/0x2c0 [ 14.204099] [] ip_local_out+0x20/0x30 [ 14.204105] [] ip_push_pending_frames+0x28d/0x3d0 [ 14.204119] [] udp_push_pending_frames+0x14c/0x400 [ 14.204125] [] udp_sendmsg+0x39c/0x790 [ 14.204137] [] inet_sendmsg+0x45/0x80 [ 14.204149] [] sock_sendmsg+0xf1/0x110 [ 14.204189] [] sys_sendmsg+0x20c/0x380 [ 14.204233] [] system_call_fastpath+0x16/0x1b While current linux-2.6 kernel doesnt emit this warning, bug is latent and might cause unexpected failures. ip_dev_loopback_xmit() runs in process context, preemption enabled, so must call netif_rx_ni() instead of netif_rx(), to make sure that we process pending software interrupt. Same change for ip6_dev_loopback_xmit() Reported-by: Eric Paris Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65f18e0936..d1bcc9f21d4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -120,7 +120,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16c4391f952..65f9c379df3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -108,7 +108,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } -- cgit v1.2.3-70-g09d2 From 1c4f0197323254e463b642abf2c8361e2a924859 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 14 Apr 2010 23:11:14 +0000 Subject: packet : remove init_net restriction The af_packet protocol is used by Perl to do ioctls as reported by Stephane Riviere: "Net::RawIP relies on SIOCGIFADDR et SIOCGIFHWADDR to get the IP and MAC addresses of the network interface." But in a new network namespace these ioctl fail because it is disabled for a namespace different from the init_net_ns. These two lines should not be there as af_inet and af_packet are namespace aware since a long time now. I suppose we forget to remove these lines because we sent the af_packet first, before af_inet was supported. Signed-off-by: Daniel Lezcano Reported-by: Stephane Riviere Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cc90363d7e7..243946d4809 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2169,8 +2169,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (!net_eq(sock_net(sk), &init_net)) - return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif -- cgit v1.2.3-70-g09d2 From b4bb5c3fd9333024044362df67e23e96158489ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Apr 2010 10:48:38 +0200 Subject: mac80211: remove bogus TX agg state assignment When the addba timer expires but has no work to do, it should not affect the state machine. If it does, TX will not see the successfully established and we can also crash trying to re-establish the session. Cc: stable@kernel.org [2.6.32, 2.6.33] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5538e1b4a69..944a8a92207 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -183,7 +183,6 @@ static void sta_addba_resp_timer_expired(unsigned long data) HT_AGG_STATE_REQ_STOP_BA_MSK)) != HT_ADDBA_REQUESTED_MSK) { spin_unlock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " "(or no longer) expecting addBA response there", -- cgit v1.2.3-70-g09d2 From fe6f212ce12341df18ef9b890bea739b4547157b Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 19 Apr 2010 10:46:31 -0700 Subject: mac80211: pass HT changes to driver when off channel Since "mac80211: make off-channel work generic" drivers have not been notified of configuration changes after association or authentication. This caused more dependence on current state to ensure driver will be notified when configuration changes occur. One such problem arises if off-channel is in progress when HT information changes. Since HT is only enabled on the "oper_channel" the driver will never be notified of this change. Usually the driver is notified soon after of a BSS information change (BSS_CHANGED_HT) ... but since the driver did not get a notification that this is a HT channel the new BSS information does not make sense. Fix this by also changing the off-channel information when HT is enabled and thus cause driver to be notified correctly. This fixes a problem in 4965 when associated with 5GHz 40MHz channel. Without this patch the system can associate but is unable to transfer any data, not even ping. See http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2158 Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index be5f723d643..8a9650343f2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -167,6 +167,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, ht_changed = conf_is_ht(&local->hw.conf) != enable_ht || channel_type != local->hw.conf.channel_type; + if (local->tmp_channel) + local->tmp_channel_type = channel_type; local->oper_channel_type = channel_type; if (ht_changed) { -- cgit v1.2.3-70-g09d2 From 8eabf95cb17253a3ac72b1a62ce8a80b3efecd62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 03:20:05 +0000 Subject: bridge: add a missing ntohs() grec_nsrcs is in network order, we should convert to host horder in br_multicast_igmp3_report() Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f29ada827a6..386c15369d9 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -727,7 +727,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br, group = grec->grec_mca; type = grec->grec_type; - len += grec->grec_nsrcs * 4; + len += ntohs(grec->grec_nsrcs) * 4; if (!pskb_may_pull(skb, len)) return -EINVAL; -- cgit v1.2.3-70-g09d2 From 6651ffc8e8bdd5fb4b7d1867c6cfebb4f309512c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 21 Apr 2010 00:47:15 -0700 Subject: ipv6: Fix tcp_v6_send_response transport header setting. My recent patch to remove the open-coded checksum sequence in tcp_v6_send_response broke it as we did not set the transport header pointer on the new packet. Actually, there is code there trying to set the transport header properly, but it sets it for the wrong skb ('skb' instead of 'buff'). This bug was introduced by commit a8fdf2b331b38d61fb5f11f3aec4a4f9fb2dedcb ("ipv6: Fix tcp_v6_send_response(): it didn't set skb transport header") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c92ebe8f80d..075f540ec19 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1015,7 +1015,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); t1 = (struct tcphdr *) skb_push(buff, tot_len); - skb_reset_transport_header(skb); + skb_reset_transport_header(buff); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); -- cgit v1.2.3-70-g09d2 From 05d17608a69b3ae653ea5c9857283bef3439c733 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 20 Apr 2010 00:25:58 +0000 Subject: net: Fix an RCU warning in dev_pick_tx() Fix the following RCU warning in dev_pick_tx(): =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- net/core/dev.c:1993 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by swapper/0: #0: (&idev->mc_ifc_timer){+.-...}, at: [] run_timer_softirq+0x17b/0x278 #1: (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x14e/0x4dc stack backtrace: Pid: 0, comm: swapper Not tainted 2.6.34-rc5-cachefs #4 Call Trace: [] lockdep_rcu_dereference+0xaa/0xb2 [] dev_queue_xmit+0x259/0x4dc [] ? dev_queue_xmit+0x14e/0x4dc [] ? trace_hardirqs_on+0xd/0xf [] ? local_bh_enable_ip+0xbc/0xc1 [] neigh_resolve_output+0x24b/0x27c [] ip6_output_finish+0x7c/0xb4 [] ip6_output2+0x256/0x261 [] ? trace_hardirqs_on+0xd/0xf [] ip6_output+0xbbc/0xbcb [] ? fib6_force_start_gc+0x2b/0x2d [] mld_sendpack+0x273/0x39d [] ? mld_sendpack+0x0/0x39d [] ? mark_held_locks+0x52/0x70 [] mld_ifc_timer_expire+0x24f/0x288 [] run_timer_softirq+0x1ec/0x278 [] ? run_timer_softirq+0x17b/0x278 [] ? mld_ifc_timer_expire+0x0/0x288 [] ? __do_softirq+0x69/0x140 [] __do_softirq+0xa2/0x140 [] call_softirq+0x1c/0x28 [] do_softirq+0x38/0x80 [] irq_exit+0x45/0x47 [] smp_apic_timer_interrupt+0x88/0x96 [] apic_timer_interrupt+0x13/0x20 [] ? __atomic_notifier_call_chain+0x0/0x86 [] ? mwait_idle+0x6e/0x78 [] ? mwait_idle+0x65/0x78 [] cpu_idle+0x4d/0x83 [] rest_init+0xb9/0xc0 [] ? rest_init+0x0/0xc0 [] start_kernel+0x392/0x39d [] x86_64_start_reservations+0xb3/0xb7 [] x86_64_start_kernel+0xe4/0xeb An rcu_dereference() should be an rcu_dereference_bh(). Signed-off-by: David Howells Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 92584bfef09..f769098774b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1990,7 +1990,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, queue_index = skb_tx_hash(dev, skb); if (sk) { - struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache); + struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); -- cgit v1.2.3-70-g09d2 From bc8e4b954e463716a57d8113dd50ae9d47b682a7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 21 Apr 2010 16:25:30 -0700 Subject: xfrm6: ensure to use the same dev when building a bundle When building a bundle, we set dst.dev and rt6.rt6i_idev. We must ensure to set the same device for both fields. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ae181651c75..00bf7c962b7 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -124,7 +124,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); + xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) return -ENODEV; -- cgit v1.2.3-70-g09d2 From 2cec6b014da6fb4a40ba1c6556cdf9681ed3f89e Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Sat, 17 Apr 2010 14:17:32 +0000 Subject: X25 fix dead unaccepted sockets 1, An X25 program binds and listens 2, calls arrive waiting to be accepted 3, Program exits without accepting 4, Sockets time out but don't get correctly cleaned up 5, cat /proc/net/x25/socket shows the dead sockets with bad inode fields. This line borrowed from AX25 sets the dying socket so the timers clean up later. Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index cbddd0cb83f..36e84e13c6a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -402,6 +402,7 @@ static void __x25_destroy_socket(struct sock *sk) /* * Queue the unaccepted socket for death */ + skb->sk->sk_state = TCP_LISTEN; sock_set_flag(skb->sk, SOCK_DEAD); x25_start_heartbeat(skb->sk); x25_sk(skb->sk)->state = X25_STATE_0; -- cgit v1.2.3-70-g09d2 From f2228f785a9d97307aa8ba709088cfda6c3df73f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sun, 18 Apr 2010 16:58:22 +0000 Subject: ipv6: allow to send packet after receiving ICMPv6 Too Big message with MTU field less than IPV6_MIN_MTU According to RFC2460, PMTU is set to the IPv6 Minimum Link MTU (1280) and a fragment header should always be included after a node receiving Too Big message reporting PMTU is less than the IPv6 Minimum Link MTU. After receiving a ICMPv6 Too Big message reporting PMTU is less than the IPv6 Minimum Link MTU, sctp *can't* send any data/control chunk that total length including IPv6 head and IPv6 extend head is less than IPV6_MIN_MTU(1280 bytes). The failure occured in p6_fragment(), about reason see following(take SHUTDOWN chunk for example): sctp_packet_transmit (SHUTDOWN chunk, len=16 byte) |------sctp_v6_xmit (local_df=0) |------ip6_xmit |------ip6_output (dst_allfrag is ture) |------ip6_fragment In ip6_fragment(), for local_df=0, drops the the packet and returns EMSGSIZE. The patch fixes it with adding check length of skb->len. In this case, Ipv6 not to fragment upper protocol data, just only add a fragment header before it. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 65f9c379df3..75d5ef83009 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -629,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (!skb->local_df) { + if (!skb->local_df && skb->len > mtu) { skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -- cgit v1.2.3-70-g09d2 From f4f914b58019f0e50d521bbbadfaee260d766f95 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 20 Apr 2010 21:21:26 +0000 Subject: net: ipv6 bind to device issue The issue raises when having 2 NICs both assigned the same IPv6 global address. If a sender binds to a particular NIC (SO_BINDTODEVICE), the outgoing traffic is being sent via the first found. The bonded device is thus not taken into an account during the routing. From the ip6_route_output function: If the binding address is multicast, linklocal or loopback, the RT6_LOOKUP_F_IFACE bit is set, but not for global address. So binding global address will neglect SO_BINDTODEVICE-binded device, because the fib6_rule_lookup function path won't check for the flowi::oif field and take first route that fits. Signed-off-by: Jiri Olsa Signed-off-by: Scott Otto Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2438e8cb9d..05ebd783304 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -815,7 +815,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, { int flags = 0; - if (rt6_need_strict(&fl->fl6_dst)) + if (fl->oif || rt6_need_strict(&fl->fl6_dst)) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl->fl6_src)) -- cgit v1.2.3-70-g09d2 From 3d7b08945e54a3a5358d5890240619a013cb7388 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Apr 2010 15:35:55 -0400 Subject: SUNRPC: Fix a bug in rpcauth_prune_expired Don't want to evict a credential if cred->cr_expire == jiffies, since that means that it was just placed on the cred_unused list. We therefore need to use time_in_range() rather than time_in_range_open(). Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f394fc190a4..95afe79dd9d 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -237,7 +237,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { /* Enforce a 60 second garbage collection moratorium */ - if (time_in_range_open(cred->cr_expire, expired, jiffies) && + if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) continue; -- cgit v1.2.3-70-g09d2 From 80032cffb95edff4fc216b1cb21682257be326b7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 23:53:27 +0000 Subject: rtnetlink: potential ERR_PTR dereference In the original code, if rtnl_create_link() returned an ERR_PTR then that would get passed to rtnl_configure_link() which dereferences it. Signed-off-by: Dan Carpenter Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4568120d853..fe776c9ddec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1270,10 +1270,11 @@ replay: err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) { + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); + if (err < 0) goto out; - } err = rtnl_configure_link(dev, ifm); if (err < 0) -- cgit v1.2.3-70-g09d2 From 24acc6895616b373475e92e49925efc3ef591563 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 23:55:27 +0000 Subject: rdma: potential ERR_PTR dereference In the original code, the "goto out" calls "rdma_destroy_id(cm_id);" That isn't needed here and would cause problems because "cm_id" is an ERR_PTR. The new code just returns directly. Signed-off-by: Dan Carpenter Acked-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9ece910ea39..7b155081b4d 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -134,7 +134,7 @@ static int __init rds_rdma_listen_init(void) ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " "rdma_create_id() returned %d\n", ret); - goto out; + return ret; } sin.sin_family = AF_INET, -- cgit v1.2.3-70-g09d2 From fda48a0d7a8412cedacda46a9c0bf8ef9cd13559 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Apr 2010 09:26:15 +0000 Subject: tcp: bind() fix when many ports are bound Port autoselection done by kernel only works when number of bound sockets is under a threshold (typically 30000). When this threshold is over, we must check if there is a conflict before exiting first loop in inet_csk_get_port() Change inet_csk_bind_conflict() to forbid two reuse-enabled sockets to bind on same (address,port) tuple (with a non ANY address) Same change for inet6_csk_bind_conflict() Reported-by: Gaspar Chilingarov Signed-off-by: Eric Dumazet Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 16 +++++++++++----- net/ipv6/inet6_connection_sock.c | 15 ++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269d..14825eb0977 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -70,13 +70,17 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; - } + } else if (reuse && sk2->sk_reuse && + sk2_rcv_saddr && + sk2_rcv_saddr == sk_rcv_saddr) + break; } } return node != NULL; @@ -120,9 +124,11 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - spin_unlock(&head->lock); - snum = smallest_rover; - goto have_snum; + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; + } } } goto next; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 628db24bcf2..b4b7d40a9c9 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -42,11 +42,16 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + else if (sk->sk_reuse && sk2->sk_reuse && + !ipv6_addr_any(inet6_rcv_saddr(sk2)) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } } return node != NULL; -- cgit v1.2.3-70-g09d2 From 6443bb1fc2050ca2b6585a3fa77f7833b55329ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Apr 2010 15:09:42 -0700 Subject: ipv6: Fix inet6_csk_bind_conflict() Commit fda48a0d7a84 (tcp: bind() fix when many ports are bound) introduced a bug on IPV6 part. We should not call ipv6_addr_any(inet6_rcv_saddr(sk2)) but ipv6_addr_any(inet6_rcv_saddr(sk)) because sk2 can be IPV4, while sk is IPV6. Reported-by: Michael S. Tsirkin Signed-off-by: Eric Dumazet Tested-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/ipv6/inet6_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b4b7d40a9c9..3a4d92b5a83 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -48,7 +48,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, ipv6_rcv_saddr_equal(sk, sk2)) break; else if (sk->sk_reuse && sk2->sk_reuse && - !ipv6_addr_any(inet6_rcv_saddr(sk2)) && + !ipv6_addr_any(inet6_rcv_saddr(sk)) && ipv6_rcv_saddr_equal(sk, sk2)) break; } -- cgit v1.2.3-70-g09d2 From 93c0c8b4a5a174645550d444bd5c3ff0cccf74cb Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Mon, 26 Apr 2010 11:20:32 -0700 Subject: ieee802154: Fix oops during ieee802154_sock_ioctl Trying to run izlisten (from lowpan-tools tests) on a device that does not exists I got the oops below. The problem is that we are using get_dev_by_name without checking if we really get a device back. We don't in this case and writing to dev->type generates this oops. [Oops code removed by Dmitry Eremin-Solenikov] If possible this patch should be applied to the current -rc fixes branch. Signed-off-by: Stefan Schmidt Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/af_ieee802154.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index c7da600750b..93c91b633a5 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -151,6 +151,9 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + if (!dev) + return -ENODEV; + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); -- cgit v1.2.3-70-g09d2 From 4eb8b9031a0314539605733597b1e30222d4da70 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 25 Apr 2010 08:59:07 +0000 Subject: bridge br_multicast: Ensure to initialize BR_INPUT_SKB_CB(skb)->mrouters_only. Even with commit 32dec5dd0233ebffa9cae25ce7ba6daeb7df4467 ("bridge br_multicast: Don't refer to BR_INPUT_SKB_CB(skb)->mrouters_only without IGMP snooping."), BR_INPUT_SKB_CB(skb)->mrouters_only is not appropriately initialized if IGMP snooping support is compiled and disabled, so we can see garbage. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 386c15369d9..eaa0e1bae49 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -957,9 +957,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned offset; int err; - BR_INPUT_SKB_CB(skb)->igmp = 0; - BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) return -EINVAL; @@ -1049,6 +1046,9 @@ err_out: int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + if (br->multicast_disabled) return 0; -- cgit v1.2.3-70-g09d2 From 477fffb082920476cc26f238d65538ccb8d601e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 23:52:01 +0000 Subject: bluetooth: handle l2cap_create_connless_pdu() errors l2cap_create_connless_pdu() can sometimes return ERR_PTR(-ENOMEM) or ERR_PTR(-EFAULT). Signed-off-by: Dan Carpenter Acked-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/bluetooth/l2cap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99d68c34e4f..9753b690a8b 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1626,7 +1626,10 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms /* Connectionless channel */ if (sk->sk_type == SOCK_DGRAM) { skb = l2cap_create_connless_pdu(sk, msg, len); - err = l2cap_do_send(sk, skb); + if (IS_ERR(skb)) + err = PTR_ERR(skb); + else + err = l2cap_do_send(sk, skb); goto done; } -- cgit v1.2.3-70-g09d2 From 8d238b25b1ec22a73b1c2206f111df2faaff8285 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Apr 2010 11:25:59 -0700 Subject: Revert "tcp: bind() fix when many ports are bound" This reverts two commits: fda48a0d7a8412cedacda46a9c0bf8ef9cd13559 tcp: bind() fix when many ports are bound and a follow-on fix for it: 6443bb1fc2050ca2b6585a3fa77f7833b55329ed ipv6: Fix inet6_csk_bind_conflict() It causes problems with binding listening sockets when time-wait sockets from a previous instance still are alive. It's too late to keep fiddling with this so late in the -rc series, and we'll deal with it in net-next-2.6 instead. Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 16 +++++----------- net/ipv6/inet6_connection_sock.c | 15 +++++---------- 2 files changed, 10 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14825eb0977..8da6429269d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -70,17 +70,13 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; - } else if (reuse && sk2->sk_reuse && - sk2_rcv_saddr && - sk2_rcv_saddr == sk_rcv_saddr) - break; + } } } return node != NULL; @@ -124,11 +120,9 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { - spin_unlock(&head->lock); - snum = smallest_rover; - goto have_snum; - } + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; } } goto next; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3a4d92b5a83..628db24bcf2 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -42,16 +42,11 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - else if (sk->sk_reuse && sk2->sk_reuse && - !ipv6_addr_any(inet6_rcv_saddr(sk)) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; } return node != NULL; -- cgit v1.2.3-70-g09d2 From 561b1733a465cf9677356b40c27653dd45f1ac56 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Apr 2010 08:47:18 +0000 Subject: sctp: avoid irq lock inversion while call sk->sk_data_ready() sk->sk_data_ready() of sctp socket can be called from both BH and non-BH contexts, but the default sk->sk_data_ready(), sock_def_readable(), can not be used in this case. Therefore, we have to make a new function sctp_data_ready() to grab sk->sk_data_ready() with BH disabling. ========================================================= [ INFO: possible irq lock inversion dependency detected ] 2.6.33-rc6 #129 --------------------------------------------------------- sctp_darn/1517 just changed the state of lock: (clock-AF_INET){++.?..}, at: [] sock_def_readable+0x20/0x80 but this lock took another, SOFTIRQ-unsafe lock in the past: (slock-AF_INET){+.-...} and interrupts could create inverse lock ordering between them. other info that might help us debug this: 1 lock held by sctp_darn/1517: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_sendmsg+0x23d/0xc00 [sctp] Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + net/sctp/endpointola.c | 1 + net/sctp/socket.c | 10 ++++++++++ 3 files changed, 12 insertions(+) (limited to 'net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 78740ec57d5..fa6cde578a1 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -128,6 +128,7 @@ extern int sctp_register_pf(struct sctp_pf *, sa_family_t); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); +void sctp_data_ready(struct sock *sk, int len); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 905fda582b9..7ec09ba03a1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -144,6 +144,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 007e8baba08..efa2bc3f002 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6189,6 +6189,16 @@ do_nonblock: goto out; } +void sctp_data_ready(struct sock *sk, int len) +{ + read_lock_bh(&sk->sk_callback_lock); + if (sk_has_sleeper(sk)) + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + POLLRDNORM | POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + read_unlock_bh(&sk->sk_callback_lock); +} + /* If socket sndbuf has changed, wake up all per association waiters. */ void sctp_write_space(struct sock *sk) { -- cgit v1.2.3-70-g09d2 From 0c42749cffbb4a06be86c5e5db6c7ebad548781f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:19 +0000 Subject: sctp: fix potential reference of a freed pointer When sctp attempts to update an assocition, it removes any addresses that were not in the updated INITs. However, the loop may attempt to refrence a transport with address after removing it. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df5abbff63e..99c93ee98ad 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1194,8 +1194,10 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { trans = list_entry(pos, struct sctp_transport, transports); - if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) - sctp_assoc_del_peer(asoc, &trans->ipaddr); + if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) { + sctp_assoc_rm_peer(asoc, trans); + continue; + } if (asoc->state >= SCTP_STATE_ESTABLISHED) sctp_transport_reset(trans); -- cgit v1.2.3-70-g09d2 From 81419d862db743fe4450a021893f24bab4698c1d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:20 +0000 Subject: sctp: per_cpu variables should be in bh_disabled section Since the change of the atomics to percpu variables, we now have to disable BH in process context when touching percpu variables. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index efa2bc3f002..44a1ab03a3f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3719,12 +3719,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - percpu_counter_inc(&sctp_sockets_allocated); /* Set socket backlog limit. */ sk->sk_backlog.limit = sysctl_sctp_rmem[1]; local_bh_disable(); + percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); @@ -3741,8 +3741,8 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); + percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } -- cgit v1.2.3-70-g09d2 From a8170c35e738d62e9919ce5b109cf4ed66e95bde Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Apr 2010 08:47:21 +0000 Subject: sctp: fix to calc the INIT/INIT-ACK chunk length correctly is set When calculating the INIT/INIT-ACK chunk length, we should not only account the length of parameters, but also the parameters zero padding length, such as AUTH HMACS parameter and CHUNKS parameter. Without the parameters zero padding length we may get following oops. skb_over_panic: text:ce2068d2 len:130 put:6 head:cac3fe00 data:cac3fe00 tail:0xcac3fe82 end:0xcac3fe80 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:127! invalid opcode: 0000 [#2] SMP last sysfs file: /sys/module/aes_generic/initstate Modules linked in: authenc ...... Pid: 4102, comm: sctp_darn Tainted: G D 2.6.34-rc2 #6 EIP: 0060:[] EFLAGS: 00010282 CPU: 0 EIP is at skb_over_panic+0x37/0x3e EAX: 00000078 EBX: c07c024b ECX: c07c02b9 EDX: cb607b78 ESI: 00000000 EDI: cac3fe7a EBP: 00000002 ESP: cb607b74 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process sctp_darn (pid: 4102, ti=cb607000 task=cabdc990 task.ti=cb607000) Stack: c07c02b9 ce2068d2 00000082 00000006 cac3fe00 cac3fe00 cac3fe82 cac3fe80 <0> c07c024b cac3fe7c cac3fe7a c0608dec ca986e80 ce2068d2 00000006 0000007a <0> cb8120ca ca986e80 cb812000 00000003 cb8120c4 ce208a25 cb8120ca cadd9400 Call Trace: [] ? sctp_addto_chunk+0x45/0x85 [sctp] [] ? skb_put+0x2e/0x32 [] ? sctp_addto_chunk+0x45/0x85 [sctp] [] ? sctp_make_init+0x279/0x28c [sctp] [] ? apic_timer_interrupt+0x2a/0x30 [] ? sctp_sf_do_prm_asoc+0x2b/0x7b [sctp] [] ? sctp_do_sm+0xa0/0x14a [sctp] [] ? sctp_pname+0x0/0x14 [sctp] [] ? sctp_primitive_ASSOCIATE+0x2b/0x31 [sctp] [] ? sctp_sendmsg+0x7a0/0x9eb [sctp] [] ? inet_sendmsg+0x3b/0x43 [] ? task_tick_fair+0x2d/0xd9 [] ? sock_sendmsg+0xa7/0xc1 [] ? smp_apic_timer_interrupt+0x6b/0x75 [] ? dequeue_task_fair+0x34/0x19b [] ? sched_clock_local+0x17/0x11e [] ? _copy_from_user+0x2b/0x10c [] ? verify_iovec+0x3c/0x6a [] ? sys_sendmsg+0x186/0x1e2 [] ? __wake_up_common+0x34/0x5b [] ? __wake_up+0x2c/0x3b [] ? tty_wakeup+0x43/0x47 [] ? remove_wait_queue+0x16/0x24 [] ? n_tty_read+0x5b8/0x65e [] ? default_wake_function+0x0/0x8 [] ? sys_socketcall+0x17f/0x1cd [] ? sysenter_do_call+0x12/0x22 Code: 0f 45 de 53 ff b0 98 00 00 00 ff b0 94 ...... EIP: [] skb_over_panic+0x37/0x3e SS:ESP 0068:cb607b74 To reproduce: # modprobe sctp # echo 1 > /proc/sys/net/sctp/addip_enable # echo 1 > /proc/sys/net/sctp/auth_enable # sctp_test -H 3ffe:501:ffff:100:20c:29ff:fe4d:f37e -P 800 -l # sctp_darn -H 3ffe:501:ffff:100:20c:29ff:fe4d:f37e -P 900 -h 192.168.0.21 -p 800 -I -s -t sctp_darn ready to send... 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> bindx-add=192.168.0.21 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> bindx-add=192.168.1.21 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> snd=10 ------------------------------------------------------------------ eth0 has addresses: 3ffe:501:ffff:100:20c:29ff:fe4d:f37e and 192.168.0.21 eth1 has addresses: 192.168.1.21 ------------------------------------------------------------------ Reported-by: George Cheimonidis Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17cb400ecd6..f6fc5c1a407 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -208,7 +208,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sp = sctp_sk(asoc->base.sk); num_types = sp->pf->supported_addrs(sp, types); - chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); + chunksize = sizeof(init) + addrs_len; + chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (sctp_prsctp_enable) @@ -238,14 +239,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -255,7 +256,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -397,13 +399,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -412,7 +414,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); -- cgit v1.2.3-70-g09d2 From c0786693404cffd80ca3cb6e75ee7b35186b2825 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:22 +0000 Subject: sctp: Fix oops when sending queued ASCONF chunks When we finish processing ASCONF_ACK chunk, we try to send the next queued ASCONF. This action runs the sctp state machine recursively and it's not prepared to do so. kernel BUG at kernel/timer.c:790! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/module/ipv6/initstate Modules linked in: sha256_generic sctp libcrc32c ipv6 dm_multipath uinput 8139too i2c_piix4 8139cp mii i2c_core pcspkr virtio_net joydev floppy virtio_blk virtio_pci [last unloaded: scsi_wait_scan] Pid: 0, comm: swapper Not tainted 2.6.34-rc4 #15 /Bochs EIP: 0060:[] EFLAGS: 00010286 CPU: 0 EIP is at add_timer+0xd/0x1b EAX: cecbab14 EBX: 000000f0 ECX: c0957b1c EDX: 03595cf4 ESI: cecba800 EDI: cf276f00 EBP: c0957aa0 ESP: c0957aa0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process swapper (pid: 0, ti=c0956000 task=c0988ba0 task.ti=c0956000) Stack: c0957ae0 d1851214 c0ab62e4 c0ab5f26 0500ffff 00000004 00000005 00000004 <0> 00000000 d18694fd 00000004 1666b892 cecba800 cecba800 c0957b14 00000004 <0> c0957b94 d1851b11 ceda8b00 cecba800 cf276f00 00000001 c0957b14 000000d0 Call Trace: [] ? sctp_side_effects+0x607/0xdfc [sctp] [] ? sctp_do_sm+0x108/0x159 [sctp] [] ? sctp_pname+0x0/0x1d [sctp] [] ? sctp_primitive_ASCONF+0x36/0x3b [sctp] [] ? sctp_process_asconf_ack+0x2a4/0x2d3 [sctp] [] ? sctp_sf_do_asconf_ack+0x1dd/0x2b4 [sctp] [] ? sctp_do_sm+0xb8/0x159 [sctp] [] ? sctp_cname+0x0/0x52 [sctp] [] ? sctp_assoc_bh_rcv+0xac/0xe1 [sctp] [] ? sctp_inq_push+0x2d/0x30 [sctp] [] ? sctp_rcv+0x797/0x82e [sctp] Tested-by: Wei Yongjun Signed-off-by: Yuansong Qiao Signed-off-by: Shuaijun Zhang Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 + net/sctp/sm_make_chunk.c | 15 --------------- net/sctp/sm_sideeffect.c | 26 ++++++++++++++++++++++++++ net/sctp/sm_statefuns.c | 8 +++++++- 4 files changed, 34 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 8be5135ff7a..2c55a7ea20a 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -107,6 +107,7 @@ typedef enum { SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ + SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_LAST } sctp_verb_t; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f6fc5c1a407..0fd5b4c8835 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3318,21 +3318,6 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, sctp_chunk_free(asconf); asoc->addip_last_asconf = NULL; - /* Send the next asconf chunk from the addip chunk queue. */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - asconf = list_entry(entry, struct sctp_chunk, list); - - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } - return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4c5bed9af4e..d5ae450b6f0 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -962,6 +962,29 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, } +/* Sent the next ASCONF packet currently stored in the association. + * This happens after the ASCONF_ACK was succeffully processed. + */ +static void sctp_cmd_send_asconf(struct sctp_association *asoc) +{ + /* Send the next asconf chunk from the addip chunk + * queue. + */ + if (!list_empty(&asoc->addip_chunk_list)) { + struct list_head *entry = asoc->addip_chunk_list.next; + struct sctp_chunk *asconf = list_entry(entry, + struct sctp_chunk, list); + list_del_init(entry); + + /* Hold the chunk until an ASCONF_ACK is received. */ + sctp_chunk_hold(asconf); + if (sctp_primitive_ASCONF(asoc, asconf)) + sctp_chunk_free(asconf); + else + asoc->addip_last_asconf = asconf; + } +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real @@ -1617,6 +1640,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, } error = sctp_cmd_send_msg(asoc, cmd->obj.msg); break; + case SCTP_CMD_SEND_NEXT_ASCONF: + sctp_cmd_send_asconf(asoc); + break; default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index abf601a1b84..24b2cd55563 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3676,8 +3676,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) + asconf_ack)) { + /* Successfully processed ASCONF_ACK. We can + * release the next asconf if we have one. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, + SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; + } abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); -- cgit v1.2.3-70-g09d2 From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 28 Apr 2010 10:30:59 +0000 Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4) Ok, version 4 Change Notes: 1) Minor cleanups, from Vlads notes Summary: Hey- Recently, it was reported to me that the kernel could oops in the following way: <5> kernel BUG at net/core/skbuff.c:91! <5> invalid operand: 0000 [#1] <5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U) vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5 ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi mptbase sd_mod scsi_mod <5> CPU: 0 <5> EIP: 0060:[] Not tainted VLI <5> EFLAGS: 00010216 (2.6.9-89.0.25.EL) <5> EIP is at skb_over_panic+0x1f/0x2d <5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44 <5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40 <5> ds: 007b es: 007b ss: 0068 <5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0) <5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180 e0c2947d <5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004 df653490 <5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e 00000004 <5> Call Trace: <5> [] sctp_addto_chunk+0xb0/0x128 [sctp] <5> [] sctp_addto_chunk+0xb5/0x128 [sctp] <5> [] sctp_init_cause+0x3f/0x47 [sctp] <5> [] sctp_process_unk_param+0xac/0xb8 [sctp] <5> [] sctp_verify_init+0xcc/0x134 [sctp] <5> [] sctp_sf_do_5_1B_init+0x83/0x28e [sctp] <5> [] sctp_do_sm+0x41/0x77 [sctp] <5> [] cache_grow+0x140/0x233 <5> [] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp] <5> [] sctp_inq_push+0xe/0x10 [sctp] <5> [] sctp_rcv+0x454/0x509 [sctp] <5> [] ipt_hook+0x17/0x1c [iptable_filter] <5> [] nf_iterate+0x40/0x81 <5> [] ip_local_deliver_finish+0x0/0x151 <5> [] ip_local_deliver_finish+0xc6/0x151 <5> [] nf_hook_slow+0x83/0xb5 <5> [] ip_local_deliver+0x1a2/0x1a9 <5> [] ip_local_deliver_finish+0x0/0x151 <5> [] ip_rcv+0x334/0x3b4 <5> [] netif_receive_skb+0x320/0x35b <5> [] init_stall_timer+0x67/0x6a [uhci_hcd] <5> [] process_backlog+0x6c/0xd9 <5> [] net_rx_action+0xfe/0x1f8 <5> [] __do_softirq+0x35/0x79 <5> [] handle_IRQ_event+0x0/0x4f <5> [] do_softirq+0x46/0x4d Its an skb_over_panic BUG halt that results from processing an init chunk in which too many of its variable length parameters are in some way malformed. The problem is in sctp_process_unk_param: if (NULL == *errp) *errp = sctp_make_op_error_space(asoc, chunk, ntohs(chunk->chunk_hdr->length)); if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); sctp_addto_chunk(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); When we allocate an error chunk, we assume that the worst case scenario requires that we have chunk_hdr->length data allocated, which would be correct nominally, given that we call sctp_addto_chunk for the violating parameter. Unfortunately, we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error chunk, so the worst case situation in which all parameters are in violation requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data. The result of this error is that a deliberately malformed packet sent to a listening host can cause a remote DOS, described in CVE-2010-1173: http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173 I've tested the below fix and confirmed that it fixes the issue. We move to a strategy whereby we allocate a fixed size error chunk and ignore errors we don't have space to report. Tested by me successfully Signed-off-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/sm_make_chunk.c | 62 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ff301774471..597f8e27aaf 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -778,6 +778,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len, struct iovec *data); void sctp_chunk_free(struct sctp_chunk *); void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data); +void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data); struct sctp_chunk *sctp_chunkify(struct sk_buff *, const struct sctp_association *, struct sock *); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 0fd5b4c8835..30c1767186b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -108,7 +108,7 @@ static const struct sctp_paramhdr prsctp_param = { cpu_to_be16(sizeof(struct sctp_paramhdr)), }; -/* A helper to initialize to initialize an op error inside a +/* A helper to initialize an op error inside a * provided chunk, as most cause codes will be embedded inside an * abort chunk. */ @@ -125,6 +125,29 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); } +/* A helper to initialize an op error inside a + * provided chunk, as most cause codes will be embedded inside an + * abort chunk. Differs from sctp_init_cause in that it won't oops + * if there isn't enough space in the op error chunk + */ +int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, + size_t paylen) +{ + sctp_errhdr_t err; + __u16 len; + + /* Cause code constants are now defined in network order. */ + err.cause = cause_code; + len = sizeof(sctp_errhdr_t) + paylen; + err.length = htons(len); + + if (skb_tailroom(chunk->skb) > len) + return -ENOSPC; + chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, + sizeof(sctp_errhdr_t), + &err); + return 0; +} /* 3.3.2 Initiation (INIT) (1) * * This chunk is used to initiate a SCTP association between two @@ -1132,6 +1155,24 @@ nodata: return retval; } +/* Create an Operation Error chunk of a fixed size, + * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) + * This is a helper function to allocate an error chunk for + * for those invalid parameter codes in which we may not want + * to report all the errors, if the incomming chunk is large + */ +static inline struct sctp_chunk *sctp_make_op_error_fixed( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) +{ + size_t size = asoc ? asoc->pathmtu : 0; + + if (!size) + size = SCTP_DEFAULT_MAXSEGMENT; + + return sctp_make_op_error_space(asoc, chunk, size); +} + /* Create an Operation Error chunk. */ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, @@ -1374,6 +1415,18 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } +/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient + * space in the chunk + */ +void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, + int len, const void *data) +{ + if (skb_tailroom(chunk->skb) > len) + return sctp_addto_chunk(chunk, len, data); + else + return NULL; +} + /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1977,13 +2030,12 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * returning multiple unknown parameters. */ if (NULL == *errp) - *errp = sctp_make_op_error_space(asoc, chunk, - ntohs(chunk->chunk_hdr->length)); + *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk(*errp, + sctp_addto_chunk_fixed(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); } else { -- cgit v1.2.3-70-g09d2 From f935aa9e99d6ec74a50871c120e6b21de7256efb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 May 2010 23:42:27 -0700 Subject: ipv6: Fix default multicast hops setting. As per RFC 3493 the default multicast hops setting for a socket should be "1" just like ipv4. Ironically we have a IPV6_DEFAULT_MCASTHOPS macro it just wasn't being used. Reported-by: Elliot Hughes Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3192aa02ba5..3f9e86b15e0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -200,7 +200,7 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; - np->mcast_hops = -1; + np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; np->ipv6only = net->ipv6.sysctl.bindv6only; -- cgit v1.2.3-70-g09d2 From d40a4de0be08f005814a4fddac748fe5353208ec Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 3 May 2010 15:44:27 +0000 Subject: IPv6: fix IPV6_RECVERR handling of locally-generated errors I noticed when I added support for IPV6_DONTFRAG that if you set IPV6_RECVERR and tried to send a UDP packet larger than 64K to an IPv6 destination, you'd correctly get an EMSGSIZE, but reading from MSG_ERRQUEUE returned the incorrect address in the cmsg: struct msghdr: msg_name 0x7fff8f3c96d0 msg_namelen 28 struct sockaddr_in6: sin6_family 10 sin6_port 7639 sin6_flowinfo 0 sin6_addr ::ffff:38.32.0.0 sin6_scope_id 0 ((null)) It should have returned this in my case: struct msghdr: msg_name 0x7fffd866b510 msg_namelen 28 struct sockaddr_in6: sin6_family 10 sin6_port 7639 sin6_flowinfo 0 sin6_addr 2620:0:a09:e000:21f:29ff:fe57:f88b sin6_scope_id 0 ((null)) The problem is that ipv6_recv_error() assumes that if the error wasn't generated by ICMPv6, it's an IPv4 address sitting there, and proceeds to create a v4-mapped address from it. Change ipv6_icmp_error() and ipv6_local_error() to set skb->protocol to htons(ETH_P_IPV6) so that ipv6_recv_error() knows the address sitting right after the extended error is IPv6, else it will incorrectly map the first octet into an IPv4-mapped IPv6 address in the cmsg structure returned in a recvmsg() call to obtain the error. Signed-off-by: Brian Haley -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 622dc7939a1..61573885e45 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -222,6 +222,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, if (!skb) return; + skb->protocol = htons(ETH_P_IPV6); + serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6; @@ -255,6 +257,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) if (!skb) return; + skb->protocol = htons(ETH_P_IPV6); + skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); @@ -319,7 +323,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_flowinfo = 0; sin->sin6_port = serr->port; sin->sin6_scope_id = 0; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { + if (skb->protocol == htons(ETH_P_IPV6)) { ipv6_addr_copy(&sin->sin6_addr, (struct in6_addr *)(nh + serr->addr_offset)); if (np->sndflow) @@ -341,7 +345,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { + if (skb->protocol == htons(ETH_P_IPV6)) { ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr); if (np->rxopt.all) datagram_recv_ctl(sk, msg, skb); -- cgit v1.2.3-70-g09d2 From 6ec82562ffc6f297d0de36d65776cff8e5704867 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 May 2010 00:53:53 -0700 Subject: veth: Dont kfree_skb() after dev_forward_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of congestion, netif_rx() frees the skb, so we must assume dev_forward_skb() also consume skb. Bug introduced by commit 445409602c092 (veth: move loopback logic to common location) We must change dev_forward_skb() to always consume skb, and veth to not double free it. Bug report : http://marc.info/?l=linux-netdev&m=127310770900442&w=3 Reported-by: Martín Ferrari Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/veth.c | 1 - net/core/dev.c | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f9f0730b53d..5ec542dd5b5 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -187,7 +187,6 @@ tx_drop: return NETDEV_TX_OK; rx_drop: - kfree_skb(skb); rcv_stats->rx_dropped++; return NETDEV_TX_OK; } diff --git a/net/core/dev.c b/net/core/dev.c index f769098774b..264137fce3a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1451,7 +1451,7 @@ static inline void net_timestamp(struct sk_buff *skb) * * return values: * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) + * NET_RX_DROP (packet was dropped, but freed) * * dev_forward_skb can be used for injecting an skb from the * start_xmit function of one device into the receive queue @@ -1465,12 +1465,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { skb_orphan(skb); - if (!(dev->flags & IFF_UP)) - return NET_RX_DROP; - - if (skb->len > (dev->mtu + dev->hard_header_len)) + if (!(dev->flags & IFF_UP) || + (skb->len > (dev->mtu + dev->hard_header_len))) { + kfree_skb(skb); return NET_RX_DROP; - + } skb_set_dev(skb, dev); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; -- cgit v1.2.3-70-g09d2 From 50b5d6ad63821cea324a5a7a19854d4de1a0a819 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 6 May 2010 00:56:07 -0700 Subject: sctp: Fix a race between ICMP protocol unreachable and connect() ICMP protocol unreachable handling completely disregarded the fact that the user may have locked the socket. It proceeded to destroy the association, even though the user may have held the lock and had a ref on the association. This resulted in the following: Attempt to release alive inet socket f6afcc00 ========================= [ BUG: held lock freed! ] ------------------------- somenu/2672 is freeing memory f6afcc00-f6afcfff, with a lock still held there! (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c 1 lock held by somenu/2672: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c stack backtrace: Pid: 2672, comm: somenu Not tainted 2.6.32-telco #55 Call Trace: [] ? printk+0xf/0x11 [] debug_check_no_locks_freed+0xce/0xff [] kmem_cache_free+0x21/0x66 [] __sk_free+0x9d/0xab [] sk_free+0x1c/0x1e [] sctp_association_put+0x32/0x89 [] __sctp_connect+0x36d/0x3f4 [] ? sctp_connect+0x13/0x4c [] ? autoremove_wake_function+0x0/0x33 [] sctp_connect+0x31/0x4c [] inet_dgram_connect+0x4b/0x55 [] sys_connect+0x54/0x71 [] ? lock_release_non_nested+0x88/0x239 [] ? might_fault+0x42/0x7c [] ? might_fault+0x42/0x7c [] sys_socketcall+0x6d/0x178 [] ? trace_hardirqs_on_thunk+0xc/0x10 [] syscall_call+0x7/0xb This was because the sctp_wait_for_connect() would aqcure the socket lock and then proceed to release the last reference count on the association, thus cause the fully destruction path to finish freeing the socket. The simplest solution is to start a very short timer in case the socket is owned by user. When the timer expires, we can do some verification and be able to do the release properly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 1 + include/net/sctp/structs.h | 3 +++ net/sctp/input.c | 22 ++++++++++++++++++---- net/sctp/sm_sideeffect.c | 35 +++++++++++++++++++++++++++++++++++ net/sctp/transport.c | 2 ++ 5 files changed, 59 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 851c813adb3..61d73e37d54 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -279,6 +279,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); void sctp_generate_heartbeat_event(unsigned long peer); +void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 597f8e27aaf..219043a67bf 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1010,6 +1010,9 @@ struct sctp_transport { /* Heartbeat timer is per destination. */ struct timer_list hb_timer; + /* Timer to handle ICMP proto unreachable envets */ + struct timer_list proto_unreach_timer; + /* Since we're using per-destination retransmission timers * (see above), we're also using per-destination "transmitted" * queues. This probably ought to be a private struct diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a570184e5a..ea2192444ce 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -440,11 +440,25 @@ void sctp_icmp_proto_unreachable(struct sock *sk, { SCTP_DEBUG_PRINTK("%s\n", __func__); - sctp_do_sm(SCTP_EVENT_T_OTHER, - SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), - asoc->state, asoc->ep, asoc, t, - GFP_ATOMIC); + if (sock_owned_by_user(sk)) { + if (timer_pending(&t->proto_unreach_timer)) + return; + else { + if (!mod_timer(&t->proto_unreach_timer, + jiffies + (HZ/20))) + sctp_association_hold(asoc); + } + + } else { + if (timer_pending(&t->proto_unreach_timer) && + del_timer(&t->proto_unreach_timer)) + sctp_association_put(asoc); + sctp_do_sm(SCTP_EVENT_T_OTHER, + SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), + asoc->state, asoc->ep, asoc, t, + GFP_ATOMIC); + } } /* Common lookup code for icmp/icmpv6 error handler. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d5ae450b6f0..eb1f42f45fd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -397,6 +397,41 @@ out_unlock: sctp_transport_put(transport); } +/* Handle the timeout of the ICMP protocol unreachable timer. Trigger + * the correct state machine transition that will close the association. + */ +void sctp_generate_proto_unreach_event(unsigned long data) +{ + struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_association *asoc = transport->asoc; + + sctp_bh_lock_sock(asoc->base.sk); + if (sock_owned_by_user(asoc->base.sk)) { + SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + + /* Try again later. */ + if (!mod_timer(&transport->proto_unreach_timer, + jiffies + (HZ/20))) + sctp_association_hold(asoc); + goto out_unlock; + } + + /* Is this structure just waiting around for us to actually + * get destroyed? + */ + if (asoc->base.dead) + goto out_unlock; + + sctp_do_sm(SCTP_EVENT_T_OTHER, + SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), + asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); + +out_unlock: + sctp_bh_unlock_sock(asoc->base.sk); + sctp_association_put(asoc); +} + + /* Inject a SACK Timeout event into the state machine. */ static void sctp_generate_sack_event(unsigned long data) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index be4d63d5a5c..4a368038d46 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -108,6 +108,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, (unsigned long)peer); + setup_timer(&peer->proto_unreach_timer, + sctp_generate_proto_unreach_event, (unsigned long)peer); /* Initialize the 64-bit random nonce sent with heartbeat. */ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); -- cgit v1.2.3-70-g09d2 From ccc2d97cb7c798e785c9f198de243e2b59f7073b Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Thu, 6 May 2010 03:44:34 +0000 Subject: ipv4: udp: fix short packet and bad checksum logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2783ef23 moved the initialisation of saddr and daddr after pskb_may_pull() to avoid a potential data corruption. Unfortunately also placing it after the short packet and bad checksum error paths, where these variables are used for logging. The result is bogus output like [92238.389505] UDP: short packet: From 2.0.0.0:65535 23715/178 to 0.0.0.0:65535 Moving the saddr and daddr initialisation above the error paths, while still keeping it after the pskb_may_pull() to keep the fix from commit 2783ef23. Signed-off-by: Bjørn Mork Cc: stable@kernel.org Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8fef859db35..c36522a0f11 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1527,6 +1527,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, uh = udp_hdr(skb); ulen = ntohs(uh->len); + saddr = ip_hdr(skb)->saddr; + daddr = ip_hdr(skb)->daddr; + if (ulen > skb->len) goto short_packet; @@ -1540,9 +1543,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - saddr = ip_hdr(skb)->saddr; - daddr = ip_hdr(skb)->daddr; - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); -- cgit v1.2.3-70-g09d2 From 79733a865c7fd778ce45e3503962b3a875b0a153 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 4 May 2010 16:04:49 -0700 Subject: mac80211: remove association work when processing deauth request In https://bugzilla.kernel.org/show_bug.cgi?id=15794 a user encountered the following: [18967.469098] wlan0: authenticated [18967.472527] wlan0: associate with 00:1c:10:b8:e3:ea (try 1) [18967.472585] wlan0: deauthenticating from 00:1c:10:b8:e3:ea by local choice (reason=3) [18967.672057] wlan0: associate with 00:1c:10:b8:e3:ea (try 2) [18967.872357] wlan0: associate with 00:1c:10:b8:e3:ea (try 3) [18968.072960] wlan0: association with 00:1c:10:b8:e3:ea timed out [18968.076890] ------------[ cut here ]------------ [18968.076898] WARNING: at net/wireless/mlme.c:341 cfg80211_send_assoc_timeout+0xa8/0x140() [18968.076900] Hardware name: GX628 [18968.076924] Pid: 1408, comm: phy0 Not tainted 2.6.34-rc4-00082-g250541f-dirty #3 [18968.076926] Call Trace: [18968.076931] [] ? warn_slowpath_common+0x6e/0xb0 [18968.076934] [] ? cfg80211_send_assoc_timeout+0xa8/0x140 [18968.076937] [] ? mod_timer+0x10b/0x180 [18968.076940] [] ? ieee80211_assoc_done+0xbc/0xc0 [18968.076943] [] ? ieee80211_work_work+0x553/0x11c0 [18968.076945] [] ? finish_task_switch+0x41/0xb0 [18968.076948] [] ? ieee80211_work_work+0x0/0x11c0 [18968.076951] [] ? worker_thread+0x13b/0x210 [18968.076954] [] ? autoremove_wake_function+0x0/0x30 [18968.076956] [] ? worker_thread+0x0/0x210 [18968.076959] [] ? kthread+0x8e/0xa0 [18968.076962] [] ? kernel_thread_helper+0x4/0x10 [18968.076964] [] ? kthread+0x0/0xa0 [18968.076966] [] ? kernel_thread_helper+0x0/0x10 [18968.076968] ---[ end trace 8aa6265f4b1adfe0 ]--- As explained by Johannes Berg : We authenticate successfully, and then userspace requests association. Then we start that process, but the AP doesn't respond. While we're still waiting for an AP response, userspace asks for a deauth. We do the deauth, but don't abort the association work. Then once the association work times out we tell cfg80211, but it no longer wants to know since for all it is concerned we accepted the deauth that also kills the association attempt. Fix this by, upon receipt of deauth request, removing the association work and continuing to send the deauth. Unfortunately the user reporting the issue is not able to reproduce this problem anymore and cannot verify this fix. This seems like a well understood issue though and I thus present the patch. Bug-identified-by: Johannes Berg Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a9650343f2..6ccd48e180e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2029,7 +2029,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, continue; if (wk->type != IEEE80211_WORK_DIRECT_PROBE && - wk->type != IEEE80211_WORK_AUTH) + wk->type != IEEE80211_WORK_AUTH && + wk->type != IEEE80211_WORK_ASSOC) continue; if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) -- cgit v1.2.3-70-g09d2 From bbd725435ddb1cac732f7a8c23c21ff67f24c60f Mon Sep 17 00:00:00 2001 From: Andreas Meissner Date: Mon, 10 May 2010 04:47:49 -0700 Subject: IPv4: unresolved multicast route cleanup Fixes the expiration timer for unresolved multicast route entries. In case new multicast routing requests come in faster than the expiration timeout occurs (e.g. zap through multicast TV streams), the timer is prevented from being called at time for already existing entries. As the single timer is resetted to default whenever a new entry is made, the timeout for existing unresolved entires are missed and/or not updated. As a consequence new requests are denied when the limit of unresolved entries has been reached because old entries live longer than they are supposed to. The solution is to reset the timer only for the first unresolved entry in the multicast routing cache. All other timers are already set and updated correctly within the timer function itself by now. Signed-off by: Andreas Meissner Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d4f6d1340a..ec19a890c9a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -754,7 +754,8 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) c->next = mfc_unres_queue; mfc_unres_queue = c; - mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); + if (atomic_read(&net->ipv4.cache_resolve_queue_len) == 1) + mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); } /* -- cgit v1.2.3-70-g09d2 From f0ecde1466f21edf577b809735f4f35f354777a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 10 May 2010 04:59:07 -0700 Subject: net: Fix FDDI and TR config checks in ipv4 arp and LLC. Need to check both CONFIG_FOO and CONFIG_FOO_MODULE Signed-off-by: David S. Miller --- net/ipv4/arp.c | 6 +++--- net/llc/llc_sap.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6e747065c20..80769f1f9fa 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -661,13 +661,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, #endif #endif -#ifdef CONFIG_FDDI +#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) case ARPHRD_FDDI: arp->ar_hrd = htons(ARPHRD_ETHER); arp->ar_pro = htons(ETH_P_IP); break; #endif -#ifdef CONFIG_TR +#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) case ARPHRD_IEEE802_TR: arp->ar_hrd = htons(ARPHRD_IEEE802); arp->ar_pro = htons(ETH_P_IP); @@ -1051,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, return -EINVAL; } switch (dev->type) { -#ifdef CONFIG_FDDI +#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) case ARPHRD_FDDI: /* * According to RFC 1390, FDDI devices should accept ARP diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index a432f0ec051..94e7fca75b8 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -31,7 +31,7 @@ static int llc_mac_header_len(unsigned short devtype) case ARPHRD_ETHER: case ARPHRD_LOOPBACK: return sizeof(struct ethhdr); -#ifdef CONFIG_TR +#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) case ARPHRD_IEEE802_TR: return sizeof(struct trh_hdr); #endif -- cgit v1.2.3-70-g09d2 From 35790c0421121364883a167bab8a2e37e1f67f78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 May 2010 00:34:04 -0700 Subject: tcp: fix MD5 (RFC2385) support TCP MD5 support uses percpu data for temporary storage. It currently disables preemption so that same storage cannot be reclaimed by another thread on same cpu. We also have to make sure a softirq handler wont try to use also same context. Various bug reports demonstrated corruptions. Fix is to disable preemption and BH. Reported-by: Bhaskar Dutta Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 21 +++------------------ net/ipv4/tcp.c | 34 ++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 75be5a28815..aa04b9a5093 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1197,30 +1197,15 @@ extern int tcp_v4_md5_do_del(struct sock *sk, extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); -extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); -extern void __tcp_put_md5sig_pool(void); +extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); +extern void tcp_put_md5sig_pool(void); + extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, unsigned header_len); extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key); -static inline -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) -{ - int cpu = get_cpu(); - struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu); - if (!ret) - put_cpu(); - return ret; -} - -static inline void tcp_put_md5sig_pool(void) -{ - __tcp_put_md5sig_pool(); - put_cpu(); -} - /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf64caa..296150b2a62 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2839,7 +2839,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) if (p->md5_desc.tfm) crypto_free_hash(p->md5_desc.tfm); kfree(p); - p = NULL; } } free_percpu(pool); @@ -2937,25 +2936,40 @@ retry: EXPORT_SYMBOL(tcp_alloc_md5sig_pool); -struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) + +/** + * tcp_get_md5sig_pool - get md5sig_pool for this user + * + * We use percpu structure, so if we succeed, we exit with preemption + * and BH disabled, to make sure another thread or softirq handling + * wont try to get same context. + */ +struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { struct tcp_md5sig_pool * __percpu *p; - spin_lock_bh(&tcp_md5sig_pool_lock); + + local_bh_disable(); + + spin_lock(&tcp_md5sig_pool_lock); p = tcp_md5sig_pool; if (p) tcp_md5sig_users++; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return (p ? *per_cpu_ptr(p, cpu) : NULL); -} + spin_unlock(&tcp_md5sig_pool_lock); + + if (p) + return *per_cpu_ptr(p, smp_processor_id()); -EXPORT_SYMBOL(__tcp_get_md5sig_pool); + local_bh_enable(); + return NULL; +} +EXPORT_SYMBOL(tcp_get_md5sig_pool); -void __tcp_put_md5sig_pool(void) +void tcp_put_md5sig_pool(void) { + local_bh_enable(); tcp_free_md5sig_pool(); } - -EXPORT_SYMBOL(__tcp_put_md5sig_pool); +EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, struct tcphdr *th) -- cgit v1.2.3-70-g09d2 From 55fa0cfd7c3ac2ae34cac7dca2e3fbcfe661e6c3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 9 May 2010 16:56:07 +0000 Subject: sctp: delete active ICMP proto unreachable timer when free transport transport may be free before ICMP proto unreachable timer expire, so we should delete active ICMP proto unreachable timer when transport is going away. Signed-off-by: Wei Yongjun Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4a368038d46..165d54e07fc 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -173,6 +173,10 @@ void sctp_transport_free(struct sctp_transport *transport) del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); + /* Delete the ICMP proto unreachable timer if it's active. */ + if (timer_pending(&transport->proto_unreach_timer) && + del_timer(&transport->proto_unreach_timer)) + sctp_association_put(transport->asoc); sctp_transport_put(transport); } -- cgit v1.2.3-70-g09d2 From c02db8c6290bb992442fec1407643c94cc414375 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sun, 16 May 2010 01:05:45 -0700 Subject: rtnetlink: make SR-IOV VF interface symmetric Now we have a set of nested attributes: IFLA_VFINFO_LIST (NESTED) IFLA_VF_INFO (NESTED) IFLA_VF_MAC IFLA_VF_VLAN IFLA_VF_TX_RATE This allows a single set to operate on multiple attributes if desired. Among other things, it means a dump can be replayed to set state. The current interface has yet to be released, so this seems like something to consider for 2.6.34. Signed-off-by: Chris Wright Signed-off-by: David S. Miller --- include/linux/if_link.h | 23 +++++-- net/core/rtnetlink.c | 159 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 129 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c9bf92cd765..d94963b379d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,10 +79,7 @@ enum { IFLA_NET_NS_PID, IFLA_IFALIAS, IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ - IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ - IFLA_VFINFO, + IFLA_VFINFO_LIST, __IFLA_MAX }; @@ -203,6 +200,24 @@ enum macvlan_mode { /* SR-IOV virtual function managment section */ +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, + IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + struct ifla_vf_mac { __u32 vf; __u8 mac[32]; /* MAX_ADDR_LEN */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe776c9ddec..31e85d327aa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +/* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) - return dev_num_vf(dev->dev.parent) * - sizeof(struct ifla_vf_info); - else + if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { + + int num_vfs = dev_num_vf(dev->dev.parent); + size_t size = nlmsg_total_size(sizeof(struct nlattr)); + size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); + size += num_vfs * (sizeof(struct ifla_vf_mac) + + sizeof(struct ifla_vf_vlan) + + sizeof(struct ifla_vf_tx_rate)); + return size; + } else return 0; } @@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_NUM_VF */ - + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ + + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } @@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; - struct ifla_vf_info ivi; - NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { + struct nlattr *vfinfo, *vf; + int num_vfs = dev_num_vf(dev->dev.parent); + + NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs); + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + goto nla_put_failure; + for (i = 0; i < num_vfs; i++) { + struct ifla_vf_info ivi; + struct ifla_vf_mac vf_mac; + struct ifla_vf_vlan vf_vlan; + struct ifla_vf_tx_rate vf_tx_rate; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; - NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); + vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + vf_vlan.vlan = ivi.vlan; + vf_vlan.qos = ivi.qos; + vf_tx_rate.rate = ivi.tx_rate; + vf = nla_nest_start(skb, IFLA_VF_INFO); + if (!vf) { + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); + NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); + NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); + nla_nest_end(skb, vf); } + nla_nest_end(skb, vfinfo); } if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) @@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { + [IFLA_VF_INFO] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { + [IFLA_VF_MAC] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_tx_rate) }, +}; + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; @@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } +static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +{ + int rem, err = -EINVAL; + struct nlattr *vf; + const struct net_device_ops *ops = dev->netdev_ops; + + nla_for_each_nested(vf, attr, rem) { + switch (nla_type(vf)) { + case IFLA_VF_MAC: { + struct ifla_vf_mac *ivm; + ivm = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + break; + } + case IFLA_VF_VLAN: { + struct ifla_vf_vlan *ivv; + ivv = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, + ivv->vlan, + ivv->qos); + break; + } + case IFLA_VF_TX_RATE: { + struct ifla_vf_tx_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, + ivt->rate); + break; + } + default: + err = -EINVAL; + break; + } + if (err) + break; + } + return err; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, write_unlock_bh(&dev_base_lock); } - if (tb[IFLA_VF_MAC]) { - struct ifla_vf_mac *ivm; - ivm = nla_data(tb[IFLA_VF_MAC]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); - if (err < 0) - goto errout; - modified = 1; - } - - if (tb[IFLA_VF_VLAN]) { - struct ifla_vf_vlan *ivv; - ivv = nla_data(tb[IFLA_VF_VLAN]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - if (err < 0) - goto errout; - modified = 1; - } - err = 0; - - if (tb[IFLA_VF_TX_RATE]) { - struct ifla_vf_tx_rate *ivt; - ivt = nla_data(tb[IFLA_VF_TX_RATE]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); - if (err < 0) - goto errout; - modified = 1; + if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *attr; + int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { + if (nla_type(attr) != IFLA_VF_INFO) + goto errout; + err = do_setvfinfo(dev, attr); + if (err < 0) + goto errout; + modified = 1; + } } err = 0; -- cgit v1.2.3-70-g09d2