From 5843ef421311c34f06d5ab82bced5aebfe185b2c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 30 Sep 2013 13:29:11 -0700 Subject: tcp: Always set options to 0 before calling tcp_established_options tcp_established_options assumes opts->options is 0 before calling, as it read modify writes it. For the tcp_current_mss() case the opts structure is not zeroed, so this can be done with uninitialized values. This is ok, because ->options is not read in this path. But it's still better to avoid the operation on the uninitialized field. This shuts up a static code analyzer, and presumably may help the optimizer. Cc: netdev@vger.kernel.org Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e6bb8256e59..c6f01f2cdb3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb unsigned int size = 0; unsigned int eff_sacks; + opts->options = 0; + #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { -- cgit v1.2.3-70-g09d2 From c52e2421f7368fd36cbe330d2cf41b10452e39a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Oct 2013 11:54:30 -0700 Subject: tcp: must unclone packets before mangling them TCP stack should make sure it owns skbs before mangling them. We had various crashes using bnx2x, and it turned out gso_size was cleared right before bnx2x driver was populating TC descriptor of the _previous_ packet send. TCP stack can sometime retransmit packets that are still in Qdisc. Of course we could make bnx2x driver more robust (using ACCESS_ONCE(shinfo->gso_size) for example), but the bug is TCP stack. We have identified two points where skb_unclone() was needed. This patch adds a WARN_ON_ONCE() to warn us if we missed another fix of this kind. Kudos to Neal for finding the root cause of this bug. Its visible using small MSS. Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c6f01f2cdb3..8fad1c15568 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -986,6 +986,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1067,9 +1070,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -2344,6 +2345,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } -- cgit v1.2.3-70-g09d2 From 8f26fb1c1ed81c33f5d87c5936f4d9d1b4118918 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Oct 2013 12:24:54 -0700 Subject: tcp: remove the sk_can_gso() check from tcp_set_skb_tso_segs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sk_can_gso() should only be used as a hint in tcp_sendmsg() to build GSO packets in the first place. (As a performance hint) Once we have GSO packets in write queue, we can not decide they are no longer GSO only because flow now uses a route which doesn't handle TSO/GSO. Core networking stack handles the case very well for us, all we need is keeping track of packet counts in MSS terms, regardless of segmentation done later (in GSO or hardware) Right now, if tcp_fragment() splits a GSO packet in two parts, @left and @right, and route changed through a non GSO device, both @left and @right have pcount set to 1, which is wrong, and leads to incorrect packet_count tracking. This problem was added in commit d5ac99a648 ("[TCP]: skb pcount with MTU discovery") Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reported-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8fad1c15568..d46f2143305 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -989,8 +989,7 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, /* Make sure we own this skb before messing gso_size/gso_segs */ WARN_ON_ONCE(skb_cloned(skb)); - if (skb->len <= mss_now || !sk_can_gso(sk) || - skb->ip_summed == CHECKSUM_NONE) { + if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ -- cgit v1.2.3-70-g09d2