From 97077c4a9868fce8ac151512cde5d24fc1144f24 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Aug 2005 12:03:32 -0700 Subject: [IPV6]: Fix raw socket hardware checksum failures When packets hit raw sockets the csum update isn't done yet, do it manually. Packets can also reach rawv6_rcv on the output path through ip6_call_ra_chain, in this case skb->ip_summed is CHECKSUM_NONE and this codepath isn't executed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/raw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e2b848ec985..1d4d75b34d3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,6 +328,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (skb->ip_summed != CHECKSUM_UNNECESSARY) { if (skb->ip_summed == CHECKSUM_HW) { + skb_postpull_rcsum(skb, skb->nh.raw, + skb->h.raw - skb->nh.raw); skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, -- cgit v1.2.3-70-g09d2 From 35d59efd105b3b7c1b5878dcc9d1749f41f9740f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Aug 2005 12:03:59 -0700 Subject: [TCP]: Fix bug #5070: kernel BUG at net/ipv4/tcp_output.c:864 1) We send out a normal sized packet with TSO on to start off. 2) ICMP is received indicating a smaller MTU. 3) We send the current sk_send_head which needs to be fragmented since it was created before the ICMP event. The first fragment is then sent out. At this point the remaining fragment is allocated by tcp_fragment. However, its size is padded to fit the L1 cache-line size therefore creating tail-room up to 124 bytes long. This fragment will also be sitting at sk_send_head. 4) tcp_sendmsg is called again and it stores data in the tail-room of the fragment. 5) tcp_push_one is called by tcp_sendmsg which then calls tso_fragment since the packet as a whole exceeds the MTU. 
At this point we have a packet that has data in the head area being fed to tso_fragment which bombs out. My take on this is that we shouldn't ever call tcp_fragment on a TSO socket for a packet that is yet to be transmitted since this creates a packet on sk_send_head that cannot be extended. So here is a patch to change it so that tso_fragment is always used in this case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3ed6fc15815..566045e5843 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,7 +861,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, u16 flags; /* All of a TSO frame must be composed of paged data. */ - BUG_ON(skb->len != skb->data_len); + if (skb->len != skb->data_len) + return tcp_fragment(sk, skb, len, mss_now); buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); if (unlikely(buff == NULL)) @@ -974,6 +975,8 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) sent_pkts = 0; while ((skb = sk->sk_send_head)) { + unsigned int limit; + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -994,9 +997,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) break; } + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1004,15 +1008,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (tso_fragment(sk, skb, limit, mss_now)) - break; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) - break; } + if (skb->len > 
limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + break; + TCP_SKB_CB(skb)->when = tcp_time_stamp; if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) @@ -1064,11 +1065,14 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); if (likely(cwnd_quota)) { + unsigned int limit; + BUG_ON(!tso_segs); + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1076,15 +1080,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (unlikely(tso_fragment(sk, skb, limit, mss_now))) - return; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) - return; } + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + return; + /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; -- cgit v1.2.3-70-g09d2 From bfd272b1ca1164382eabaa9986aad822adb91eb2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Aug 2005 12:04:22 -0700 Subject: [IPV6]: Fix SKB leak in ip6_input_finish() Changing it to match how ip_input handles this case should fix it. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/ip6_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 866f10726c5..10fbb50daea 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -198,12 +198,13 @@ resubmit: if (!raw_sk) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); - icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff); + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_UNK_NEXTHDR, nhoff, + skb->dev); } - } else { + } else IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); - } + kfree_skb(skb); } rcu_read_unlock(); return 0; -- cgit v1.2.3-70-g09d2 From 1f07247de51efd30c88ad8e3e06a8b5382fc7d35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 Aug 2005 12:05:27 -0700 Subject: [DECNET]: Fix RCU race condition in dn_neigh_construct(). Signed-off-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/decnet/dn_neigh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index f32dba9e26f..8d0cc3cf3e4 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -148,12 +148,12 @@ static int dn_neigh_construct(struct neighbour *neigh) __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); if (dn_db->use_long) neigh->ops = &dn_long_ops; else neigh->ops = &dn_short_ops; + rcu_read_unlock(); if (dn->flags & DN_NDFLAG_P3) neigh->ops = &dn_phase3_ops; -- cgit v1.2.3-70-g09d2