From f5ddcbbb40aa0ba7fbfe22355d287603dbeeaaac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2014 10:09:18 -0800 Subject: net-tcp: fastopen: fix high order allocations This patch fixes two bugs in fastopen : 1) The tcp_sendmsg(..., @size) argument was ignored. Code was relying on user not fooling the kernel with iovec mismatches 2) When MTU is about 64KB, tcp_send_syn_data() attempts order-5 allocations, which are likely to fail when memory gets fragmented. Fixes: 783237e8daf13 ("net-tcp: Fast Open client - sending SYN-data") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Tested-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3be16727f05..09805817627 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2908,7 +2908,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; - syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + space = min_t(size_t, space, fo->size); + + /* limit to order-0 allocations */ + space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); + + syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, sk->sk_allocation); if (syn_data == NULL) goto fallback; -- cgit v1.2.3-70-g09d2 From 9a9bfd032f0207dd6e63c84b7676b58f160af04b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Feb 2014 04:31:03 -0800 Subject: net: tcp: use NET_INC_STATS() While LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES can only be incremented in tcp_transmit_skb() from softirq (incoming message or timer activation), it is better to use NET_INC_STATS() instead of NET_INC_STATS_BH() as tcp_transmit_skb() can be called from process context. This will avoid copy/paste confusion when/if we want to add other SNMP counters in tcp_transmit_skb() Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Cc: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 09805817627..d718482fd11 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -864,8 +864,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (unlikely(skb->fclone == SKB_FCLONE_ORIG && fclone->fclone == SKB_FCLONE_CLONE)) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); -- cgit v1.2.3-70-g09d2 From c84a57113f59486e6688be1cd443b96e3118efa0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 28 Feb 2014 16:42:26 -0800 Subject: tcp: fix bogus RTT on special retransmission RTT may be bogus with tall loss probe (TLP) when a packet is retransmitted and latter (s)acked without TCPCB_SACKED_RETRANS flag. For example, TLP calls __tcp_retransmit_skb() instead of tcp_retransmit_skb(). The skb timestamps are updated but the sacked flag is not marked with TCPCB_SACKED_RETRANS. As a result we'll get bogus RTT in tcp_clean_rtx_queue() or in tcp_sacktag_one() on spurious retransmission. The fix is to apply the sticky flag TCP_EVER_RETRANS to enforce Karn's check on RTT sampling. However this will disable F-RTO if timeout occurs after TLP, by resetting undo_marker in tcp_enter_loss(). We relax this check to only if any pending retransmists are still in-flight. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 227cba79fa6..eeaac399420 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1945,8 +1945,9 @@ void tcp_enter_loss(struct sock *sk, int how) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->undo_marker = 0; + TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d718482fd11..f0eb4e337ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2337,6 +2337,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); unsigned int cur_mss; + int err; /* Inconslusive MTU probe */ if (icsk->icsk_mtup.probe_size) { @@ -2400,11 +2401,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; } else { - return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } + + if (likely(!err)) + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + return err; } int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2