diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 40 |
1 files changed, 27 insertions, 13 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 92fde8d1aa8..d46f2143305 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX; +EXPORT_SYMBOL(sysctl_tcp_notsent_lowat); + static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -634,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb unsigned int size = 0; unsigned int eff_sacks; + opts->options = 0; + #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { @@ -892,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? - tcp_wfree : sock_wfree; + skb->destructor = tcp_wfree; atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -982,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - skb->ip_summed == CHECKSUM_NONE) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + + if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1063,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -1628,7 +1632,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) /* If a full-sized TSO skb can be sent, do it. */ if (limit >= min_t(unsigned int, sk->sk_gso_max_size, - sk->sk_gso_max_segs * tp->mss_cache)) + tp->xmit_size_goal_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1837,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1866,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - /* TSQ : sk_wmem_alloc accounts skb truesize, - * including skb overhead. But thats OK. + /* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates */ - if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + limit = max(skb->truesize, sk->sk_pacing_rate >> 10); + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); break; } + limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, @@ -2334,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } @@ -2670,7 +2682,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2814,6 +2826,8 @@ void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; + else + tp->rcv_tstamp = tcp_time_stamp; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; |