diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 53 |
1 files changed, 41 insertions, 12 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5423223e93c..6e5617b9f9d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -283,6 +283,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; +int sysctl_tcp_min_tso_segs __read_mostly = 2; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -410,10 +412,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* Presumed zeroed, in order of appearance: - * cookie_in_always, cookie_out_never, - * s_data_constant, s_data_in, s_data_out - */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -499,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ set_bit(SOCK_ASYNC_NOSPACE, @@ -510,7 +508,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * wspace test but before the flags are set, * IO signal will be lost. */ - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + if (sk_stream_is_writeable(sk)) mask |= POLLOUT | POLLWRNORM; } } else @@ -789,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, xmit_size_goal = mss_now; if (large_allowed && sk_can_gso(sk)) { - xmit_size_goal = ((sk->sk_gso_max_size - 1) - - inet_csk(sk)->icsk_af_ops->net_header_len - - inet_csk(sk)->icsk_ext_hdr_len - - tp->tcp_header_len); + u32 gso_size, hlen; + + /* Maybe we should/could use sk->sk_prot->max_header here ? */ + hlen = inet_csk(sk)->icsk_af_ops->net_header_len + + inet_csk(sk)->icsk_ext_hdr_len + + tp->tcp_header_len; + + /* Goal is to send at least one packet per ms, + * not one big TSO packet every 100 ms. + * This preserves ACK clocking and is consistent + * with tcp_tso_should_defer() heuristic. + */ + gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); + gso_size = max_t(u32, gso_size, + sysctl_tcp_min_tso_segs * mss_now); + + xmit_size_goal = min_t(u32, gso_size, + sk->sk_gso_max_size - 1 - hlen); - /* TSQ : try to have two TSO segments in flight */ + /* TSQ : try to have at least two segments in flight + * (one in NIC TX ring, another in Qdisc) + */ xmit_size_goal = min_t(u32, xmit_size_goal, sysctl_tcp_limit_output_bytes >> 1); @@ -1121,6 +1135,13 @@ new_segment: goto wait_for_memory; /* + * All packets are restored as if they have + * already been sent. + */ + if (tp->repair) + TCP_SKB_CB(skb)->when = tcp_time_stamp; + + /* * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) @@ -2447,10 +2468,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: if (val < 0 || val > 1) err = -EINVAL; - else + else { tp->thin_dupack = val; if (tp->thin_dupack) tcp_disable_early_retrans(tp); + } break; case TCP_REPAIR: @@ -2631,6 +2653,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->tsoffset = val - tcp_time_stamp; break; + case TCP_NOTSENT_LOWAT: + tp->notsent_lowat = val; + sk->sk_write_space(sk); + break; default: err = -ENOPROTOOPT; break; @@ -2847,6 +2873,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; + case TCP_NOTSENT_LOWAT: + val = tp->notsent_lowat; + break; default: return -ENOPROTOOPT; } |