summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c50
1 files changed, 41 insertions, 9 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 65caf8b95e1..a2a796c5536 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
*/
int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = tcp_wfree;
+ skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
}
EXPORT_SYMBOL(tcp_mtup_init);
@@ -2939,6 +2937,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
}
EXPORT_SYMBOL(tcp_make_synack);
+static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcp_congestion_ops *ca;
+ u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
+
+ if (ca_key == TCP_CA_UNSPEC)
+ return;
+
+ rcu_read_lock();
+ ca = tcp_ca_find_key(ca_key);
+ if (likely(ca && try_module_get(ca->owner))) {
+ module_put(icsk->icsk_ca_ops->owner);
+ icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
+ icsk->icsk_ca_ops = ca;
+ }
+ rcu_read_unlock();
+}
+
/* Do all connect socket setups that can be done AF independent. */
static void tcp_connect_init(struct sock *sk)
{
@@ -2964,6 +2981,8 @@ static void tcp_connect_init(struct sock *sk)
tcp_mtup_init(sk);
tcp_sync_mss(sk, dst_mtu(dst));
+ tcp_ca_dst_init(sk, dst);
+
if (!tp->window_clamp)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric_advmss(dst);
@@ -3034,7 +3053,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int syn_loss = 0, space, err = 0;
+ int syn_loss = 0, space, err = 0, copied;
unsigned long last_syn_loss = 0;
struct sk_buff *syn_data;
@@ -3072,11 +3091,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
- if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
- fo->data->msg_iter.iov, 0, space))) {
+ copied = copy_from_iter(skb_put(syn_data, space), space,
+ &fo->data->msg_iter);
+ if (unlikely(!copied)) {
kfree_skb(syn_data);
goto fallback;
}
+ if (copied != space) {
+ skb_trim(syn_data, copied);
+ space = copied;
+ }
/* No more data pending in inet_wait_for_connect() */
if (space == fo->size)
@@ -3244,6 +3268,14 @@ void tcp_send_ack(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
+ /* We do not want pure acks influencing TCP Small Queues or fq/pacing
+ * too much.
+ * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+ * We also avoid tcp_wfree() overhead (cache line miss accessing
+ * tp->tsq_flags) by using regular sock_wfree()
+ */
+ skb_set_tcp_pure_ack(buff);
+
/* Send it off, this clears delayed acks for us. */
skb_mstamp_get(&buff->skb_mstamp);
tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));