From 5d424d5a674f782d0659a3b66d951f412901faee Mon Sep 17 00:00:00 2001 From: John Heffner Date: Mon, 20 Mar 2006 17:53:41 -0800 Subject: [TCP]: MTU probing Implementation of packetization layer path mtu discovery for TCP, based on the internet-draft currently found at . Signed-off-by: John Heffner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e9a54ae7d69..0ac388e3d01 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1891,6 +1891,34 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) } } +static void tcp_mtup_probe_failed(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; + icsk->icsk_mtup.probe_size = 0; +} + +static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + /* FIXME: breaks with very large cwnd */ + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_cwnd = tp->snd_cwnd * + tcp_mss_to_mtu(sk, tp->mss_cache) / + icsk->icsk_mtup.probe_size; + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd_stamp = tcp_time_stamp; + tp->rcv_ssthresh = tcp_current_ssthresh(sk); + + icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; + icsk->icsk_mtup.probe_size = 0; + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); +} + + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2023,6 +2051,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, return; } + /* MTU probe failure: don't reduce cwnd */ + if (icsk->icsk_ca_state < TCP_CA_CWR && + icsk->icsk_mtup.probe_size && + tp->snd_una == icsk->icsk_mtup.probe_seq_start) { + tcp_mtup_probe_failed(sk); + /* Restores the reduction we did in tcp_mtup_probe() */ + tp->snd_cwnd++; + tcp_simple_retransmit(sk); + return; + } + /* Otherwise enter Recovery state */ if (IsReno(tp)) @@ -2243,6 +2282,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) tp->retrans_stamp = 0; } + /* MTU probing checks */ + if (icsk->icsk_mtup.probe_size) { + if (!after(icsk->icsk_mtup.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) { + tcp_mtup_probe_success(sk, skb); + } + } + if (sacked) { if (sacked & TCPCB_RETRANS) { if(sacked & TCPCB_SACKED_RETRANS) @@ -4101,6 +4147,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.sack_ok && sysctl_tcp_fack) tp->rx_opt.sack_ok |= 2; + tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); @@ -4211,6 +4258,7 @@ discard: if (tp->ecn_flags&TCP_ECN_OK) sock_set_flag(sk, SOCK_NO_LARGESEND); + tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); @@ -4399,6 +4447,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ tp->lsndtime = tcp_time_stamp; + tcp_mtup_init(sk); tcp_initialize_rcv_mss(sk); tcp_init_buffer_space(sk); tcp_fast_path_on(tp); -- cgit v1.2.3-70-g09d2 From 0e7b13685f9a06949ea3070c97c0f0085a08cd37 Mon Sep 17 00:00:00 2001 From: John Heffner Date: Mon, 20 Mar 2006 21:32:58 -0800 Subject: [TCP] mtu probing: move tcp-specific data out of inet_connection_sock This moves some TCP-specific MTU probing state out of inet_connection_sock back to tcp_sock. Signed-off-by: John Heffner Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/inet_connection_sock.h | 2 -- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f2bb2396853..542d39596bd 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -343,6 +343,12 @@ struct tcp_sock { __u32 seq; __u32 time; } rcvq_space; + +/* TCP-specific MTU probe information. */ + struct { + __u32 probe_seq_start; + __u32 probe_seq_end; + } mtu_probe; }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b3abe33f4e5..4e5a9ff99fc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -114,8 +114,6 @@ struct inet_connection_sock { /* Information on the current probe. */ int probe_size; - __u32 probe_seq_start; - __u32 probe_seq_end; } icsk_mtup; u32 icsk_ca_priv[16]; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0ac388e3d01..195d8358455 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2054,7 +2054,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* MTU probe failure: don't reduce cwnd */ if (icsk->icsk_ca_state < TCP_CA_CWR && icsk->icsk_mtup.probe_size && - tp->snd_una == icsk->icsk_mtup.probe_seq_start) { + tp->snd_una == tp->mtu_probe.probe_seq_start) { tcp_mtup_probe_failed(sk); /* Restores the reduction we did in tcp_mtup_probe() */ tp->snd_cwnd++; @@ -2284,7 +2284,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) /* MTU probing checks */ if (icsk->icsk_mtup.probe_size) { - if (!after(icsk->icsk_mtup.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) { + if (!after(tp->mtu_probe.probe_seq_end, TCP_SKB_CB(skb)->end_seq)) { tcp_mtup_probe_success(sk, skb); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8197b5e12f1..518e568b53f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1238,8 +1238,8 @@ static int tcp_mtu_probe(struct sock *sk) update_send_head(sk, tp, nskb); icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); - icsk->icsk_mtup.probe_seq_start = TCP_SKB_CB(nskb)->seq; - icsk->icsk_mtup.probe_seq_end = TCP_SKB_CB(nskb)->end_seq; + tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; + tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq; return 1; } -- cgit v1.2.3-70-g09d2