diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 85 |
1 files changed, 70 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d73aab3fbfc..81cae641c9a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -217,16 +217,25 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; } -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) { - if (tp->ecn_flags & TCP_ECN_OK) { - if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + if (!(tp->ecn_flags & TCP_ECN_OK)) + return; + + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { + case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, - * it is surely retransmit. It is not in ECN RFC, - * but Linux follows this rule. */ - else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) + * and we already seen ECT on a previous segment, + * it is probably a retransmit. + */ + if (tp->ecn_flags & TCP_ECN_SEEN) tcp_enter_quickack_mode((struct sock *)tp); + break; + case INET_ECN_CE: + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + /* fallinto */ + default: + tp->ecn_flags |= TCP_ECN_SEEN; } } @@ -1438,7 +1447,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); @@ -2828,9 +2837,13 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery */ - if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { - tp->snd_cwnd = tp->snd_ssthresh; + + /* Do not moderate cwnd if it's already undone in cwr or recovery. 
*/ + if (tp->undo_marker) { + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + else /* PRR */ + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2948,6 +2961,38 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifically the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + /* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2959,7 +3004,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit); * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, + int newly_acked_sacked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3109,13 +3155,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_down(sk, flag); + tp->prr_delivered += newly_acked_sacked; + tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3296,7 +3346,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->flags & TCPHDR_SYN)) { + if (!(scb->tcp_flags & TCPHDR_SYN)) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3630,6 +3680,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; + int prior_sacked = tp->sacked_out; + int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3701,6 +3753,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. 
*/ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + newly_acked_sacked = (prior_packets - prior_sacked) - + (tp->packets_out - tp->sacked_out); + if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3713,7 +3768,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - flag); + newly_acked_sacked, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); |