diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 107 |
1 files changed, 50 insertions, 57 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 52b5c2d0ecd..976034f8232 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ -#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ @@ -322,7 +321,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_memory_pressure) { + !sk_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -411,8 +410,8 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && - !tcp_memory_pressure && - atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + !sk_under_memory_pressure(sk) && + sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); } @@ -865,13 +864,13 @@ static void tcp_disable_fack(struct tcp_sock *tp) /* RFC3517 uses different metric in lost marker => reset on change */ if (tcp_is_fack(tp)) tp->lost_skb_hint = NULL; - tp->rx_opt.sack_ok &= ~2; + tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; } /* Take a notice that peer is sending D-SACKs */ static void tcp_dsack_seen(struct tcp_sock *tp) { - tp->rx_opt.sack_ok |= 4; + tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; } /* Initialize metrics on socket. 
*/ @@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, * These 6 states form finite state machine, controlled by the following events: * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) - * 3. Loss detection event of one of three flavors: + * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modfication, head until snd.fack is lost. - * B. SACK arrives sacking data transmitted after never retransmitted - * hole was sent out. - * C. SACK arrives sacking SND.NXT at the moment, when the + * A''. Its FACK modification, head until snd.fack is lost. + * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. * @@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, } /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". - * Event "C". Later note: FACK people cheated me again 8), we have to account + * Event "B". Later note: FACK people cheated me again 8), we have to account * for reordering! Ugly, but should help. * * Search retransmitted skbs from write_queue that were sent when snd_nxt was @@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (found_dup_sack && ((i + 1) == first_sack_index)) next_dup = &sp[i + 1]; - /* Event "B" in the comment above. */ - if (after(end_seq, tp->high_seq)) - state.flag |= FLAG_DATA_LOST; - /* Skip too early cached blocks */ while (tcp_sack_cache_ok(tp, cache) && !before(start_seq, cache->end_seq)) @@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk) tcp_verify_left_out(tp); } -/* Mark head of queue up as lost. 
With RFC3517 SACK, the packets is - * is against sacked "cnt", otherwise it's against facked "cnt" +/* Detect loss in event "A" above by marking head of queue up as lost. + * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * are considered lost. For RFC3517 SACK, a segment is considered lost if it + * has at least tp->reordering SACKed segments above it; "packets" refers to + * the maximum SACKed segments to pass before reaching this limit. */ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { @@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) int cnt, oldcnt; int err; unsigned int mss; + /* Use SACK to deduce losses of new sequences sent during recovery */ + const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { @@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; - if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) break; oldcnt = cnt; @@ -2663,7 +2661,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", @@ -2858,7 +2856,7 @@ static void tcp_try_keep_open(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int state = TCP_CA_Open; - if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) + if (tcp_left_out(tp) || tcp_any_retrans_done(sk)) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2881,7 +2879,8 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - 
tcp_moderate_cwnd(tp); + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); } @@ -3009,11 +3008,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, int flag) + int newly_acked_sacked, bool is_dupack, + int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0, mib_idx; @@ -3032,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (tcp_check_sack_reneging(sk, flag)) return; - /* C. Process data loss notification, provided it is valid. */ - if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && - before(tp->snd_una, tp->high_seq) && - icsk->icsk_ca_state != TCP_CA_Open && - tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); - } - - /* D. Check consistency of the current state. */ + /* C. Check consistency of the current state. */ tcp_verify_left_out(tp); - /* E. Check state exit conditions. State can be terminated + /* D. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { WARN_ON(tp->retrans_out != 0); @@ -3066,17 +3056,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, } break; - case TCP_CA_Disorder: - tcp_try_undo_dsack(sk); - if (!tp->undo_marker || - /* For SACK case do not Open to allow to undo - * catching for all duplicate ACKs. 
*/ - tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Open); - } - break; - case TCP_CA_Recovery: if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); @@ -3087,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, } } - /* F. Process state. */ + /* E. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { @@ -3117,7 +3096,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_add_reno_sack(sk); } - if (icsk->icsk_ca_state == TCP_CA_Disorder) + if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk)) { @@ -3681,10 +3660,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; + bool is_dupack = false; u32 prior_in_flight; u32 prior_fackets; int prior_packets; int prior_sacked = tp->sacked_out; + int pkts_acked = 0; int newly_acked_sacked = 0; int frto_cwnd = 0; @@ -3757,6 +3738,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. 
*/ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + pkts_acked = prior_packets - tp->packets_out; newly_acked_sacked = (prior_packets - prior_sacked) - (tp->packets_out - tp->sacked_out); @@ -3771,8 +3753,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); - tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - newly_acked_sacked, flag); + is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3784,6 +3767,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) return 1; no_queue: + /* If data was DSACKed, see if we can undo a cwnd reduction. */ + if (flag & FLAG_DSACKING_ACK) + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3797,10 +3784,14 @@ invalid_ack: return -1; old_ack: + /* If data was SACKed, tag it and see if we should send more data. + * If data was DSACKed, see if we can undo a cwnd reduction. 
+ */ if (TCP_SKB_CB(skb)->sacked) { - tcp_sacktag_write_queue(sk, skb, prior_snd_una); - if (icsk->icsk_ca_state == TCP_CA_Open) - tcp_try_keep_open(sk); + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); + newly_acked_sacked = tp->sacked_out - prior_sacked; + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3876,7 +3867,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && !estab && sysctl_tcp_sack) { - opt_rx->sack_ok = 1; + opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } break; @@ -4864,7 +4855,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); - else if (tcp_memory_pressure) + else if (sk_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); @@ -4930,11 +4921,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk) return 0; /* If we are under global TCP memory pressure, do not expand. */ - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) return 0; /* If we are under soft global TCP memory pressure, do not expand. */ - if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) + if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) return 0; /* If we filled the congestion window, do not expand. */ @@ -5809,6 +5800,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if (th->syn) { + if (th->fin) + goto discard; if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; |