diff options
author | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:18:27 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:19:04 +0200 |
commit | 6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch) | |
tree | 021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /net/ipv4/tcp_input.c | |
parent | 682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff) | |
parent | a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff) |
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree),
to prepare for tooling changes, and also to pick up v3.4 MM
changes that the uprobes code needs to take care of.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 264 |
1 files changed, 153 insertions, 111 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53c8ce4046b..e886e2f7fa8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -61,6 +61,8 @@ * Pasi Sarolahti: F-RTO for dealing with spurious RTOs */ +#define pr_fmt(fmt) "TCP: " fmt + #include <linux/mm.h> #include <linux/slab.h> #include <linux/module.h> @@ -1403,8 +1405,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ - if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) + /* Adjust counters and hints for the newly sacked sequence + * range but discard the return value since prev is already + * marked. We must tag the range first because the seq + * advancement below implicitly advances + * tcp_highest_sack_seq() when skb is highest_sack. + */ + tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, + start_seq, end_seq, dup_sack, pcount); + + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; @@ -1430,12 +1440,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_type = 0; } - /* Adjust counters and hints for the newly sacked sequence range but - * discard the return value since prev is already marked. - */ - tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, - start_seq, end_seq, dup_sack, pcount); - /* Difference in this won't matter, both ACKed by the same cumul. ACK */ TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); @@ -1583,6 +1587,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, } } + /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ + if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) + goto fallback; + if (!skb_shift(prev, skb, len)) goto fallback; if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) @@ -2567,6 +2575,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) if (cnt > packets) { if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; @@ -3860,9 +3869,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o opt_rx->wscale_ok = 1; if (snd_wscale > 14) { if (net_ratelimit()) - printk(KERN_INFO "tcp_parse_options: Illegal window " - "scaling value %d >14 received.\n", - snd_wscale); + pr_info("%s: Illegal window scaling value %d >14 received\n", + __func__, + snd_wscale); snd_wscale = 14; } opt_rx->snd_wscale = snd_wscale; @@ -4184,7 +4193,7 @@ static void tcp_fin(struct sock *sk) /* Only TCP_LISTEN and TCP_CLOSE are left, in these * cases we should never reach this piece of code. */ - printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", + pr_err("%s: Impossible, sk->sk_state=%d\n", __func__, sk->sk_state); break; } @@ -4437,6 +4446,137 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) return 0; } +static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb1; + u32 seq, end_seq; + + TCP_ECN_check_ce(tp, skb); + + if (tcp_try_rmem_schedule(sk, skb->truesize)) { + /* TODO: should increment a counter */ + __kfree_skb(skb); + return; + } + + /* Disable header prediction. */ + tp->pred_flags = 0; + inet_csk_schedule_ack(sk); + + SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + + skb1 = skb_peek_tail(&tp->out_of_order_queue); + if (!skb1) { + /* Initial out of order segment, build 1 SACK. */ + if (tcp_is_sack(tp)) { + tp->rx_opt.num_sacks = 1; + tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; + tp->selective_acks[0].end_seq = + TCP_SKB_CB(skb)->end_seq; + } + __skb_queue_head(&tp->out_of_order_queue, skb); + goto end; + } + + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; + + if (seq == TCP_SKB_CB(skb1)->end_seq) { + /* Packets in ofo can stay in queue a long time. + * Better try to coalesce them right now + * to avoid future tcp_collapse_ofo_queue(), + * probably the most expensive function in tcp stack. + */ + if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPRCVCOALESCE); + BUG_ON(skb_copy_bits(skb, 0, + skb_put(skb1, skb->len), + skb->len)); + TCP_SKB_CB(skb1)->end_seq = end_seq; + TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + __kfree_skb(skb); + skb = NULL; + } else { + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + } + + if (!tp->rx_opt.num_sacks || + tp->selective_acks[0].end_seq != seq) + goto add_sack; + + /* Common case: data arrive in order after hole. */ + tp->selective_acks[0].end_seq = end_seq; + goto end; + } + + /* Find place to insert this segment. */ + while (1) { + if (!after(TCP_SKB_CB(skb1)->seq, seq)) + break; + if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { + skb1 = NULL; + break; + } + skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + } + + /* Do skb overlap to previous one? */ + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, + TCP_SKB_CB(skb1)->end_seq); + } else { + if (skb_queue_is_first(&tp->out_of_order_queue, + skb1)) + skb1 = NULL; + else + skb1 = skb_queue_prev( + &tp->out_of_order_queue, + skb1); + } + } + if (!skb1) + __skb_queue_head(&tp->out_of_order_queue, skb); + else + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + + /* And clean segments covered by new one as whole. */ + while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { + skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) + break; + if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, + end_seq); + break; + } + __skb_unlink(skb1, &tp->out_of_order_queue); + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + __kfree_skb(skb1); + } + +add_sack: + if (tcp_is_sack(tp)) + tcp_sack_new_ofo_skb(sk, seq, end_seq); +end: + if (skb) + skb_set_owner_r(skb, sk); +} + + static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); @@ -4552,105 +4692,7 @@ drop: goto queue_and_out; } - TCP_ECN_check_ce(tp, skb); - - if (tcp_try_rmem_schedule(sk, skb->truesize)) - goto drop; - - /* Disable header prediction. */ - tp->pred_flags = 0; - inet_csk_schedule_ack(sk); - - SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - - skb_set_owner_r(skb, sk); - - if (!skb_peek(&tp->out_of_order_queue)) { - /* Initial out of order segment, build 1 SACK. */ - if (tcp_is_sack(tp)) { - tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; - } - __skb_queue_head(&tp->out_of_order_queue, skb); - } else { - struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); - u32 seq = TCP_SKB_CB(skb)->seq; - u32 end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - return; - } - - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; - } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); - } - - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - __kfree_skb(skb); - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); - - if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) - break; - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - end_seq); - break; - } - __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - TCP_SKB_CB(skb1)->end_seq); - __kfree_skb(skb1); - } - -add_sack: - if (tcp_is_sack(tp)) - tcp_sack_new_ofo_skb(sk, seq, end_seq); - } + tcp_data_queue_ofo(sk, skb); } static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, |