From 745898eaf0eb7a04a56dec1188d9148259510863 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:24 +0000 Subject: tcp: Optimise GRO port comparisons Instead of doing two 16-bit operations for the source/destination ports, we can do one 32-bit operation to take care of both. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7a0f0b27bf1..ff6adecc54c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2544,7 +2544,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) th2 = tcp_hdr(p); - if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) { + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { NAPI_GRO_CB(p)->same_flow = 0; continue; } -- cgit v1.2.3-70-g09d2 From 4a9a2968a17eae42ef5dffca8b37534c864e30cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:25 +0000 Subject: tcp: Remove unnecessary window comparisons for GRO The window has already been checked as part of the flag word so there is no need to check it explicitly. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ff6adecc54c..313960e4cfd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2559,7 +2559,7 @@ found: flush |= flags & TCP_FLAG_CWR; flush |= (flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window); + flush |= th->ack_seq ^ th2->ack_seq; for (i = sizeof(*th); !flush && i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); -- cgit v1.2.3-70-g09d2 From 30a3ae30c775e2723f86ef70746ad3cb4404a4c9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:26 +0000 Subject: tcp: Optimise len/mss comparison Instead of checking len > mss || len == 0, we can accomplish both by checking (len - 1) >= mss using the unsigned wraparound. At nearly a million times a second, this might just help. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 313960e4cfd..68342d43189 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2566,7 +2566,7 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (len > mss) | !len; + flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { -- cgit v1.2.3-70-g09d2 From a5b1cf288d4200506ab62fbb86cc81ace948a306 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:28 +0000 Subject: gro: Avoid unnecessary comparison after skb_gro_header For the overwhelming majority of cases, skb_gro_header's return value cannot be NULL. Yet we must check it because of its current form. This patch splits it up into multiple functions in order to avoid this. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 23 ++++++++++++++--------- net/core/dev.c | 17 ++++++++++++----- net/ipv4/af_inet.c | 13 ++++++++++--- net/ipv4/tcp.c | 22 ++++++++++++++++------ net/ipv6/af_inet6.c | 13 ++++++++++--- 5 files changed, 62 insertions(+), 26 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e44a049be0..371ece521e5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1132,18 +1132,23 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +static inline void *skb_gro_header_fast(struct sk_buff *skb, + unsigned int offset) { - unsigned int offset = skb_gro_offset(skb); + return NAPI_GRO_CB(skb)->frag0 + offset; +} - hlen += offset; - if (NAPI_GRO_CB(skb)->frag0_len < hlen) { - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; - } +static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +{ + return NAPI_GRO_CB(skb)->frag0_len < hlen; +} - return NAPI_GRO_CB(skb)->frag0 + offset; +static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + return pskb_may_pull(skb, hlen) ? 
skb->data + offset : NULL; } static inline void *skb_gro_mac_header(struct sk_buff *skb) diff --git a/net/core/dev.c b/net/core/dev.c index b1722a2d1fb..cd29e613bc5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2590,17 +2590,24 @@ struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; struct ethhdr *eth; + unsigned int hlen; + unsigned int off; napi->skb = NULL; skb_reset_mac_header(skb); skb_gro_reset_offset(skb); - eth = skb_gro_header(skb, sizeof(*eth)); - if (!eth) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*eth); + eth = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + eth = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } } skb_gro_pull(skb, sizeof(*eth)); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 170689681aa..644cc553531 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,13 +1246,20 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff **pp = NULL; struct sk_buff *p; struct iphdr *iph; + unsigned int hlen; + unsigned int off; int flush = 1; int proto; int id; - iph = skb_gro_header(skb, sizeof(*iph)); - if (unlikely(!iph)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } proto = iph->protocol & (MAX_INET_PROTOS - 1); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68342d43189..c3dcec5efea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2518,20 +2518,30 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int thlen; unsigned int flags; unsigned int mss = 1; + unsigned int hlen; + unsigned int off; int flush = 1; int i; - th = skb_gro_header(skb, sizeof(*th)); - if 
(unlikely(!th)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } thlen = th->doff * 4; if (thlen < sizeof(*th)) goto out; - th = skb_gro_header(skb, thlen); - if (unlikely(!th)) - goto out; + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } skb_gro_pull(skb, thlen); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 61f55386a23..b6215be0963 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -817,13 +817,20 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *p; struct ipv6hdr *iph; unsigned int nlen; + unsigned int hlen; + unsigned int off; int flush = 1; int proto; __wsum csum; - iph = skb_gro_header(skb, sizeof(*iph)); - if (unlikely(!iph)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); -- cgit v1.2.3-70-g09d2 From a2a804cddfe65f18f903985e8a8d04c7c9eec354 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2009 18:50:34 +0000 Subject: tcp: Do not check flush when comparing options for GRO There is no need to repeatedly check flush when comparing TCP options for GRO as it will be false 99% of the time where it matters. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c3dcec5efea..0fb8b441f1f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2570,7 +2570,7 @@ found: flush |= (flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); flush |= th->ack_seq ^ th2->ack_seq; - for (i = sizeof(*th); !flush && i < thlen; i += 4) + for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); -- cgit v1.2.3-70-g09d2 From 915219441d566f1da0caa0e262be49b666159e17 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 May 2009 21:35:47 -0700 Subject: tcp: Use SKB queue and list helpers instead of doing it by-hand. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 17 +++----- net/ipv4/tcp_input.c | 118 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 90 insertions(+), 45 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0fb8b441f1f..17b89c523f9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -439,12 +439,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { + struct sk_buff *skb; + answ = tp->rcv_nxt - tp->copied_seq; /* Subtract 1, if FIN is in queue. */ - if (answ && !skb_queue_empty(&sk->sk_receive_queue)) - answ -= - tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin; + skb = skb_peek_tail(&sk->sk_receive_queue); + if (answ && skb) + answ -= tcp_hdr(skb)->fin; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); @@ -1382,11 +1384,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Next get a buffer. */ - skb = skb_peek(&sk->sk_receive_queue); - do { - if (!skb) - break; - + skb_queue_walk(&sk->sk_receive_queue, skb) { /* Now that we have two receive queues this * shouldn't happen. 
*/ @@ -1403,8 +1401,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (tcp_hdr(skb)->fin) goto found_fin_ok; WARN_ON(!(flags & MSG_PEEK)); - skb = skb->next; - } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + } /* Well, if we have backlog, try to process it now yet. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eeb8a92aa41..ba34a23c1bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4426,7 +4426,7 @@ drop: } __skb_queue_head(&tp->out_of_order_queue, skb); } else { - struct sk_buff *skb1 = tp->out_of_order_queue.prev; + struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); u32 seq = TCP_SKB_CB(skb)->seq; u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -4443,15 +4443,18 @@ drop: } /* Find place to insert this segment. */ - do { + while (1) { if (!after(TCP_SKB_CB(skb1)->seq, seq)) break; - } while ((skb1 = skb1->prev) != - (struct sk_buff *)&tp->out_of_order_queue); + if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { + skb1 = NULL; + break; + } + skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + } /* Do skb overlap to previous one? */ - if (skb1 != (struct sk_buff *)&tp->out_of_order_queue && - before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ __kfree_skb(skb); @@ -4463,24 +4466,41 @@ drop: tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); } else { - skb1 = skb1->prev; + if (skb_queue_is_first(&tp->out_of_order_queue, + skb1)) + skb1 = NULL; + else + skb1 = skb_queue_prev( + &tp->out_of_order_queue, + skb1); } } - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + if (!skb1) + __skb_queue_head(&tp->out_of_order_queue, skb); + else + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. 
*/ - while ((skb1 = skb->next) != - (struct sk_buff *)&tp->out_of_order_queue && - after(end_seq, TCP_SKB_CB(skb1)->seq)) { - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + if (skb1 && !skb_queue_is_last(&tp->out_of_order_queue, skb1)) { + struct sk_buff *n; + + skb1 = skb_queue_next(&tp->out_of_order_queue, skb1); + skb_queue_walk_from_safe(&tp->out_of_order_queue, + skb1, n) { + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) + break; + if (before(end_seq, + TCP_SKB_CB(skb1)->end_seq)) { + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + end_seq); + break; + } + __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - end_seq); - break; + TCP_SKB_CB(skb1)->end_seq); + __kfree_skb(skb1); } - __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - TCP_SKB_CB(skb1)->end_seq); - __kfree_skb(skb1); } add_sack: @@ -4492,7 +4512,10 @@ add_sack: static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff *next = skb->next; + struct sk_buff *next = NULL; + + if (!skb_queue_is_last(list, skb)) + next = skb_queue_next(list, skb); __skb_unlink(skb, list); __kfree_skb(skb); @@ -4503,6 +4526,9 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. + * + * If tail is NULL, this means until the end of the list. + * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ @@ -4511,15 +4537,23 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb; + struct sk_buff *skb, *n; + bool end_of_skbs; /* First, check that queue is collapsible and find * the point where collapsing can be useful. 
*/ - for (skb = head; skb != tail;) { + skb = head; +restart: + end_of_skbs = true; + skb_queue_walk_from_safe(list, skb, n) { + if (skb == tail) + break; /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { skb = tcp_collapse_one(sk, skb, list); - continue; + if (!skb) + break; + goto restart; } /* The first skb to collapse is: @@ -4529,16 +4563,24 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, */ if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && (tcp_win_from_space(skb->truesize) > skb->len || - before(TCP_SKB_CB(skb)->seq, start) || - (skb->next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq))) + before(TCP_SKB_CB(skb)->seq, start))) { + end_of_skbs = false; break; + } + + if (!skb_queue_is_last(list, skb)) { + struct sk_buff *next = skb_queue_next(list, skb); + if (next != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { + end_of_skbs = false; + break; + } + } /* Decided to skip this, advance start seq. */ start = TCP_SKB_CB(skb)->end_seq; - skb = skb->next; } - if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) + if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; while (before(start, end)) { @@ -4583,7 +4625,8 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { skb = tcp_collapse_one(sk, skb, list); - if (skb == tail || + if (!skb || + skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; @@ -4610,17 +4653,21 @@ static void tcp_collapse_ofo_queue(struct sock *sk) head = skb; for (;;) { - skb = skb->next; + struct sk_buff *next = NULL; + + if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) + next = skb_queue_next(&tp->out_of_order_queue, skb); + skb = next; /* Segment is terminated when we see gap or when * we are at the end of all the queue. 
*/ - if (skb == (struct sk_buff *)&tp->out_of_order_queue || + if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { tcp_collapse(sk, &tp->out_of_order_queue, head, skb, start, end); head = skb; - if (skb == (struct sk_buff *)&tp->out_of_order_queue) + if (!skb) break; /* Start new segment */ start = TCP_SKB_CB(skb)->seq; @@ -4681,10 +4728,11 @@ static int tcp_prune_queue(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, &sk->sk_receive_queue, - sk->sk_receive_queue.next, - (struct sk_buff *)&sk->sk_receive_queue, - tp->copied_seq, tp->rcv_nxt); + if (!skb_queue_empty(&sk->sk_receive_queue)) + tcp_collapse(sk, &sk->sk_receive_queue, + skb_peek(&sk->sk_receive_queue), + NULL, + tp->copied_seq, tp->rcv_nxt); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) -- cgit v1.2.3-70-g09d2