From 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 May 2013 06:52:26 +0000 Subject: tcp: md5: remove spinlock usage in fast path TCP md5 code uses per-cpu variables but protects access to them with a shared spinlock, which is a contention point. [ tcp_md5sig_pool_lock is locked twice per incoming packet ] This patch makes things much simpler by allocating the crypto structures once, the first time a socket needs md5 keys, and never deallocating them, as they are really small. A next step would be to allow crypto allocations to be done in a NUMA-aware way. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 719652305a2..d20ede0c959 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1026,7 +1026,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; - if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + if (!tcp_alloc_md5sig_pool()) { sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -1044,9 +1044,7 @@ EXPORT_SYMBOL(tcp_md5_do_add); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -1054,10 +1052,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); - if (hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); return 0; } EXPORT_SYMBOL(tcp_md5_do_del); @@ -1071,8 +1065,6 @@ static void tcp_clear_md5_list(struct sock *sk) md5sig = rcu_dereference_protected(tp->md5sig_info, 1); - if (!hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); -- cgit v1.2.3-70-g09d2 From 28850dc7c71da9d0c0e39246e9ff6913f41f8d0a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Jun 2013 05:11:46 +0000 Subject: net: tcp: move GRO/GSO functions to tcp_offload It would be good to make things explicit and move those functions to a new file called tcp_offload.c, thus making this similar to tcpv6_offload.c. While moving all related functions into tcp_offload.c, we can also make some of them static, since they are only used there. Also, add an explicit registration function. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/tcp.h | 9 +- net/ipv4/Makefile | 2 +- net/ipv4/af_inet.c | 13 +- net/ipv4/tcp.c | 241 ----------------------------------- net/ipv4/tcp_ipv4.c | 66 +--------- net/ipv4/tcp_offload.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 341 insertions(+), 322 deletions(-) create mode 100644 net/ipv4/tcp_offload.c (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index bf1cc3dced5..0d637e9403a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1541,15 +1541,14 @@ extern struct request_sock_ops tcp6_request_sock_ops; extern void tcp_v4_destroy_sock(struct sock *sk); -extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, netdev_features_t features); extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb); -extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, - struct sk_buff *skb); extern int tcp_gro_complete(struct sk_buff *skb); -extern int tcp4_gro_complete(struct sk_buff *skb); + +extern void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, + __be32 daddr); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); @@ -1584,6 +1583,8 @@ struct tcp_request_sock_ops { #endif }; +extern int tcpv4_offload_init(void); + extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 089cb9f3638..4d3e138c564 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - datagram.o raw.o udp.o udplite.o \ + tcp_offload.o datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c090c7daea..7b514290efc 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1559,15 +1559,6 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; -static const struct net_offload tcp_offload = { - .callbacks = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1683,8 +1674,8 @@ static int __init ipv4_offload_init(void) */ if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + if (tcpv4_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6a1cf95abc9..bc4246940f6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2877,247 +2877,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct tcphdr *th; - unsigned int thlen; - unsigned int seq; - __be32 delta; - unsigned int oldlen; - unsigned int mss; - struct sk_buff *gso_skb = skb; - __sum16 newcheck; - bool ooo_okay, copy_destructor; - - if (!pskb_may_pull(skb, 
sizeof(*th))) - goto out; - - th = tcp_hdr(skb); - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - if (!pskb_may_pull(skb, thlen)) - goto out; - - oldlen = (u16)~skb->len; - __skb_pull(skb, thlen); - - mss = tcp_skb_mss(skb); - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_MPLS | - SKB_GSO_UDP_TUNNEL | - 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - copy_destructor = gso_skb->destructor == tcp_wfree; - ooo_okay = gso_skb->ooo_okay; - /* All segments but the first should have ooo_okay cleared */ - skb->ooo_okay = 0; - - segs = skb_segment(skb, features); - if (IS_ERR(segs)) - goto out; - - /* Only first segment might have ooo_okay set */ - segs->ooo_okay = ooo_okay; - - delta = htonl(oldlen + (thlen + mss)); - - skb = segs; - th = tcp_hdr(skb); - seq = ntohl(th->seq); - - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - - do { - th->fin = th->psh = 0; - th->check = newcheck; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - - seq += mss; - if (copy_destructor) { - skb->destructor = gso_skb->destructor; - skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. 
- */ - skb->truesize = mss; - gso_skb->truesize -= mss; - } - skb = skb->next; - th = tcp_hdr(skb); - - th->seq = htonl(seq); - th->cwr = 0; - } while (skb->next); - - /* Following permits TCP Small Queues to work well with GSO : - * The callback to TCP stack will be called at the time last frag - * is freed at TX completion, and not right now when gso_skb - * is freed by GSO engine - */ - if (copy_destructor) { - swap(gso_skb->sk, skb->sk); - swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); - } - - delta = htonl(oldlen + (skb_tail_pointer(skb) - - skb_transport_header(skb)) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - -out: - return segs; -} -EXPORT_SYMBOL(tcp_tso_segment); - -struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*th); - th = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - skb_gro_pull(skb, thlen); - - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - for (; (p = *head); head = &p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - th2 = tcp_hdr(p); - - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - - goto found; - } - - goto out_check_final; - -found: - flush = NAPI_GRO_CB(p)->flush; - flush |= (__force int)(flags & TCP_FLAG_CWR); - flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); - flush |= (__force int)(th->ack_seq ^ th2->ack_seq); - for (i = sizeof(*th); i < thlen; i += 4) - flush |= *(u32 *)((u8 *)th + i) ^ - *(u32 *)((u8 *)th2 + i); - - mss = tcp_skb_mss(p); - - flush |= (len - 1) >= mss; - flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); - - if (flush || skb_gro_receive(head, skb)) { - mss = 1; - goto out_check_final; - } - - p = *head; - th2 = tcp_hdr(p); - tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); - -out_check_final: - flush = len < mss; - flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | - TCP_FLAG_RST | TCP_FLAG_SYN | - TCP_FLAG_FIN)); - - if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) - pp = head; - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} -EXPORT_SYMBOL(tcp_gro_receive); - -int tcp_gro_complete(struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - - if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - return 0; -} -EXPORT_SYMBOL(tcp_gro_complete); - #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; static DEFINE_MUTEX(tcp_md5sig_mutex); diff --git 
a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d20ede0c959..289039b4d8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -545,8 +545,7 @@ out: sock_put(sk); } -static void __tcp_v4_send_check(struct sk_buff *skb, - __be32 saddr, __be32 daddr) +void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct tcphdr *th = tcp_hdr(skb); @@ -571,23 +570,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); -int tcp_v4_gso_send_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v4_send_check(skb, iph->saddr, iph->daddr); - return 0; -} - /* * This routine will send an RST to the other tcp. * @@ -2795,52 +2777,6 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -int tcp4_gro_complete(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - - return tcp_gro_complete(skb); -} - struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c new file mode 100644 index 00000000000..3a7525e6c08 --- /dev/null +++ b/net/ipv4/tcp_offload.c @@ -0,0 +1,332 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv4 GSO/GRO support + */ + +#include +#include +#include + +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned int thlen; + unsigned int seq; + __be32 delta; + unsigned int oldlen; + unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; + bool ooo_okay, copy_destructor; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + + mss = tcp_skb_mss(skb); + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. 
*/ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + SKB_GSO_GRE | + SKB_GSO_MPLS | + SKB_GSO_UDP_TUNNEL | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + copy_destructor = gso_skb->destructor == tcp_wfree; + ooo_okay = gso_skb->ooo_okay; + /* All segments but the first should have ooo_okay cleared */ + skb->ooo_okay = 0; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; + + /* Only first segment might have ooo_okay set */ + segs->ooo_okay = ooo_okay; + + delta = htonl(oldlen + (thlen + mss)); + + skb = segs; + th = tcp_hdr(skb); + seq = ntohl(th->seq); + + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + + do { + th->fin = th->psh = 0; + th->check = newcheck; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = + csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); + + seq += mss; + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; + /* {tcp|sock}_wfree() use exact truesize accounting : + * sum(skb->truesize) MUST be exactly be gso_skb->truesize + * So we account mss bytes of 'true size' for each segment. + * The last segment will contain the remaining. + */ + skb->truesize = mss; + gso_skb->truesize -= mss; + } + skb = skb->next; + th = tcp_hdr(skb); + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (copy_destructor) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); +out: + return segs; +} +EXPORT_SYMBOL(tcp_tso_segment); + +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int len; + unsigned int thlen; + __be32 flags; + unsigned int mss = 1; + unsigned int hlen; + unsigned int off; + int flush = 1; + int i; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + skb_gro_pull(skb, thlen); + + len = skb_gro_len(skb); + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ 
tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); + for (i = sizeof(*th); i < thlen; i += 4) + flush |= *(u32 *)((u8 *)th + i) ^ + *(u32 *)((u8 *)th2 + i); + + mss = tcp_skb_mss(p); + + flush |= (len - 1) >= mss; + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = len < mss; + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(tcp_gro_receive); + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} +EXPORT_SYMBOL(tcp_gro_complete); + +static int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v4_send_check(skb, iph->saddr, iph->daddr); + return 0; +} + +static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp4_gro_complete(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv4_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + +int __init tcpv4_offload_init(void) +{ + return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); +} -- cgit v1.2.3-70-g09d2 From d30e383bb856f614ddb5bbbb5a7d3f86240e41ec Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:40:10 +0300 Subject: tcp: add low latency socket poll support. Adds low latency socket poll support for TCP. In tcp_v[46]_rcv() add a call to sk_mark_ll() to copy the napi_id from the skb to the sk. In tcp_recvmsg(), when there is no data in the socket we busy-poll. This is a good example of how to add busy-poll support to more protocols. 
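To make that pattern concrete, here is a minimal sketch (not part of the patch) of a hypothetical protocol adopting busy-poll; the myproto_* names are invented, while sk_mark_ll(), sk_valid_ll() and sk_poll_ll() are the <net/ll_poll.h> helpers this patch actually calls (later commits in this series rename them to the busy-poll naming):

/* Illustrative sketch only -- not from this patch; myproto_* is invented. */
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/ll_poll.h>

/* In the protocol's receive handler, right after the socket lookup:
 * remember which NAPI queue delivered the skb.
 */
static inline void myproto_note_napi(struct sock *sk, struct sk_buff *skb)
{
	sk_mark_ll(sk, skb);	/* copies skb->napi_id into the socket */
}

/* At the top of the protocol's recvmsg: if nothing is queued yet,
 * busy-poll the NIC before taking the usual sleeping path. The
 * TCP_ESTABLISHED check added by this patch is TCP-specific, so it
 * is omitted here.
 */
static inline void myproto_busy_poll(struct sock *sk, int nonblock)
{
	if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue))
		sk_poll_ll(sk, nonblock);
}
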
Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 ++ 3 files changed, 9 insertions(+) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bc4246940f6..46ed9afd1f5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include #include +#include <net/ll_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -1553,6 +1554,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; + if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) + sk_poll_ll(sk, nonblock); + lock_sock(sk); err = -ENOTCONN; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 289039b4d8d..1063bb83e34 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,6 +75,7 @@ #include #include #include +#include <net/ll_poll.h> #include #include @@ -1993,6 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf3..5cffa5c3e6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,6 +63,7 @@ #include #include #include +#include <net/ll_poll.h> #include @@ -1498,6 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); -- cgit v1.2.3-70-g09d2 From 076bb0c82a44fbe46fe2c8527a5b5b64b69f679d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:17 +0300 Subject: net: rename include/net/ll_poll.h to include/net/busy_poll.h Rename the file and correct all the places where it is included. Signed-off-by: Eliezer Tamir Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- fs/select.c | 2 +- include/net/busy_poll.h | 183 ++++++++++++++++++++++++ include/net/ll_poll.h | 183 ------------------------ net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/core/sysctl_net_core.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/socket.c | 2 +- 16 files changed, 197 insertions(+), 197 deletions(-) create mode 100644 include/net/busy_poll.h delete mode 100644 include/net/ll_poll.h (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ec3aa1d451e..05b6b4e8b07 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include "bnx2x_cmn.h" #include "bnx2x_init.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index fb098b46c6a..7be725cdfea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -52,7 +52,7 @@ #include #endif -#include +#include #ifdef CONFIG_NET_LL_RX_POLL #define LL_EXTENDED_STATS diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index caf20477056..0fb2438dc2c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 76997b93fdf..90746d37ac9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -31,7 +31,7 @@ * */ -#include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index f9f49c40cfd..35d4adc749d 100644 --- a/fs/select.c +++ b/fs/select.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h new file mode 100644 index 00000000000..76f03408774 --- /dev/null +++ b/include/net/busy_poll.h @@ -0,0 +1,183 @@ +/* + * Low Latency Sockets + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Author: Eliezer Tamir + * + * Contact Information: + * e1000-devel Mailing List + */ + +#ifndef _LINUX_NET_LL_POLL_H +#define _LINUX_NET_LL_POLL_H + +#include +#include + +#ifdef CONFIG_NET_LL_RX_POLL + +struct napi_struct; +extern unsigned int sysctl_net_ll_read __read_mostly; +extern unsigned int sysctl_net_ll_poll __read_mostly; + +/* return values from ndo_ll_poll */ +#define LL_FLUSH_FAILED -1 +#define LL_FLUSH_BUSY -2 + +static inline bool net_busy_loop_on(void) +{ + return sysctl_net_ll_poll; +} + +/* a wrapper to make debug_smp_processor_id() happy + * we can use sched_clock() because we don't care much about precision + * we only care that the average is bounded + */ +#ifdef CONFIG_DEBUG_PREEMPT +static inline u64 busy_loop_us_clock(void) +{ + u64 rc; + + preempt_disable_notrace(); + rc = sched_clock(); + preempt_enable_no_resched_notrace(); + + return rc >> 10; +} +#else /* CONFIG_DEBUG_PREEMPT */ +static inline u64 busy_loop_us_clock(void) +{ + return sched_clock() >> 10; +} +#endif /* CONFIG_DEBUG_PREEMPT */ + +static inline unsigned long sk_busy_loop_end_time(struct sock *sk) +{ + return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec); +} + +/* in poll/select we use the global sysctl_net_ll_poll value */ +static inline unsigned long busy_loop_end_time(void) +{ + return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_ll_poll); +} + +static inline bool sk_can_busy_loop(struct sock *sk) +{ + return sk->sk_ll_usec && sk->sk_napi_id && + !need_resched() && !signal_pending(current); +} + + +static inline bool busy_loop_timeout(unsigned long end_time) +{ + unsigned long now = busy_loop_us_clock(); + + return time_after(now, end_time); +} + +/* when used in sock_poll() nonblock is known at compile time to be true + * so the loop and end_time will be optimized out + */ +static inline bool sk_busy_loop(struct sock *sk, int nonblock) +{ + unsigned long end_time = !nonblock ? 
sk_busy_loop_end_time(sk) : 0; + const struct net_device_ops *ops; + struct napi_struct *napi; + int rc = false; + + /* + * rcu read lock for napi hash + * bh so we don't race with net_rx_action + */ + rcu_read_lock_bh(); + + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + ops = napi->dev->netdev_ops; + if (!ops->ndo_ll_poll) + goto out; + + do { + rc = ops->ndo_ll_poll(napi); + + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ + + if (rc > 0) + /* local bh are disabled so it is ok to use _BH */ + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_LOWLATENCYRXPACKETS, rc); + + } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && + !need_resched() && !busy_loop_timeout(end_time)); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock_bh(); + return rc; +} + +/* used in the NIC receive handler to mark the skb */ +static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +{ + skb->napi_id = napi->napi_id; +} + +/* used in the protocol hanlder to propagate the napi_id to the socket */ +static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +{ + sk->sk_napi_id = skb->napi_id; +} + +#else /* CONFIG_NET_LL_RX_POLL */ +static inline unsigned long net_busy_loop_on(void) +{ + return 0; +} + +static inline unsigned long busy_loop_end_time(void) +{ + return 0; +} + +static inline bool sk_can_busy_loop(struct sock *sk) +{ + return false; +} + +static inline bool sk_busy_poll(struct sock *sk, int nonblock) +{ + return false; +} + +static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +{ +} + +static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline bool busy_loop_timeout(unsigned long end_time) +{ + return true; +} + +#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* _LINUX_NET_LL_POLL_H */ diff --git a/include/net/ll_poll.h b/include/net/ll_poll.h deleted file mode 100644 index 76f03408774..00000000000 --- a/include/net/ll_poll.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Low Latency Sockets - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Author: Eliezer Tamir - * - * Contact Information: - * e1000-devel Mailing List - */ - -#ifndef _LINUX_NET_LL_POLL_H -#define _LINUX_NET_LL_POLL_H - -#include -#include - -#ifdef CONFIG_NET_LL_RX_POLL - -struct napi_struct; -extern unsigned int sysctl_net_ll_read __read_mostly; -extern unsigned int sysctl_net_ll_poll __read_mostly; - -/* return values from ndo_ll_poll */ -#define LL_FLUSH_FAILED -1 -#define LL_FLUSH_BUSY -2 - -static inline bool net_busy_loop_on(void) -{ - return sysctl_net_ll_poll; -} - -/* a wrapper to make debug_smp_processor_id() happy - * we can use sched_clock() because we don't care much about precision - * we only care that the average is bounded - */ -#ifdef CONFIG_DEBUG_PREEMPT -static inline u64 busy_loop_us_clock(void) -{ - u64 rc; - - preempt_disable_notrace(); - rc = sched_clock(); - preempt_enable_no_resched_notrace(); - - return rc >> 10; -} -#else /* CONFIG_DEBUG_PREEMPT */ -static inline u64 busy_loop_us_clock(void) -{ - return sched_clock() >> 10; -} -#endif /* CONFIG_DEBUG_PREEMPT */ - -static inline unsigned long sk_busy_loop_end_time(struct sock *sk) -{ - return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec); -} - -/* in poll/select we use the global sysctl_net_ll_poll value */ -static inline unsigned long busy_loop_end_time(void) -{ - return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_ll_poll); -} - -static inline bool sk_can_busy_loop(struct sock *sk) -{ - return sk->sk_ll_usec && sk->sk_napi_id && - !need_resched() && !signal_pending(current); -} - - -static inline bool busy_loop_timeout(unsigned long end_time) -{ - unsigned long now = busy_loop_us_clock(); - - return time_after(now, end_time); -} - -/* when used in sock_poll() nonblock is known at compile time to be true - * so the loop and end_time will be optimized out - */ -static inline bool sk_busy_loop(struct sock *sk, int nonblock) -{ - unsigned long end_time = !nonblock ? 
sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; - struct napi_struct *napi; - int rc = false; - - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); - - napi = napi_by_id(sk->sk_napi_id); - if (!napi) - goto out; - - ops = napi->dev->netdev_ops; - if (!ops->ndo_ll_poll) - goto out; - - do { - rc = ops->ndo_ll_poll(napi); - - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_LOWLATENCYRXPACKETS, rc); - - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && - !need_resched() && !busy_loop_timeout(end_time)); - - rc = !skb_queue_empty(&sk->sk_receive_queue); -out: - rcu_read_unlock_bh(); - return rc; -} - -/* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) -{ - skb->napi_id = napi->napi_id; -} - -/* used in the protocol hanlder to propagate the napi_id to the socket */ -static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) -{ - sk->sk_napi_id = skb->napi_id; -} - -#else /* CONFIG_NET_LL_RX_POLL */ -static inline unsigned long net_busy_loop_on(void) -{ - return 0; -} - -static inline unsigned long busy_loop_end_time(void) -{ - return 0; -} - -static inline bool sk_can_busy_loop(struct sock *sk) -{ - return false; -} - -static inline bool sk_busy_poll(struct sock *sk, int nonblock) -{ - return false; -} - -static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) -{ -} - -static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) -{ -} - -static inline bool busy_loop_timeout(unsigned long end_time) -{ - return true; -} - -#endif /* CONFIG_NET_LL_RX_POLL */ -#endif /* _LINUX_NET_LL_POLL_H */ diff --git a/net/core/datagram.c b/net/core/datagram.c index 6e9ab31e457..8ab48cd8955 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include /* * Is a socket 'connection oriented' ? 
diff --git a/net/core/sock.c b/net/core/sock.c index ab06b719f5b..9bfe83f4d67 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,7 +139,7 @@ #include #endif -#include <net/ll_poll.h> +#include <net/busy_poll.h> static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index afc677eadd9..1a298cb3dae 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,7 +19,7 @@ #include #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> static int one = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 15cbfa94bd8..5423223e93c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,7 +279,7 @@ #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 35675e46aff..3a261b41a00 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,7 +75,7 @@ #include #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b270e53c20..bcc0ff2c16d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,7 +109,7 @@ #include #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5cffa5c3e6b..345bd92d4dd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,7 +63,7 @@ #include #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b6f31437a1f..40e72034da0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,7 +46,7 @@ #include #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> #include #include diff --git a/net/socket.c b/net/socket.c index 45afa648364..6a3e9a3f50a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,7 +104,7 @@ #include #include #include -#include <net/ll_poll.h> +#include <net/busy_poll.h> #ifdef CONFIG_NET_LL_RX_POLL unsigned int sysctl_net_ll_read __read_mostly; -- cgit v1.2.3-70-g09d2 From 8b80cda536ea9bceec0364e897868a30ee13b992 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:26 +0300 Subject: net: rename ll methods to busy-poll Rename ndo_ll_poll to ndo_busy_poll. Rename sk_mark_ll to sk_mark_napi_id. Rename skb_mark_ll to skb_mark_napi_id. Correct all users of these functions. Update comments and defines in include/net/busy_poll.h Signed-off-by: Eliezer Tamir Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- include/linux/netdevice.h | 2 +- include/net/busy_poll.h | 22 ++++++++++++---------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- 11 files changed, 23 insertions(+), 21 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 05b6b4e8b07..3353efe7919 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -990,7 +990,7 @@ reuse_rx: __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); - skb_mark_ll(skb, &fp->napi); + skb_mark_napi_id(skb, &fp->napi); if (bnx2x_fp_ll_polling(fp)) netif_receive_skb(skb); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 15a528bda87..e5da07858a2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12027,7 +12027,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #endif #ifdef CONFIG_NET_LL_RX_POLL - .ndo_ll_poll = bnx2x_low_latency_recv, + .ndo_busy_poll = bnx2x_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 047ebaaf014..bad8f14b194 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1978,7 +1978,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } #endif /* IXGBE_FCOE */ - skb_mark_ll(skb, &q_vector->napi); + skb_mark_napi_id(skb, &q_vector->napi); ixgbe_rx_skb(q_vector, skb); /* update budget accounting */ @@ -7228,7 +7228,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_poll_controller = ixgbe_netpoll, #endif #ifdef CONFIG_NET_LL_RX_POLL - .ndo_ll_poll = ixgbe_low_latency_recv, + .ndo_busy_poll = ixgbe_low_latency_recv, #endif #ifdef IXGBE_FCOE .ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0fb2438dc2c..5eac871399d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2141,7 +2141,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif #ifdef CONFIG_NET_LL_RX_POLL - .ndo_ll_poll = mlx4_en_low_latency_recv, + .ndo_busy_poll = mlx4_en_low_latency_recv, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 90746d37ac9..dec455c8f62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -767,7 +767,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } - skb_mark_ll(skb, &cq->napi); + skb_mark_napi_id(skb, &cq->napi); /* Push it up the stack */ netif_receive_skb(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bb82871b849..0741a1e919a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -974,7 +974,7 @@ struct net_device_ops { void (*ndo_netpoll_cleanup)(struct net_device *dev); 
#endif #ifdef CONFIG_NET_LL_RX_POLL - int (*ndo_ll_poll)(struct napi_struct *dev); + int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 76f03408774..4ff71908fd4 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -1,5 +1,5 @@ /* - * Low Latency Sockets + * net busy poll support * Copyright(c) 2013 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it @@ -21,8 +21,8 @@ * e1000-devel Mailing List */ -#ifndef _LINUX_NET_LL_POLL_H -#define _LINUX_NET_LL_POLL_H +#ifndef _LINUX_NET_BUSY_POLL_H +#define _LINUX_NET_BUSY_POLL_H #include #include @@ -110,11 +110,11 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) goto out; ops = napi->dev->netdev_ops; - if (!ops->ndo_ll_poll) + if (!ops->ndo_busy_poll) goto out; do { - rc = ops->ndo_ll_poll(napi); + rc = ops->ndo_busy_poll(napi); if (rc == LL_FLUSH_FAILED) break; /* permanent failure */ @@ -134,13 +134,14 @@ out: } /* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) { skb->napi_id = napi->napi_id; } /* used in the protocol hanlder to propagate the napi_id to the socket */ -static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) { sk->sk_napi_id = skb->napi_id; } @@ -166,11 +167,12 @@ static inline bool sk_busy_poll(struct sock *sk, int nonblock) return false; } -static inline void skb_mark_ll(struct sk_buff *skb, struct napi_struct *napi) +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) { } -static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb) +static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) { } @@ -180,4 +182,4 @@ static inline bool busy_loop_timeout(unsigned long end_time) } #endif /* CONFIG_NET_LL_RX_POLL */ -#endif /* _LINUX_NET_LL_POLL_H */ +#endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3a261b41a00..b299da5ff49 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1994,7 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bcc0ff2c16d..a0d7151ffbd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1713,7 +1713,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 345bd92d4dd..6e1649d5853 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1499,7 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 40e72034da0..f4058150262 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -844,7 +844,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.3-70-g09d2
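For reference, the other half of the contract renamed above: sk_busy_loop() invokes the driver's ndo_busy_poll hook and interprets its return value (LL_FLUSH_FAILED stops the loop, LL_FLUSH_BUSY lets it retry, a positive count is added to LINUX_MIB_LOWLATENCYRXPACKETS). The sketch below is hypothetical; all mydrv_* names, the queue type, and the locking scheme are invented for illustration and are not taken from any of the drivers patched above:

/* Hypothetical driver-side busy-poll hook, illustration only. Called by
 * sk_busy_loop() under rcu_read_lock_bh(), i.e. with softirqs disabled.
 */
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <net/busy_poll.h>

struct mydrv_rx_queue {			/* invented example type */
	struct napi_struct napi;
	spinlock_t poll_lock;		/* serializes busy-poll vs. napi poll */
};

static int mydrv_clean_rx(struct mydrv_rx_queue *q, int budget); /* invented */

static int mydrv_busy_poll(struct napi_struct *napi)
{
	struct mydrv_rx_queue *q =
		container_of(napi, struct mydrv_rx_queue, napi);
	int done;

	if (!spin_trylock(&q->poll_lock))
		return LL_FLUSH_BUSY;	/* napi context owns the ring; caller retries */

	done = mydrv_clean_rx(q, 4);	/* poll the rx ring with a small fixed budget;
					 * return LL_FLUSH_FAILED instead on a
					 * permanent error so the caller stops
					 */

	spin_unlock(&q->poll_lock);
	return done;			/* number of packets received */
}

static const struct net_device_ops mydrv_netdev_ops = {
#ifdef CONFIG_NET_LL_RX_POLL
	.ndo_busy_poll	= mydrv_busy_poll,
#endif
};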