From 673d57e72398edfedc93fb50ff58048077c9d587 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:53:57 -0700 Subject: net: replace NIPQUAD() in net/ipv4/ net/ipv6/ Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1fbff5fa424..e3286814c8d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1070,11 +1070,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "%s(): shifting inet->" - "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", - __func__, - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); + printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n", + __func__, &old_saddr, &new_saddr); } inet->saddr = inet->rcv_saddr = new_saddr; -- cgit v1.2.3-70-g09d2 From 04f258ce7f085dd69422fa01d41c8f0194a0e270 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 15:42:23 -0800 Subject: net: some optimizations in af_inet 1) Use net_eq() in inet_netns_ok() to speed up socket creation if !CONFIG_NET_NS 2) Reorder the tests about inet_ehash_secret generation (once only) Use the unlikely() macro when testing if inet_ehash_secret already generated. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b1462e8c64c..fe03048c130 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -245,7 +245,7 @@ static inline int inet_netns_ok(struct net *net, int protocol) int hash; struct net_protocol *ipprot; - if (net == &init_net) + if (net_eq(net, &init_net)) return 1; hash = protocol & (MAX_INET_PROTOS - 1); @@ -272,10 +272,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); + if (unlikely(!inet_ehash_secret)) + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + build_ehash_secret(); sock->state = SS_UNCONNECTED; -- cgit v1.2.3-70-g09d2 From 73cc19f1556b95976934de236fd9043f7208844f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:41:09 -0800 Subject: ipv4: Add GRO infrastructure This patch adds GRO support for IPv4. The criteria for merging are more stringent than LRO; in particular, we require all fields in the IP header to be identical except for the length, ID and checksum. In addition, the ID must form an arithmetic sequence with a difference of one. The ID requirement might seem overly strict; however, most hardware TSO solutions already obey this rule. Linux itself also obeys this whether GSO is in use or not. In future we could relax this rule by storing the IDs (or rather making sure that we don't drop them when pulling the aggregate skb's tail). Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/protocol.h | 3 ++ net/ipv4/af_inet.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) (limited to 'net/ipv4/af_inet.c') diff --git a/include/net/protocol.h b/include/net/protocol.h index 8d024d7cb74..cb2965aa1b6 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,6 +39,9 @@ struct net_protocol { int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); unsigned int no_policy:1, netns_ok:1; }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe03048c130..a85595307fa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -1241,6 +1242,100 @@ out: return segs; } +static struct sk_buff **inet_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct net_protocol *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct iphdr *iph; + int flush = 1; + int proto; + int id; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = ip_hdr(skb); + proto = iph->protocol & (MAX_INET_PROTOS - 1); + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (!ops || !ops->gro_receive) + goto out_unlock; + + if (iph->version != 4 || iph->ihl != 5) + goto out_unlock; + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto out_unlock; + + flush = ntohs(iph->tot_len) != skb->len || + iph->frag_off != htons(IP_DF); + id = ntohs(iph->id); + + for (p = *head; p; p = p->next) { + struct iphdr *iph2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ip_hdr(p); + + if (iph->protocol != iph2->protocol || + iph->tos != iph2->tos || + memcmp(&iph->saddr, &iph2->saddr, 8)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* All fields must match except length and checksum. 
*/ + NAPI_GRO_CB(p)->flush |= + memcmp(&iph->frag_off, &iph2->frag_off, 4) || + (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; + + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + __skb_pull(skb, sizeof(*iph)); + skb_reset_transport_header(skb); + + pp = ops->gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int inet_gro_complete(struct sk_buff *skb) +{ + struct net_protocol *ops; + struct iphdr *iph = ip_hdr(skb); + int proto = iph->protocol & (MAX_INET_PROTOS - 1); + int err = -ENOSYS; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); + + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1407,6 +1502,8 @@ static struct packet_type ip_packet_type = { .func = ip_rcv, .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, }; static int __init inet_init(void) -- cgit v1.2.3-70-g09d2 From bf296b125b21b8d558ceb6ec30bb4eba2730cd6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:43:36 -0800 Subject: tcp: Add GRO support This patch adds the TCP-specific portion of GRO. The criterion for merging is extremely strict (the TCP header must match exactly apart from the checksum) so as to allow refragmentation. Otherwise this is pretty much identical to LRO, except that we support the merging of ECN packets. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/tcp.h | 6 ++++ net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 35 ++++++++++++++++++ 4 files changed, 143 insertions(+) (limited to 'net/ipv4/af_inet.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index de1e91d959b..218235de896 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1358,6 +1358,12 @@ extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern int tcp_gro_complete(struct sk_buff *skb); +extern int tcp4_gro_complete(struct sk_buff *skb); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a85595307fa..664ff0ee1c8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1410,6 +1410,8 @@ static struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 01924340862..1f3d52946b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2465,6 +2465,106 @@ out: } EXPORT_SYMBOL(tcp_tso_segment); +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int thlen; + unsigned int flags; + unsigned int total; + unsigned int mss = 1; + int flush = 1; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + 
+ th = tcp_hdr(skb); + __skb_pull(skb, thlen); + + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (th->source != th2->source || th->dest != th2->dest) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= flags & TCP_FLAG_CWR; + flush |= (flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); + flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; + flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); + + total = p->len; + mss = total; + if (skb_shinfo(p)->frag_list) + mss = skb_shinfo(p)->frag_list->len; + + flush |= skb->len > mss || skb->len <= 0; + flush |= ntohl(th2->seq) + total != ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = skb->len < mss; + flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | + TCP_FLAG_SYN | TCP_FLAG_FIN); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} + #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; static struct tcp_md5sig_pool **tcp_md5sig_pool; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 26b9030747c..10172487921 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c 
@@ -2346,6 +2346,41 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + /* fall through */ + case CHECKSUM_NONE: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return tcp_gro_receive(head, skb); +} +EXPORT_SYMBOL(tcp4_gro_receive); + +int tcp4_gro_complete(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} +EXPORT_SYMBOL(tcp4_gro_complete); + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, -- cgit v1.2.3-70-g09d2 From b4ee07df3d8121060200dbe1c6686a4e0682bee2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Dec 2008 16:42:23 -0800 Subject: netns: igmp: allow IPPROTO_IGMP sockets in netns Looks like everything is already ready. Required for ebtables(8) for one thing. Also, required for ipmr per-netns (coming soon). (Benjamin) Signed-off-by: Alexey Dobriyan Acked-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 664ff0ee1c8..743f5542d65 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1402,6 +1402,7 @@ EXPORT_SYMBOL_GPL(snmp_mib_free); #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, + .netns_ok = 1, }; #endif -- cgit v1.2.3-70-g09d2