summaryrefslogtreecommitdiffstats
path: root/net/ipv6/ip6_offload.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-12 18:07:07 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-12 18:07:07 -0800
commit6be35c700f742e911ecedd07fcc43d4439922334 (patch)
treeca9f37214d204465fcc2d79c82efd291e357c53c /net/ipv6/ip6_offload.c
parente37aa63e87bd581f9be5555ed0ba83f5295c92fc (diff)
parent520dfe3a3645257bf83660f672c47f8558f3d4c4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) Allow to dump, monitor, and change the bridge multicast database using netlink. From Cong Wang. 2) RFC 5961 TCP blind data injection attack mitigation, from Eric Dumazet. 3) Networking user namespace support from Eric W. Biederman. 4) tuntap/virtio-net multiqueue support by Jason Wang. 5) Support for checksum offload of encapsulated packets (basically, tunneled traffic can still be checksummed by HW). From Joseph Gasparakis. 6) Allow BPF filter access to VLAN tags, from Eric Dumazet and Daniel Borkmann. 7) Bridge port parameters over netlink and BPDU blocking support from Stephen Hemminger. 8) Improve data access patterns during inet socket demux by rearranging socket layout, from Eric Dumazet. 9) TIPC protocol updates and cleanups from Ying Xue, Paul Gortmaker, and Jon Maloy. 10) Update TCP socket hash sizing to be more in line with current day realities. The existing heurstics were choosen a decade ago. From Eric Dumazet. 11) Fix races, queue bloat, and excessive wakeups in ATM and associated drivers, from Krzysztof Mazur and David Woodhouse. 12) Support DOVE (Distributed Overlay Virtual Ethernet) extensions in VXLAN driver, from David Stevens. 13) Add "oops_only" mode to netconsole, from Amerigo Wang. 14) Support set and query of VEB/VEPA bridge mode via PF_BRIDGE, also allow DCB netlink to work on namespaces other than the initial namespace. From John Fastabend. 15) Support PTP in the Tigon3 driver, from Matt Carlson. 16) tun/vhost zero copy fixes and improvements, plus turn it on by default, from Michael S. Tsirkin. 17) Support per-association statistics in SCTP, from Michele Baldessari. And many, many, driver updates, cleanups, and improvements. Too numerous to mention individually. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits) net/mlx4_en: Add support for destination MAC in steering rules net/mlx4_en: Use generic etherdevice.h functions. net: ethtool: Add destination MAC address to flow steering API bridge: add support of adding and deleting mdb entries bridge: notify mdb changes via netlink ndisc: Unexport ndisc_{build,send}_skb(). uapi: add missing netconf.h to export list pkt_sched: avoid requeues if possible solos-pci: fix double-free of TX skb in DMA mode bnx2: Fix accidental reversions. bna: Driver Version Updated to 3.1.2.1 bna: Firmware update bna: Add RX State bna: Rx Page Based Allocation bna: TX Intr Coalescing Fix bna: Tx and Rx Optimizations bna: Code Cleanup and Enhancements ath9k: check pdata variable before dereferencing it ath5k: RX timestamp is reported at end of frame ath9k_htc: RX timestamp is reported at end of frame ...
Diffstat (limited to 'net/ipv6/ip6_offload.c')
-rw-r--r--net/ipv6/ip6_offload.c282
1 files changed, 282 insertions, 0 deletions
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
new file mode 100644
index 00000000000..f26f0da7f09
--- /dev/null
+++ b/net/ipv6/ip6_offload.c
@@ -0,0 +1,282 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET6 implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/printk.h>
+
+#include <net/protocol.h>
+#include <net/ipv6.h>
+
+#include "ip6_offload.h"
+
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+{
+ const struct net_offload *ops = NULL;
+
+ for (;;) {
+ struct ipv6_opt_hdr *opth;
+ int len;
+
+ if (proto != NEXTHDR_HOP) {
+ ops = rcu_dereference(inet6_offloads[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, 8)))
+ break;
+
+ opth = (void *)skb->data;
+ len = ipv6_optlen(opth);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ break;
+
+ proto = opth->nexthdr;
+ __skb_pull(skb, len);
+ }
+
+ return proto;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[
+ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
+
+ if (likely(ops && ops->callbacks.gso_send_check)) {
+ skb_reset_transport_header(skb);
+ err = ops->callbacks.gso_send_check(skb);
+ }
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ const struct net_offload *ops;
+ int proto;
+ struct frag_hdr *fptr;
+ unsigned int unfrag_ip6hlen;
+ u8 *prevhdr;
+ int offset = 0;
+
+ if (!(features & NETIF_F_V6_CSUM))
+ features &= ~NETIF_F_SG;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->callbacks.gso_segment(skb, features);
+ }
+ rcu_read_unlock();
+
+ if (IS_ERR(segs))
+ goto out;
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->payload_len = htons(skb->len - skb->mac_len -
+ sizeof(*ipv6h));
+ if (proto == IPPROTO_UDP) {
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ fptr = (struct frag_hdr *)(skb_network_header(skb) +
+ unfrag_ip6hlen);
+ fptr->frag_off = htons(offset);
+ if (skb->next != NULL)
+ fptr->frag_off |= htons(IP6_MF);
+ offset += (ntohs(ipv6h->payload_len) -
+ sizeof(struct frag_hdr));
+ }
+ }
+
+out:
+ return segs;
+}
+
+static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ struct ipv6hdr *iph;
+ unsigned int nlen;
+ unsigned int hlen;
+ unsigned int off;
+ int flush = 1;
+ int proto;
+ __wsum csum;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*iph);
+ iph = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ iph = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
+ }
+
+ skb_gro_pull(skb, sizeof(*iph));
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
+ flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+
+ rcu_read_lock();
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive) {
+ __pskb_pull(skb, skb_gro_offset(skb));
+ proto = ipv6_gso_pull_exthdrs(skb, proto);
+ skb_gro_pull(skb, -skb_transport_offset(skb));
+ skb_reset_transport_header(skb);
+ __skb_push(skb, skb_gro_offset(skb));
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ iph = ipv6_hdr(skb);
+ }
+
+ NAPI_GRO_CB(skb)->proto = proto;
+
+ flush--;
+ nlen = skb_network_header_len(skb);
+
+ for (p = *head; p; p = p->next) {
+ const struct ipv6hdr *iph2;
+ __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ iph2 = ipv6_hdr(p);
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
+
+ /* All fields must match except length and Traffic Class. */
+ if (nlen != skb_network_header_len(p) ||
+ (first_word & htonl(0xF00FFFFF)) ||
+ memcmp(&iph->nexthdr, &iph2->nexthdr,
+ nlen - offsetof(struct ipv6hdr, nexthdr))) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ /* flush if Traffic Class fields are different */
+ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= flush;
+ }
+
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ csum = skb->csum;
+ skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+ skb->csum = csum;
+
+out_unlock:
+ rcu_read_unlock();
+
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int ipv6_gro_complete(struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int err = -ENOSYS;
+
+ iph->payload_len = htons(skb->len - skb_network_offset(skb) -
+ sizeof(*iph));
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct packet_offload ipv6_packet_offload __read_mostly = {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+};
+
+static int __init ipv6_offload_init(void)
+{
+
+ if (tcpv6_offload_init() < 0)
+ pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+ if (udp_offload_init() < 0)
+ pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+ if (ipv6_exthdrs_offload_init() < 0)
+ pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
+
+ dev_add_offload(&ipv6_packet_offload);
+ return 0;
+}
+
+fs_initcall(ipv6_offload_init);