Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Makefile          |   2
-rw-r--r--  net/ipv4/gre.c             | 323
-rw-r--r--  net/ipv4/ip_gre.c          | 247
-rw-r--r--  net/ipv4/ip_tunnel.c       |  68
-rw-r--r--  net/ipv4/ip_tunnel_core.c  | 122
-rw-r--r--  net/ipv4/ipip.c            |   6
6 files changed, 464 insertions(+), 304 deletions(-)
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7fcf8101d85..86ded0bac9c 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o \
- inet_fragment.o ping.o
+ inet_fragment.o ping.o ip_tunnel_core.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index b2e805af9b8..ba4803e609b 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -13,6 +13,8 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/if.h>
+#include <linux/icmp.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/skbuff.h>
@@ -24,51 +26,270 @@
#include <net/protocol.h>
#include <net/gre.h>
+#include <net/icmp.h>
+#include <net/route.h>
+#include <net/xfrm.h>
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-static DEFINE_SPINLOCK(gre_proto_lock);
+static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
{
if (version >= GREPROTO_MAX)
- goto err_out;
-
- spin_lock(&gre_proto_lock);
- if (gre_proto[version])
- goto err_out_unlock;
-
- RCU_INIT_POINTER(gre_proto[version], proto);
- spin_unlock(&gre_proto_lock);
- return 0;
+ return -EINVAL;
-err_out_unlock:
- spin_unlock(&gre_proto_lock);
-err_out:
- return -1;
+ return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
+ 0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(gre_add_protocol);
int gre_del_protocol(const struct gre_protocol *proto, u8 version)
{
+ int ret;
+
if (version >= GREPROTO_MAX)
- goto err_out;
-
- spin_lock(&gre_proto_lock);
- if (rcu_dereference_protected(gre_proto[version],
- lockdep_is_held(&gre_proto_lock)) != proto)
- goto err_out_unlock;
- RCU_INIT_POINTER(gre_proto[version], NULL);
- spin_unlock(&gre_proto_lock);
+ return -EINVAL;
+
+ ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
+ 0 : -EBUSY;
+
+ if (ret)
+ return ret;
+
synchronize_rcu();
return 0;
-
-err_out_unlock:
- spin_unlock(&gre_proto_lock);
-err_out:
- return -1;
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len)
+{
+ struct gre_base_hdr *greh;
+
+ skb_push(skb, hdr_len);
+
+ greh = (struct gre_base_hdr *)skb->data;
+ greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+ greh->protocol = tpi->proto;
+
+ if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ if (tpi->flags&TUNNEL_SEQ) {
+ *ptr = tpi->seq;
+ ptr--;
+ }
+ if (tpi->flags&TUNNEL_KEY) {
+ *ptr = tpi->key;
+ ptr--;
+ }
+ if (tpi->flags&TUNNEL_CSUM &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+ *ptr = 0;
+ *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+ skb->len, 0));
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(gre_build_header);
+
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
+{
+ int err;
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
+ if (skb_is_gso(skb)) {
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (unlikely(err))
+ goto error;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+ return skb;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
+ err = skb_checksum_help(skb);
+ if (unlikely(err))
+ goto error;
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(gre_handle_offloads);
+
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ csum = csum_fold(skb->csum);
+
+ if (!csum)
+ break;
+ /* Fall through. */
+
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ csum = __skb_checksum_complete(skb);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ break;
+ }
+
+ return csum;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err)
+{
+ unsigned int ip_hlen = ip_hdrlen(skb);
+ const struct gre_base_hdr *greh;
+ __be32 *options;
+ int hdr_len;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
+
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+ tpi->proto = greh->protocol;
+
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (check_checksum(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+ options++;
+ }
+
+ if (greh->flags & GRE_KEY) {
+ tpi->key = *options;
+ options++;
+ } else
+ tpi->key = 0;
+
+ if (unlikely(greh->flags & GRE_SEQ)) {
+ tpi->seq = *options;
+ options++;
+ } else
+ tpi->seq = 0;
+
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ tpi->proto = htons(ETH_P_IP);
+ if ((*(u8 *)options & 0xF0) != 0x40) {
+ hdr_len += 4;
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+ }
+ }
+
+ return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+}
+
+static int gre_cisco_rcv(struct sk_buff *skb)
+{
+ struct tnl_ptk_info tpi;
+ int i;
+ bool csum_err = false;
+
+ if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+ goto drop;
+
+ rcu_read_lock();
+ for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
+ struct gre_cisco_protocol *proto;
+ int ret;
+
+ proto = rcu_dereference(gre_cisco_proto_list[i]);
+ if (!proto)
+ continue;
+ ret = proto->handler(skb, &tpi);
+ if (ret == PACKET_RCVD) {
+ rcu_read_unlock();
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static void gre_cisco_err(struct sk_buff *skb, u32 info)
+{
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ *
+ * Moreover, Cisco "wise men" put GRE key to the third word
+ * in GRE header. It makes impossible maintaining even soft
+ * state for keyed
+ * GRE tunnels with enabled checksum. Tell them "thank you".
+ *
+ * Well, I wonder, rfc1812 was written by Cisco employee,
+ * what the hell these idiots break standards established
+ * by themselves???
+ */
+
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+ int i;
+
+ if (parse_gre_header(skb, &tpi, &csum_err)) {
+ if (!csum_err) /* ignore csum errors. */
+ return;
+ }
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+ return;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
+ IPPROTO_GRE, 0);
+ return;
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
+ struct gre_cisco_protocol *proto;
+
+ proto = rcu_dereference(gre_cisco_proto_list[i]);
+ if (!proto)
+ continue;
+
+ if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
+ goto out;
+
+ }
+out:
+ rcu_read_unlock();
+}
+
static int gre_rcv(struct sk_buff *skb)
{
const struct gre_protocol *proto;
@@ -220,27 +441,68 @@ static const struct net_offload gre_offload = {
},
};
+static const struct gre_protocol ipgre_protocol = {
+ .handler = gre_cisco_rcv,
+ .err_handler = gre_cisco_err,
+};
+
+int gre_cisco_register(struct gre_cisco_protocol *newp)
+{
+ struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
+ &gre_cisco_proto_list[newp->priority];
+
+ return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY;
+}
+EXPORT_SYMBOL_GPL(gre_cisco_register);
+
+int gre_cisco_unregister(struct gre_cisco_protocol *del_proto)
+{
+ struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
+ &gre_cisco_proto_list[del_proto->priority];
+ int ret;
+
+ ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL;
+
+ if (ret)
+ return ret;
+
+ synchronize_net();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gre_cisco_unregister);
+
static int __init gre_init(void)
{
pr_info("GRE over IPv4 demultiplexor driver\n");
if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
pr_err("can't add protocol\n");
- return -EAGAIN;
+ goto err;
+ }
+
+ if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
+ pr_info("%s: can't add ipgre handler\n", __func__);
+ goto err_gre;
}
if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
pr_err("can't add protocol offload\n");
- inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
- return -EAGAIN;
+ goto err_gso;
}
return 0;
+err_gso:
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+err_gre:
+ inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+err:
+ return -EAGAIN;
}
static void __exit gre_exit(void)
{
inet_del_offload(&gre_offload, IPPROTO_GRE);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
}
@@ -250,4 +512,3 @@ module_exit(gre_exit);
MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_LICENSE("GPL");
-
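
[Editor's note] The gre.c changes above replace the old single-slot gre_protocol hook for GREPROTO_CISCO with a small priority-indexed demux: gre_cisco_register()/gre_cisco_unregister(), and handlers that return PACKET_RCVD or PACKET_REJECT. The following is a minimal, illustrative sketch of how a consumer might plug into that demux; it is not part of the patch, the my_gre_* names are hypothetical, and it assumes the new declarations are exported through <net/gre.h> and <net/ip_tunnels.h>.

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <net/gre.h>
#include <net/ip_tunnels.h>

/* Hypothetical receive handler: claim GRE packets we understand,
 * otherwise let the next registered protocol (or the ICMP port
 * unreachable in gre_cisco_rcv()) handle the packet. */
static int my_gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
	if (tpi->proto != htons(ETH_P_IP))
		return PACKET_REJECT;
	/* ... deliver or consume skb here ... */
	return PACKET_RCVD;
}

static int my_gre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{
	/* ... adjust tunnel state from the ICMP error ... */
	return PACKET_RCVD;
}

static struct gre_cisco_protocol my_gre_proto = {
	.handler	= my_gre_rcv,
	.err_handler	= my_gre_err,
	.priority	= 1,	/* ip_gre registers at priority 0 */
};

static int __init my_gre_init(void)
{
	/* Returns -EBUSY if the priority slot is already taken. */
	return gre_cisco_register(&my_gre_proto);
}

static void __exit my_gre_exit(void)
{
	gre_cisco_unregister(&my_gre_proto);
}

module_init(my_gre_init);
module_exit(my_gre_exit);
MODULE_LICENSE("GPL");
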
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a982657d05e..c326e869993 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev);
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
-static __sum16 check_checksum(struct sk_buff *skb)
-{
- __sum16 csum = 0;
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
-
- if (!csum)
- break;
- /* Fall through. */
-
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- break;
- }
-
- return csum;
-}
-
-static int ip_gre_calc_hlen(__be16 o_flags)
-{
- int addend = 4;
-
- if (o_flags&TUNNEL_CSUM)
- addend += 4;
- if (o_flags&TUNNEL_KEY)
- addend += 4;
- if (o_flags&TUNNEL_SEQ)
- addend += 4;
- return addend;
-}
-
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err, int *hdr_len)
-{
- unsigned int ip_hlen = ip_hdrlen(skb);
- const struct gre_base_hdr *greh;
- __be32 *options;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
- if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
- return -EINVAL;
-
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
- *hdr_len = ip_gre_calc_hlen(tpi->flags);
-
- if (!pskb_may_pull(skb, *hdr_len))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
-
- tpi->proto = greh->protocol;
-
- options = (__be32 *)(greh + 1);
- if (greh->flags & GRE_CSUM) {
- if (check_checksum(skb)) {
- *csum_err = true;
- return -EINVAL;
- }
- options++;
- }
-
- if (greh->flags & GRE_KEY) {
- tpi->key = *options;
- options++;
- } else
- tpi->key = 0;
-
- if (unlikely(greh->flags & GRE_SEQ)) {
- tpi->seq = *options;
- options++;
- } else
- tpi->seq = 0;
-
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
- tpi->proto = htons(ETH_P_IP);
- if ((*(u8 *)options & 0xF0) != 0x40) {
- *hdr_len += 4;
- if (!pskb_may_pull(skb, *hdr_len))
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static void ipgre_err(struct sk_buff *skb, u32 info)
+static int ipgre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
{
/* All the routers (except for Linux) return only
@@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
- struct tnl_ptk_info tpi;
- int hdr_len;
- bool csum_err = false;
-
- if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
- if (!csum_err) /* ignore csum errors. */
- return;
- }
switch (type) {
default:
case ICMP_PARAMETERPROB:
- return;
+ return PACKET_RCVD;
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
case ICMP_PORT_UNREACH:
/* Impossible event. */
- return;
+ return PACKET_RCVD;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -269,138 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
break;
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
- return;
+ return PACKET_RCVD;
break;
case ICMP_REDIRECT:
break;
}
- if (tpi.proto == htons(ETH_P_TEB))
+ if (tpi->proto == htons(ETH_P_TEB))
itn = net_generic(net, gre_tap_net_id);
else
itn = net_generic(net, ipgre_net_id);
iph = (const struct iphdr *)skb->data;
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
- iph->daddr, iph->saddr, tpi.key);
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+ iph->daddr, iph->saddr, tpi->key);
if (t == NULL)
- return;
+ return PACKET_REJECT;
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_GRE, 0);
- return;
- }
- if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_GRE, 0);
- return;
- }
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- return;
+ return PACKET_RCVD;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- return;
+ return PACKET_RCVD;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
+ return PACKET_RCVD;
}
-static int ipgre_rcv(struct sk_buff *skb)
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn;
const struct iphdr *iph;
struct ip_tunnel *tunnel;
- struct tnl_ptk_info tpi;
- int hdr_len;
- bool csum_err = false;
-
- if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
- goto drop;
- if (tpi.proto == htons(ETH_P_TEB))
+ if (tpi->proto == htons(ETH_P_TEB))
itn = net_generic(net, gre_tap_net_id);
else
itn = net_generic(net, ipgre_net_id);
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
- iph->saddr, iph->daddr, tpi.key);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+ iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
- ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
- return 0;
+ ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
+ return PACKET_RCVD;
}
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-drop:
- kfree_skb(skb);
- return 0;
-}
-
-static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
-{
- int err;
-
- if (skb_is_gso(skb)) {
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- goto error;
- skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
- return skb;
- } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
- tunnel->parms.o_flags&TUNNEL_CSUM) {
- err = skb_checksum_help(skb);
- if (unlikely(err))
- goto error;
- } else if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_NONE;
-
- return skb;
-
-error:
- kfree_skb(skb);
- return ERR_PTR(err);
-}
-
-static struct sk_buff *gre_build_header(struct sk_buff *skb,
- const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
- struct gre_base_hdr *greh;
-
- skb_push(skb, hdr_len);
-
- greh = (struct gre_base_hdr *)skb->data;
- greh->flags = tnl_flags_to_gre_flags(tpi->flags);
- greh->protocol = tpi->proto;
-
- if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- if (tpi->flags&TUNNEL_SEQ) {
- *ptr = tpi->seq;
- ptr--;
- }
- if (tpi->flags&TUNNEL_KEY) {
- *ptr = tpi->key;
- ptr--;
- }
- if (tpi->flags&TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
- *(__sum16 *)ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
- }
- }
-
- return skb;
+ return PACKET_REJECT;
}
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -410,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel *tunnel = netdev_priv(dev);
struct tnl_ptk_info tpi;
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
-
tpi.flags = tunnel->parms.o_flags;
tpi.proto = proto;
tpi.key = tunnel->parms.o_key;
@@ -423,11 +238,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
tpi.seq = htonl(tunnel->o_seqno);
/* Push GRE header. */
- skb = gre_build_header(skb, &tpi, tunnel->hlen);
- if (unlikely(!skb)) {
- dev->stats.tx_dropped++;
- return;
- }
+ gre_build_header(skb, &tpi, tunnel->hlen);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -438,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
- skb = handle_offloads(tunnel, skb);
+ skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
if (IS_ERR(skb))
goto out;
@@ -477,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- skb = handle_offloads(tunnel, skb);
+ skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
if (IS_ERR(skb))
goto out;
@@ -708,9 +519,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
return ip_tunnel_init(dev);
}
-static const struct gre_protocol ipgre_protocol = {
- .handler = ipgre_rcv,
- .err_handler = ipgre_err,
+static struct gre_cisco_protocol ipgre_protocol = {
+ .handler = ipgre_rcv,
+ .err_handler = ipgre_err,
+ .priority = 0,
};
static int __net_init ipgre_init_net(struct net *net)
@@ -978,7 +790,7 @@ static int __init ipgre_init(void)
if (err < 0)
goto pnet_tap_faied;
- err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+ err = gre_cisco_register(&ipgre_protocol);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
goto add_proto_failed;
@@ -997,7 +809,7 @@ static int __init ipgre_init(void)
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+ gre_cisco_unregister(&ipgre_protocol);
add_proto_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_faied:
@@ -1009,8 +821,7 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
- pr_info("%s: can't remove protocol\n", __func__);
+ gre_cisco_unregister(&ipgre_protocol);
unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
}
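
[Editor's note] The ip_gre.c hunks above move offload handling into gre_handle_offloads() and header construction into the now void-returning gre_build_header(). Pieced together from the __gre_xmit()/ipgre_xmit() fragments shown in this patch, an IPv4-over-GRE transmit path is expected to look roughly like the hedged sketch below; my_ipgre_xmit is a hypothetical name and not part of the patch.

#include <linux/err.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <net/gre.h>
#include <net/ip_tunnels.h>

static netdev_tx_t my_ipgre_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct tnl_ptk_info tpi;

	/* GSO marking / checksum help; frees the skb and returns an
	 * ERR_PTR() on failure. */
	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb)) {
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	tpi.flags = tunnel->parms.o_flags;
	tpi.proto = htons(ETH_P_IP);
	tpi.key   = tunnel->parms.o_key;
	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;
	tpi.seq   = htonl(tunnel->o_seqno);

	/* Pushes flags/protocol plus the optional csum/key/seq words;
	 * it no longer reallocates, so there is no NULL return to check. */
	gre_build_header(skb, &tpi, tunnel->hlen);

	ip_tunnel_xmit(skb, dev, &tunnel->parms.iph, IPPROTO_GRE);
	return NETDEV_TX_OK;
}
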
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e189db409b0..bd227e5ea9d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -408,13 +408,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct iphdr *iph = ip_hdr(skb);
int err;
- secpath_reset(skb);
-
- skb->protocol = tpi->proto;
-
- skb->mac_header = skb->network_header;
- __pskb_pull(skb, tunnel->hlen);
- skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
/* Looped back packet, drop it! */
@@ -442,23 +435,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
- /* Warning: All skb pointers will be invalidated! */
- if (tunnel->dev->type == ARPHRD_ETHER) {
- if (!pskb_may_pull(skb, ETH_HLEN)) {
- tunnel->dev->stats.rx_length_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
-
- iph = ip_hdr(skb);
- skb->protocol = eth_type_trans(skb, tunnel->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
- }
-
- skb->pkt_type = PACKET_HOST;
- __skb_tunnel_rx(skb, tunnel->dev);
-
- skb_reset_network_header(skb);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -477,6 +453,12 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ } else {
+ skb->dev = tunnel->dev;
+ }
gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
@@ -491,19 +473,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *inner_iph;
- struct iphdr *iph;
struct flowi4 fl4;
u8 tos, ttl;
__be16 df;
struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
int mtu;
+ int err;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -571,14 +551,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
dev->stats.tx_carrier_errors++;
goto tx_error;
}
- tdev = rt->dst.dev;
-
- if (tdev == dev) {
+ if (rt->dst.dev == dev) {
ip_rt_put(rt);
dev->stats.collisions++;
goto tx_error;
}
-
df = tnl_params->frag_off;
if (df)
@@ -596,6 +573,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
if (!skb_is_gso(skb) &&
(inner_iph->frag_off&htons(IP_DF)) &&
mtu < ntohs(inner_iph->tot_len)) {
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
goto tx_error;
@@ -646,8 +624,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ttl = ip4_dst_hoplimit(&rt->dst);
}
- max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
- + rt->dst.header_len;
+ max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ + rt->dst.header_len;
if (max_headroom > dev->needed_headroom) {
dev->needed_headroom = max_headroom;
if (skb_cow_head(skb, dev->needed_headroom)) {
@@ -657,27 +635,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
- /* Push down and install the IP header. */
- skb_push(skb, sizeof(struct iphdr));
- skb_reset_network_header(skb);
-
- iph = ip_hdr(skb);
- inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ err = iptunnel_xmit(dev_net(dev), rt, skb,
+ fl4.saddr, fl4.daddr, protocol,
+ ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df);
+ iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
- iph->protocol = protocol;
- iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
- iph->daddr = fl4.daddr;
- iph->saddr = fl4.saddr;
- iph->ttl = ttl;
- tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
-
- iptunnel_xmit(skb, dev);
return;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
new file mode 100644
index 00000000000..7167b08977d
--- /dev/null
+++ b/net/ipv4/ip_tunnel_core.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ip_tunnels.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+ struct sk_buff *skb,
+ __be32 src, __be32 dst, __u8 proto,
+ __u8 tos, __u8 ttl, __be16 df)
+{
+ int pkt_len = skb->len;
+ struct iphdr *iph;
+ int err;
+
+ nf_reset(skb);
+ secpath_reset(skb);
+ skb->rxhash = 0;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->dst);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+ /* Push down and install the IP header. */
+ __skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+
+ iph = ip_hdr(skb);
+
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->frag_off = df;
+ iph->protocol = proto;
+ iph->tos = tos;
+ iph->daddr = dst;
+ iph->saddr = src;
+ iph->ttl = ttl;
+ tunnel_ip_select_ident(skb,
+ (const struct iphdr *)skb_inner_network_header(skb),
+ &rt->dst);
+
+ err = ip_local_out(skb);
+ if (unlikely(net_xmit_eval(err)))
+ pkt_len = 0;
+ return pkt_len;
+}
+EXPORT_SYMBOL_GPL(iptunnel_xmit);
+
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
+{
+ if (unlikely(!pskb_may_pull(skb, hdr_len)))
+ return -ENOMEM;
+
+ skb_pull_rcsum(skb, hdr_len);
+
+ if (inner_proto == htons(ETH_P_TEB)) {
+ struct ethhdr *eh = (struct ethhdr *)skb->data;
+
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ return -ENOMEM;
+
+ if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+ skb->protocol = eh->h_proto;
+ else
+ skb->protocol = htons(ETH_P_802_2);
+
+ } else {
+ skb->protocol = inner_proto;
+ }
+
+ nf_reset(skb);
+ secpath_reset(skb);
+ if (!skb->l4_rxhash)
+ skb->rxhash = 0;
+ skb_dst_drop(skb);
+ skb->vlan_tci = 0;
+ skb_set_queue_mapping(skb, 0);
+ skb->pkt_type = PACKET_HOST;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iptunnel_pull_header);
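
[Editor's note] The new ip_tunnel_core.c above is built into the kernel unconditionally (see the Makefile hunk), so encapsulation drivers can use it without pulling in ip_tunnel.o. Based only on the call sites visible in this patch (ipip_rcv() below and the converted ip_tunnel_xmit()), a driver would use the two helpers roughly as in this hedged sketch; the my_encap_* names are illustrative, and iptunnel_xmit_stats() is assumed to be the companion stats helper referenced by the ip_tunnel.c hunk.

#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/route.h>
#include <net/flow.h>
#include <net/ip_tunnels.h>

/* Receive: strip the outer encapsulation and scrub skb state (dst,
 * conntrack, rxhash, VLAN tag) so the inner packet looks freshly
 * received before the tunnel lookup runs. */
static int my_encap_rcv(struct sk_buff *skb, int tnl_hlen, __be16 inner_proto)
{
	if (iptunnel_pull_header(skb, tnl_hlen, inner_proto))
		goto drop;
	/* ... ip_tunnel_lookup() + ip_tunnel_rcv() would follow ... */
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

/* Transmit: build and push the outer IPv4 header, attach the cached
 * route and hand the packet to ip_local_out(); the return value feeds
 * the per-CPU tunnel statistics. */
static void my_encap_xmit(struct net_device *dev, struct rtable *rt,
			  struct sk_buff *skb, const struct flowi4 *fl4,
			  u8 tos, u8 ttl, __be16 df)
{
	int err;

	err = iptunnel_xmit(dev_net(dev), rt, skb,
			    fl4->saddr, fl4->daddr, IPPROTO_IPIP,
			    tos, ttl, df);
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
}
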
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 9df7ecd393f..e6905fbda2a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
struct ip_tunnel *tunnel;
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph;
+ if (iptunnel_pull_header(skb, 0, tpi.proto))
+ goto drop;
+
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {