summaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_gre.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
commitaecdc33e111b2c447b622e287c6003726daa1426 (patch)
tree3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /net/ipv4/ip_gre.c
parenta20acf99f75e49271381d65db097c9763060a1e8 (diff)
parenta3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) GRE now works over ipv6, from Dmitry Kozlov. 2) Make SCTP more network namespace aware, from Eric Biederman. 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko. 4) Make openvswitch network namespace aware, from Pravin B Shelar. 5) IPV6 NAT implementation, from Patrick McHardy. 6) Server side support for TCP Fast Open, from Jerry Chu and others. 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel Borkmann. 8) Increate the loopback default MTU to 64K, from Eric Dumazet. 9) Use a per-task rather than per-socket page fragment allocator for outgoing networking traffic. This benefits processes that have very many mostly idle sockets, which is quite common. From Eric Dumazet. 10) Use up to 32K for page fragment allocations, with fallbacks to smaller sizes when higher order page allocations fail. Benefits are a) less segments for driver to process b) less calls to page allocator c) less waste of space. From Eric Dumazet. 11) Allow GRO to be used on GRE tunnels, from Eric Dumazet. 12) VXLAN device driver, one way to handle VLAN issues such as the limitation of 4096 VLAN IDs yet still have some level of isolation. From Stephen Hemminger. 13) As usual there is a large boatload of driver changes, with the scale perhaps tilted towards the wireless side this time around. Fix up various fairly trivial conflicts, mostly caused by the user namespace changes. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits) hyperv: Add buffer for extended info after the RNDIS response message. hyperv: Report actual status in receive completion packet hyperv: Remove extra allocated space for recv_pkt_list elements hyperv: Fix page buffer handling in rndis_filter_send_request() hyperv: Fix the missing return value in rndis_filter_set_packet_filter() hyperv: Fix the max_xfer_size in RNDIS initialization vxlan: put UDP socket in correct namespace vxlan: Depend on CONFIG_INET sfc: Fix the reported priorities of different filter types sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP sfc: Fix loopback self-test with separate_tx_channels=1 sfc: Fix MCDI structure field lookup sfc: Add parentheses around use of bitfield macro arguments sfc: Fix null function pointer in efx_sriov_channel_type vxlan: virtual extensible lan igmp: export symbol ip_mc_leave_group netlink: add attributes to fdb interface tg3: unconditionally select HWMON support when tg3 is enabled. Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT" gre: fix sparse warning ...
Diffstat (limited to 'net/ipv4/ip_gre.c')
-rw-r--r--net/ipv4/ip_gre.c128
1 files changed, 86 insertions, 42 deletions
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b062a98574f..7240f8e2dd4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -120,6 +120,10 @@
Alexey Kuznetsov.
*/
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
@@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
tot->rx_crc_errors = dev->stats.rx_crc_errors;
tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
tot->rx_errors = dev->stats.rx_errors;
+
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
tot->tx_dropped = dev->stats.tx_dropped;
@@ -214,11 +220,25 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
return tot;
}
+/* Does key in tunnel parameters match packet */
+static bool ipgre_key_match(const struct ip_tunnel_parm *p,
+ __be16 flags, __be32 key)
+{
+ if (p->i_flags & GRE_KEY) {
+ if (flags & GRE_KEY)
+ return key == p->i_key;
+ else
+ return false; /* key expected, none present */
+ } else
+ return !(flags & GRE_KEY);
+}
+
/* Given src, dst and key, find appropriate for input tunnel. */
static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
__be32 remote, __be32 local,
- __be32 key, __be16 gre_proto)
+ __be16 flags, __be32 key,
+ __be16 gre_proto)
{
struct net *net = dev_net(dev);
int link = dev->ifindex;
@@ -233,10 +253,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
if (local != t->parms.iph.saddr ||
remote != t->parms.iph.daddr ||
- key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
continue;
+ if (!ipgre_key_match(&t->parms, flags, key))
+ continue;
+
if (t->dev->type != ARPHRD_IPGRE &&
t->dev->type != dev_type)
continue;
@@ -257,10 +279,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
if (remote != t->parms.iph.daddr ||
- key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
continue;
+ if (!ipgre_key_match(&t->parms, flags, key))
+ continue;
+
if (t->dev->type != ARPHRD_IPGRE &&
t->dev->type != dev_type)
continue;
@@ -283,10 +307,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
if ((local != t->parms.iph.saddr &&
(local != t->parms.iph.daddr ||
!ipv4_is_multicast(local))) ||
- key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
continue;
+ if (!ipgre_key_match(&t->parms, flags, key))
+ continue;
+
if (t->dev->type != ARPHRD_IPGRE &&
t->dev->type != dev_type)
continue;
@@ -489,6 +515,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
__be16 flags;
+ __be32 key = 0;
flags = p[0];
if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
@@ -505,6 +532,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
if (skb_headlen(skb) < grehlen)
return;
+ if (flags & GRE_KEY)
+ key = *(((__be32 *)p) + (grehlen / 4) - 1);
+
switch (type) {
default:
case ICMP_PARAMETERPROB:
@@ -533,49 +563,34 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
break;
}
- rcu_read_lock();
t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
- flags & GRE_KEY ?
- *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
- p[1]);
+ flags, key, p[1]);
+
if (t == NULL)
- goto out;
+ return;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
t->parms.link, 0, IPPROTO_GRE, 0);
- goto out;
+ return;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_GRE, 0);
- goto out;
+ return;
}
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- goto out;
+ return;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- goto out;
+ return;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
-out:
- rcu_read_unlock();
-}
-
-static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
-{
- if (INET_ECN_is_ce(iph->tos)) {
- if (skb->protocol == htons(ETH_P_IP)) {
- IP_ECN_set_ce(ip_hdr(skb));
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- IP6_ECN_set_ce(ipv6_hdr(skb));
- }
- }
}
static inline u8
@@ -600,9 +615,10 @@ static int ipgre_rcv(struct sk_buff *skb)
struct ip_tunnel *tunnel;
int offset = 4;
__be16 gre_proto;
+ int err;
if (!pskb_may_pull(skb, 16))
- goto drop_nolock;
+ goto drop;
iph = ip_hdr(skb);
h = skb->data;
@@ -613,7 +629,7 @@ static int ipgre_rcv(struct sk_buff *skb)
- We do not support routing headers.
*/
if (flags&(GRE_VERSION|GRE_ROUTING))
- goto drop_nolock;
+ goto drop;
if (flags&GRE_CSUM) {
switch (skb->ip_summed) {
@@ -641,10 +657,10 @@ static int ipgre_rcv(struct sk_buff *skb)
gre_proto = *(__be16 *)(h + 2);
- rcu_read_lock();
- if ((tunnel = ipgre_tunnel_lookup(skb->dev,
- iph->saddr, iph->daddr, key,
- gre_proto))) {
+ tunnel = ipgre_tunnel_lookup(skb->dev,
+ iph->saddr, iph->daddr, flags, key,
+ gre_proto);
+ if (tunnel) {
struct pcpu_tstats *tstats;
secpath_reset(skb);
@@ -703,27 +719,33 @@ static int ipgre_rcv(struct sk_buff *skb)
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
}
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ skb_reset_network_header(skb);
+ err = IP_ECN_decapsulate(iph, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
+ }
+ }
+
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- __skb_tunnel_rx(skb, tunnel->dev);
-
- skb_reset_network_header(skb);
- ipgre_ecn_decapsulate(iph, skb);
-
- netif_rx(skb);
-
- rcu_read_unlock();
+ gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
- rcu_read_unlock();
-drop_nolock:
kfree_skb(skb);
return 0;
}
@@ -745,6 +767,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
__be32 dst;
int mtu;
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ goto tx_error;
+
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -1292,10 +1318,18 @@ static const struct net_device_ops ipgre_netdev_ops = {
static void ipgre_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ gro_cells_destroy(&tunnel->gro_cells);
free_percpu(dev->tstats);
free_netdev(dev);
}
+#define GRE_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
@@ -1309,12 +1343,16 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
}
static int ipgre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
struct iphdr *iph;
+ int err;
tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
@@ -1341,6 +1379,12 @@ static int ipgre_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
+ err = gro_cells_init(&tunnel->gro_cells, dev);
+ if (err) {
+ free_percpu(dev->tstats);
+ return err;
+ }
+
return 0;
}