From 586d5fc867be8f03c049b4b89fd29d0b8b02cab5 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Fri, 6 Jun 2014 04:34:37 +0400 Subject: ip_tunnel: fix possible rtable leak ip_rt_put(rt) is always called in "error" branches above, but was missed in skb_cow_head branch. As rt is not yet bound to skb here we have to release it by hand. Signed-off-by: Dmitry Popov Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2acc2337d38..3f6135bc54e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -668,6 +668,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; -- cgit v1.2.3-70-g09d2 From 87757a917b0b3c0787e0563c679762152be81312 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jun 2014 06:44:03 -0700 Subject: net: force a list_del() in unregister_netdevice_many() unregister_netdevice_many() API is error prone and we had too many bugs because of dangling LIST_HEAD on stacks. See commit f87e6f47933e3e ("net: dont leave active on stack LIST_HEAD") In fact, instead of making sure no caller leaves an active list_head, just force a list_del() in the callee. No one seems to need to access the list after unregister_netdevice_many() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 1 - net/core/dev.c | 5 ++++- net/core/rtnetlink.c | 1 - net/mac80211/iface.c | 1 - 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d53e299ae1d..7eec598c5cb 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1036,7 +1036,6 @@ static int macvlan_device_event(struct notifier_block *unused, list_for_each_entry_safe(vlan, next, &port->vlans, list) vlan->dev->rtnl_link_ops->dellink(vlan->dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlaying device to change its type. */ diff --git a/net/core/dev.c b/net/core/dev.c index fb8b0546485..a30bef1882f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6613,6 +6613,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices + * + * Note: As most callers use a stack allocated list_head, + * we force a list_del() to make sure stack wont be corrupted later. 
*/ void unregister_netdevice_many(struct list_head *head) { @@ -6622,6 +6625,7 @@ void unregister_netdevice_many(struct list_head *head) rollback_registered_many(head); list_for_each_entry(dev, head, unreg_list) net_set_todo(dev); + list_del(head); } } EXPORT_SYMBOL(unregister_netdevice_many); @@ -7077,7 +7081,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); - list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f4e9037f9a0..fbdb1556b0d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1744,7 +1744,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) ops->dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); return 0; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b8d331e7d88..34799e06ee0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1758,7 +1758,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); - list_del(&unreg_list); list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { list_del(&sdata->list); -- cgit v1.2.3-70-g09d2 From 2346829e641b804ece9ac9298136b56d9567c278 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Fri, 6 Jun 2014 23:19:21 +0400 Subject: ipip, sit: fix ipv4_{update_pmtu,redirect} calls ipv4_{update_pmtu,redirect} were called with tunnel's ifindex (t->dev is a tunnel netdevice). It caused wrong route lookup and failure of pmtu update or redirect. We should use the same ifindex that we use in ip_route_output_* in *tunnel_xmit code. It is t->parms.link . Signed-off-by: Dmitry Popov Signed-off-by: David S. 
Miller --- net/ipv4/ipip.c | 4 ++-- net/ipv6/sit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 812b1835146..09680ddbc67 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPIP, 0); + t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e5a453ca302..45397b2a4a0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -560,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; -- cgit v1.2.3-70-g09d2 From 84a7c0b1db1c17d5ded8d3800228a608e1070b40 Mon Sep 17 00:00:00 2001 From: Manuel Schölling Date: Sat, 7 Jun 2014 23:57:25 +0200 Subject: dns_resolver: assure that dns_query() result is null-terminated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dns_query() credulously assumes that keys are null-terminated and returns a copy of a memory block that is off by one. Signed-off-by: Manuel Schölling Signed-off-by: David S. 
Miller --- net/dns_resolver/dns_query.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index e7b6d53eef8..6853d22ebc0 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -149,7 +149,9 @@ int dns_query(const char *type, const char *name, size_t namelen, if (!*_result) goto put; - memcpy(*_result, upayload->data, len + 1); + memcpy(*_result, upayload->data, len); + *_result[len] = '\0'; + if (_expiry) *_expiry = rkey->expiry; -- cgit v1.2.3-70-g09d2 From 7c8e6b9c2811fd37702a9043eabea3545022011e Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Sun, 8 Jun 2014 02:06:25 +0400 Subject: ip_vti: Fix 'ip tunnel add' with 'key' parameters ip tunnel add remote 10.2.2.1 local 10.2.2.2 mode vti ikey 1 okey 2 translates to p->iflags = VTI_ISVTI|GRE_KEY and p->i_key = 1, but GRE_KEY != TUNNEL_KEY, so ip_tunnel_ioctl would set i_key to 0 (same story with o_key) making us unable to create vti tunnels with [io]key via ip tunnel. We cannot simply translate GRE_KEY to TUNNEL_KEY (as GRE module does) because vti_tunnels with same local/remote addresses but different ikeys will be treated as different then. So, imo the best option here is to move p->i_flags & *_KEY check for vti tunnels from ip_tunnel.c to ip_vti.c and to think about [io]_mark field for ip_tunnel_parm in the future. Signed-off-by: Dmitry Popov Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 10 ++++++---- net/ipv4/ip_vti.c | 8 +++++++- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3f6135bc54e..3dbb550abb3 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -748,10 +748,12 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) goto done; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); - if (!(p->i_flags&TUNNEL_KEY)) - p->i_key = 0; - if (!(p->o_flags&TUNNEL_KEY)) - p->o_key = 0; + if (!(p->i_flags & VTI_ISVTI)) { + if (!(p->i_flags & TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags & TUNNEL_KEY)) + p->o_key = 0; + } t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 13ef00f1e17..b8960f3527f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -313,7 +313,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EINVAL; } - p.i_flags |= VTI_ISVTI; + if (!(p.i_flags & GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags & GRE_KEY)) + p.o_key = 0; + + p.i_flags = VTI_ISVTI; + err = ip_tunnel_ioctl(dev, &p, cmd); if (err) return err; -- cgit v1.2.3-70-g09d2 From 5ce54af1fc9d2718d46c9fd92a161379fb197266 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Sun, 8 Jun 2014 03:03:08 +0400 Subject: ip_tunnel: fix i_key matching in ip_tunnel_find Some tunnels (though only vti as for now) can use i_key just for internal use: for example vti uses it for fwmark'ing incoming packets. So raw i_key value shouldn't be treated as a distinguisher for them. ip_tunnel_key_match exists for cases when we want to compare two ip_tunnel_parms' i_keys. Example bug: ip link add type vti ikey 1 local 1.0.0.1 remote 2.0.0.2 ip link add type vti ikey 2 local 1.0.0.1 remote 2.0.0.2 spawned two tunnels, although it doesn't make sense. Signed-off-by: Dmitry Popov Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3dbb550abb3..9b553157e55 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -268,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; + __be16 flags = parms->i_flags; int link = parms->link; struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); @@ -275,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - key == t->parms.i_key && link == t->parms.link && - type == t->dev->type) + type == t->dev->type && + ip_tunnel_key_match(&t->parms, flags, key)) break; } return t; -- cgit v1.2.3-70-g09d2 From f6c20c596f41fe6331cee2053b270bf12142b04e Mon Sep 17 00:00:00 2001 From: huizhang Date: Mon, 9 Jun 2014 12:37:25 +0800 Subject: net: ipv6: Fixed up ipsec packet be re-routing issue Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=75781 When a local output ipsec packet matches a mangle table rule and has a mark value set, the packet is routed again in route_me_harder -> _session_decoder6. In this case, the nhoff in the CB of the skb is still the default value 0, so the protocol match cannot succeed and the packet cannot match the correct SA rule; the packet is then sent out in plaintext. To fix the issue, CB->nhoff must be set. Signed-off-by: Hui Zhang Signed-off-by: David S. 
Miller --- net/ipv6/output_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 56596ce390a..a2bbc0d08d9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -104,6 +104,7 @@ int __ip6_local_out(struct sk_buff *skb) if (len > IPV6_MAXPLEN) len = 0; ipv6_hdr(skb)->payload_len = htons(len); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); -- cgit v1.2.3-70-g09d2 From 5882a07c72093dc3a18e2d2b129fb200686bb6ee Mon Sep 17 00:00:00 2001 From: Wei-Chun Chao Date: Sun, 8 Jun 2014 23:48:54 -0700 Subject: net: fix UDP tunnel GSO of frag_list GRO packets This patch fixes a kernel BUG_ON in skb_segment. It is hit when testing two VMs on openvswitch with one VM acting as VXLAN gateway. During VXLAN packet GSO, skb_segment is called with skb->data pointing to inner TCP payload. skb_segment calls skb_network_protocol to retrieve the inner protocol. skb_network_protocol actually expects skb->data to point to MAC and it calls pskb_may_pull with ETH_HLEN. This ends up pulling in ETH_HLEN data from header tail. As a result, pskb_trim logic is skipped and BUG_ON is hit later. Move skb_push in front of skb_network_protocol so that skb->data lines up properly. kernel BUG at net/core/skbuff.c:2999! Call Trace: [] tcp_gso_segment+0x122/0x410 [] inet_gso_segment+0x13c/0x390 [] skb_mac_gso_segment+0x9b/0x170 [] skb_udp_tunnel_segment+0xd8/0x390 [] udp4_ufo_fragment+0x120/0x140 [] inet_gso_segment+0x13c/0x390 [] ? default_wake_function+0x12/0x20 [] skb_mac_gso_segment+0x9b/0x170 [] __skb_gso_segment+0x60/0xc0 [] dev_hard_start_xmit+0x183/0x550 [] sch_direct_xmit+0xfe/0x1d0 [] __dev_queue_xmit+0x214/0x4f0 [] dev_queue_xmit+0x10/0x20 [] ip_finish_output+0x66b/0x890 [] ip_output+0x58/0x90 [] ? 
fib_table_lookup+0x29f/0x350 [] ip_local_out_sk+0x39/0x50 [] iptunnel_xmit+0x10d/0x130 [] vxlan_xmit_skb+0x1d0/0x330 [vxlan] [] vxlan_tnl_send+0x129/0x1a0 [openvswitch] [] ovs_vport_send+0x26/0xa0 [openvswitch] [] do_output+0x2e/0x50 [openvswitch] Signed-off-by: Wei-Chun Chao Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8383b2bddeb..9433047b245 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2881,12 +2881,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); csum = !!can_checksum_protocol(features, proto); - __skb_push(head_skb, doffset); + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); -- cgit v1.2.3-70-g09d2 From c5b46160877a9bb23ab6f96fb666e7766f407704 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Wed, 11 Jun 2014 10:38:03 -0400 Subject: net/core: Add VF link state control policy Commit 1d8faf48c7 (net/core: Add VF link state control) added VF link state control to the netlink VF nested structure, but failed to add a proper entry for the new structure into the VF policy table. Add the missing entry so the table and the actual data copied into the netlink nested struct are in sync. Signed-off-by: Doug Ledford Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fbdb1556b0d..d57d7bc2218 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1208,6 +1208,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_link_state) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { -- cgit v1.2.3-70-g09d2