summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig41
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c765
-rw-r--r--net/ipv6/addrlabel.c25
-rw-r--r--net/ipv6/af_inet6.c227
-rw-r--r--net/ipv6/anycast.c63
-rw-r--r--net/ipv6/fib6_rules.c113
-rw-r--r--net/ipv6/icmp.c145
-rw-r--r--net/ipv6/inet6_connection_sock.c4
-rw-r--r--net/ipv6/inet6_hashtables.c10
-rw-r--r--net/ipv6/ip6_fib.c298
-rw-r--r--net/ipv6/ip6_flowlabel.c101
-rw-r--r--net/ipv6/ip6_input.c98
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c228
-rw-r--r--net/ipv6/ip6mr.c1643
-rw-r--r--net/ipv6/ipv6_sockglue.c365
-rw-r--r--net/ipv6/mcast.c272
-rw-r--r--net/ipv6/mip6.c24
-rw-r--r--net/ipv6/ndisc.c237
-rw-r--r--net/ipv6/netfilter.c46
-rw-r--r--net/ipv6/netfilter/ip6_queue.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c51
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c6
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c7
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c3
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c3
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c2
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c29
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c23
-rw-r--r--net/ipv6/proc.c73
-rw-r--r--net/ipv6/raw.c69
-rw-r--r--net/ipv6/reassembly.c19
-rw-r--r--net/ipv6/route.c640
-rw-r--r--net/ipv6/sit.c456
-rw-r--r--net/ipv6/syncookies.c279
-rw-r--r--net/ipv6/sysctl_net_ipv6.c15
-rw-r--r--net/ipv6/tcp_ipv6.c199
-rw-r--r--net/ipv6/udp.c44
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udplite.c40
-rw-r--r--net/ipv6/xfrm6_input.c55
-rw-r--r--net/ipv6/xfrm6_policy.c9
-rw-r--r--net/ipv6/xfrm6_state.c171
-rw-r--r--net/ipv6/xfrm6_tunnel.c45
48 files changed, 5053 insertions, 1955 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 47263e45bac..42814a2ec9d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -3,7 +3,7 @@
#
# IPv6 as module will cause a CRASH if you try to unload it
-config IPV6
+menuconfig IPV6
tristate "The IPv6 protocol"
default m
---help---
@@ -19,9 +19,10 @@ config IPV6
To compile this protocol support as a module, choose M here: the
module will be called ipv6.
+if IPV6
+
config IPV6_PRIVACY
bool "IPv6: Privacy Extensions support"
- depends on IPV6
---help---
Privacy Extensions for Stateless Address Autoconfiguration in IPv6
support. With this option, additional periodically-alter
@@ -40,7 +41,6 @@ config IPV6_PRIVACY
config IPV6_ROUTER_PREF
bool "IPv6: Router Preference (RFC 4191) support"
- depends on IPV6
---help---
Router Preference is an optional extension to the Router
Advertisement message to improve the ability of hosts
@@ -59,7 +59,7 @@ config IPV6_ROUTE_INFO
config IPV6_OPTIMISTIC_DAD
bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
- depends on IPV6 && EXPERIMENTAL
+ depends on EXPERIMENTAL
---help---
This is experimental support for optimistic Duplicate
Address Detection. It allows for autoconfigured addresses
@@ -69,7 +69,6 @@ config IPV6_OPTIMISTIC_DAD
config INET6_AH
tristate "IPv6: AH transformation"
- depends on IPV6
select XFRM
select CRYPTO
select CRYPTO_HMAC
@@ -82,7 +81,6 @@ config INET6_AH
config INET6_ESP
tristate "IPv6: ESP transformation"
- depends on IPV6
select XFRM
select CRYPTO
select CRYPTO_AUTHENC
@@ -98,7 +96,6 @@ config INET6_ESP
config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
- depends on IPV6
select XFRM
select INET6_XFRM_TUNNEL
select CRYPTO
@@ -111,7 +108,7 @@ config INET6_IPCOMP
config IPV6_MIP6
tristate "IPv6: Mobility (EXPERIMENTAL)"
- depends on IPV6 && EXPERIMENTAL
+ depends on EXPERIMENTAL
select XFRM
---help---
Support for IPv6 Mobility described in RFC 3775.
@@ -129,7 +126,6 @@ config INET6_TUNNEL
config INET6_XFRM_MODE_TRANSPORT
tristate "IPv6: IPsec transport mode"
- depends on IPV6
default IPV6
select XFRM
---help---
@@ -139,7 +135,6 @@ config INET6_XFRM_MODE_TRANSPORT
config INET6_XFRM_MODE_TUNNEL
tristate "IPv6: IPsec tunnel mode"
- depends on IPV6
default IPV6
select XFRM
---help---
@@ -149,7 +144,6 @@ config INET6_XFRM_MODE_TUNNEL
config INET6_XFRM_MODE_BEET
tristate "IPv6: IPsec BEET mode"
- depends on IPV6
default IPV6
select XFRM
---help---
@@ -159,15 +153,15 @@ config INET6_XFRM_MODE_BEET
config INET6_XFRM_MODE_ROUTEOPTIMIZATION
tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
- depends on IPV6 && EXPERIMENTAL
+ depends on EXPERIMENTAL
select XFRM
---help---
Support for MIPv6 route optimization mode.
config IPV6_SIT
tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
- depends on IPV6
select INET_TUNNEL
+ select IPV6_NDISC_NODETYPE
default y
---help---
Tunneling means encapsulating data of one protocol type within
@@ -178,10 +172,12 @@ config IPV6_SIT
Saying M here will produce a module called sit.ko. If unsure, say Y.
+config IPV6_NDISC_NODETYPE
+ bool
+
config IPV6_TUNNEL
tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
select INET6_TUNNEL
- depends on IPV6
---help---
Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
RFC 2473.
@@ -190,7 +186,7 @@ config IPV6_TUNNEL
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
- depends on IPV6 && EXPERIMENTAL
+ depends on EXPERIMENTAL
select FIB_RULES
---help---
Support multiple routing tables.
@@ -209,3 +205,18 @@ config IPV6_SUBTREES
If unsure, say N.
+config IPV6_MROUTE
+ bool "IPv6: multicast routing (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ ---help---
+ Experimental support for IPv6 multicast forwarding.
+ If unsure, say N.
+
+config IPV6_PIMSM_V2
+ bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
+ depends on IPV6_MROUTE
+ ---help---
+ Support for IPv6 PIM multicast routing protocol PIM-SMv2.
+ If unsure, say N.
+
+endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 24f3aa0f2a3..686934acfac 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -11,11 +11,14 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
+ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
+
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
xfrm6_output.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
+ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e08955baedf..8a0fd4007bd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -222,6 +222,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
/* Check if a valid qdisc is available */
static inline int addrconf_qdisc_ok(struct net_device *dev)
@@ -321,7 +323,6 @@ EXPORT_SYMBOL(in6_dev_finish_destroy);
static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
- struct in6_addr maddr;
ASSERT_RTNL();
@@ -335,7 +336,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
rwlock_init(&ndev->lock);
ndev->dev = dev;
- memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf));
+ memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -349,7 +350,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
if (snmp6_alloc_dev(ndev) < 0) {
ADBG((KERN_WARNING
"%s(): cannot allocate memory for statistics; dev=%s.\n",
- __FUNCTION__, dev->name));
+ __func__, dev->name));
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -359,7 +360,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
if (snmp6_register_dev(ndev) < 0) {
ADBG((KERN_WARNING
"%s(): cannot create /proc/net/dev_snmp6/%s\n",
- __FUNCTION__, dev->name));
+ __func__, dev->name));
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -407,8 +408,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
rcu_assign_pointer(dev->ip6_ptr, ndev);
/* Join all-node multicast group */
- ipv6_addr_all_nodes(&maddr);
- ipv6_dev_mc_inc(dev, &maddr);
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
return ndev;
}
@@ -434,18 +434,15 @@ static void dev_forward_change(struct inet6_dev *idev)
{
struct net_device *dev;
struct inet6_ifaddr *ifa;
- struct in6_addr addr;
if (!idev)
return;
dev = idev->dev;
if (dev && (dev->flags & IFF_MULTICAST)) {
- ipv6_addr_all_routers(&addr);
-
if (idev->cnf.forwarding)
- ipv6_dev_mc_inc(dev, &addr);
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
else
- ipv6_dev_mc_dec(dev, &addr);
+ ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
}
for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
if (ifa->flags&IFA_F_TENTATIVE)
@@ -494,7 +491,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
dev_forward_change((struct inet6_dev *)table->extra1);
if (*p)
- rt6_purge_dflt_routers();
+ rt6_purge_dflt_routers(net);
}
#endif
@@ -542,6 +539,25 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
*ifap = ifp;
}
+/*
+ * Hash function taken from net_alias.c
+ */
+static u8 ipv6_addr_hash(const struct in6_addr *addr)
+{
+ __u32 word;
+
+ /*
+ * We perform the hash function over the last 64 bits of the address
+ * This will include the IEEE address token on links that support it.
+ */
+
+ word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]);
+ word ^= (word >> 16);
+ word ^= (word >> 8);
+
+ return ((word ^ (word >> 4)) & 0x0f);
+}
+
/* On success it returns ifp with increased reference count */
static struct inet6_ifaddr *
@@ -562,7 +578,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
write_lock(&addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) {
+ if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
ADBG(("ipv6_add_addr: already assigned\n"));
err = -EEXIST;
goto out;
@@ -752,9 +768,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
struct in6_addr prefix;
struct rt6_info *rt;
-
+ struct net *net = dev_net(ifp->idev->dev);
ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
- rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
+ rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (onlink == 0) {
@@ -894,20 +910,40 @@ out:
/*
* Choose an appropriate source address (RFC3484)
*/
+enum {
+ IPV6_SADDR_RULE_INIT = 0,
+ IPV6_SADDR_RULE_LOCAL,
+ IPV6_SADDR_RULE_SCOPE,
+ IPV6_SADDR_RULE_PREFERRED,
+#ifdef CONFIG_IPV6_MIP6
+ IPV6_SADDR_RULE_HOA,
+#endif
+ IPV6_SADDR_RULE_OIF,
+ IPV6_SADDR_RULE_LABEL,
+#ifdef CONFIG_IPV6_PRIVACY
+ IPV6_SADDR_RULE_PRIVACY,
+#endif
+ IPV6_SADDR_RULE_ORCHID,
+ IPV6_SADDR_RULE_PREFIX,
+ IPV6_SADDR_RULE_MAX
+};
+
struct ipv6_saddr_score {
- int addr_type;
- unsigned int attrs;
- int matchlen;
- int scope;
- unsigned int rule;
+ int rule;
+ int addr_type;
+ struct inet6_ifaddr *ifa;
+ DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX);
+ int scopedist;
+ int matchlen;
};
-#define IPV6_SADDR_SCORE_LOCAL 0x0001
-#define IPV6_SADDR_SCORE_PREFERRED 0x0004
-#define IPV6_SADDR_SCORE_HOA 0x0008
-#define IPV6_SADDR_SCORE_OIF 0x0010
-#define IPV6_SADDR_SCORE_LABEL 0x0020
-#define IPV6_SADDR_SCORE_PRIVACY 0x0040
+struct ipv6_saddr_dst {
+ const struct in6_addr *addr;
+ int ifindex;
+ int scope;
+ int label;
+ unsigned int prefs;
+};
static inline int ipv6_saddr_preferred(int type)
{
@@ -917,27 +953,152 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
-int ipv6_dev_get_saddr(struct net_device *daddr_dev,
- struct in6_addr *daddr, struct in6_addr *saddr)
+static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score,
+ struct ipv6_saddr_dst *dst,
+ int i)
{
- struct ipv6_saddr_score hiscore;
- struct inet6_ifaddr *ifa_result = NULL;
- int daddr_type = __ipv6_addr_type(daddr);
- int daddr_scope = __ipv6_addr_src_scope(daddr_type);
- int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0;
- u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex);
+ int ret;
+
+ if (i <= score->rule) {
+ switch (i) {
+ case IPV6_SADDR_RULE_SCOPE:
+ ret = score->scopedist;
+ break;
+ case IPV6_SADDR_RULE_PREFIX:
+ ret = score->matchlen;
+ break;
+ default:
+ ret = !!test_bit(i, score->scorebits);
+ }
+ goto out;
+ }
+
+ switch (i) {
+ case IPV6_SADDR_RULE_INIT:
+ /* Rule 0: remember if hiscore is not ready yet */
+ ret = !!score->ifa;
+ break;
+ case IPV6_SADDR_RULE_LOCAL:
+ /* Rule 1: Prefer same address */
+ ret = ipv6_addr_equal(&score->ifa->addr, dst->addr);
+ break;
+ case IPV6_SADDR_RULE_SCOPE:
+ /* Rule 2: Prefer appropriate scope
+ *
+ * ret
+ * ^
+ * -1 | d 15
+ * ---+--+-+---> scope
+ * |
+ * | d is scope of the destination.
+ * B-d | \
+ * | \ <- smaller scope is better if
+ * B-15 | \ if scope is enough for destinaion.
+ * | ret = B - scope (-1 <= scope >= d <= 15).
+ * d-C-1 | /
+ * |/ <- greater is better
+ * -C / if scope is not enough for destination.
+ * /| ret = scope - C (-1 <= d < scope <= 15).
+ *
+ * d - C - 1 < B -15 (for all -1 <= d <= 15).
+ * C > d + 14 - B >= 15 + 14 - B = 29 - B.
+ * Assume B = 0 and we get C > 29.
+ */
+ ret = __ipv6_addr_src_scope(score->addr_type);
+ if (ret >= dst->scope)
+ ret = -ret;
+ else
+ ret -= 128; /* 30 is enough */
+ score->scopedist = ret;
+ break;
+ case IPV6_SADDR_RULE_PREFERRED:
+ /* Rule 3: Avoid deprecated and optimistic addresses */
+ ret = ipv6_saddr_preferred(score->addr_type) ||
+ !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
+ break;
+#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SADDR_RULE_HOA:
+ {
+ /* Rule 4: Prefer home address */
+ int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA);
+ ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome;
+ break;
+ }
+#endif
+ case IPV6_SADDR_RULE_OIF:
+ /* Rule 5: Prefer outgoing interface */
+ ret = (!dst->ifindex ||
+ dst->ifindex == score->ifa->idev->dev->ifindex);
+ break;
+ case IPV6_SADDR_RULE_LABEL:
+ /* Rule 6: Prefer matching label */
+ ret = ipv6_addr_label(&score->ifa->addr, score->addr_type,
+ score->ifa->idev->dev->ifindex) == dst->label;
+ break;
+#ifdef CONFIG_IPV6_PRIVACY
+ case IPV6_SADDR_RULE_PRIVACY:
+ {
+ /* Rule 7: Prefer public address
+ * Note: prefer temprary address if use_tempaddr >= 2
+ */
+ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
+ !!(dst->prefs & IPV6_PREFER_SRC_TMP) :
+ score->ifa->idev->cnf.use_tempaddr >= 2;
+ ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
+ break;
+ }
+#endif
+ case IPV6_SADDR_RULE_ORCHID:
+ /* Rule 8-: Prefer ORCHID vs ORCHID or
+ * non-ORCHID vs non-ORCHID
+ */
+ ret = !(ipv6_addr_orchid(&score->ifa->addr) ^
+ ipv6_addr_orchid(dst->addr));
+ break;
+ case IPV6_SADDR_RULE_PREFIX:
+ /* Rule 8: Use longest matching prefix */
+ score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
+ dst->addr);
+ break;
+ default:
+ ret = 0;
+ }
+
+ if (ret)
+ __set_bit(i, score->scorebits);
+ score->rule = i;
+out:
+ return ret;
+}
+
+int ipv6_dev_get_saddr(struct net_device *dst_dev,
+ const struct in6_addr *daddr, unsigned int prefs,
+ struct in6_addr *saddr)
+{
+ struct ipv6_saddr_score scores[2],
+ *score = &scores[0], *hiscore = &scores[1];
+ struct net *net = dev_net(dst_dev);
+ struct ipv6_saddr_dst dst;
struct net_device *dev;
+ int dst_type;
+
+ dst_type = __ipv6_addr_type(daddr);
+ dst.addr = daddr;
+ dst.ifindex = dst_dev ? dst_dev->ifindex : 0;
+ dst.scope = __ipv6_addr_src_scope(dst_type);
+ dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex);
+ dst.prefs = prefs;
- memset(&hiscore, 0, sizeof(hiscore));
+ hiscore->rule = -1;
+ hiscore->ifa = NULL;
read_lock(&dev_base_lock);
rcu_read_lock();
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
- /* Rule 0: Candidate Source Address (section 4)
+ /* Candidate Source Address (section 4)
* - multicast and link-local destination address,
* the set of candidate source address MUST only
* include addresses assigned to interfaces
@@ -949,9 +1110,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
* belonging to the same site as the outgoing
* interface.)
*/
- if ((daddr_type & IPV6_ADDR_MULTICAST ||
- daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
- daddr_dev && dev != daddr_dev)
+ if (((dst_type & IPV6_ADDR_MULTICAST) ||
+ dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
+ dst.ifindex && dev->ifindex != dst.ifindex)
continue;
idev = __in6_dev_get(dev);
@@ -959,12 +1120,10 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
continue;
read_lock_bh(&idev->lock);
- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
- struct ipv6_saddr_score score;
-
- score.addr_type = __ipv6_addr_type(&ifa->addr);
+ for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) {
+ int i;
- /* Rule 0:
+ /*
* - Tentative Address (RFC2462 section 5.4)
* - A tentative address is not considered
* "assigned to an interface" in the traditional
@@ -974,11 +1133,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
* addresses, and the unspecified address MUST
* NOT be included in a candidate set.
*/
- if ((ifa->flags & IFA_F_TENTATIVE) &&
- (!(ifa->flags & IFA_F_OPTIMISTIC)))
+ if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+ (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
continue;
- if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
- score.addr_type & IPV6_ADDR_MULTICAST)) {
+
+ score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+ if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+ score->addr_type & IPV6_ADDR_MULTICAST)) {
LIMIT_NETDEBUG(KERN_DEBUG
"ADDRCONF: unspecified / multicast address "
"assigned as unicast address on %s",
@@ -986,207 +1148,63 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
continue;
}
- score.attrs = 0;
- score.matchlen = 0;
- score.scope = 0;
- score.rule = 0;
+ score->rule = -1;
+ bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+ for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+ int minihiscore, miniscore;
+
+ minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i);
+ miniscore = ipv6_get_saddr_eval(score, &dst, i);
+
+ if (minihiscore > miniscore) {
+ if (i == IPV6_SADDR_RULE_SCOPE &&
+ score->scopedist > 0) {
+ /*
+ * special case:
+ * each remaining entry
+ * has too small (not enough)
+ * scope, because ifa entries
+ * are sorted by their scope
+ * values.
+ */
+ goto try_nextdev;
+ }
+ break;
+ } else if (minihiscore < miniscore) {
+ struct ipv6_saddr_score *tmp;
- if (ifa_result == NULL) {
- /* record it if the first available entry */
- goto record_it;
- }
+ if (hiscore->ifa)
+ in6_ifa_put(hiscore->ifa);
- /* Rule 1: Prefer same address */
- if (hiscore.rule < 1) {
- if (ipv6_addr_equal(&ifa_result->addr, daddr))
- hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL;
- hiscore.rule++;
- }
- if (ipv6_addr_equal(&ifa->addr, daddr)) {
- score.attrs |= IPV6_SADDR_SCORE_LOCAL;
- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) {
- score.rule = 1;
- goto record_it;
- }
- } else {
- if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)
- continue;
- }
+ in6_ifa_hold(score->ifa);
- /* Rule 2: Prefer appropriate scope */
- if (hiscore.rule < 2) {
- hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type);
- hiscore.rule++;
- }
- score.scope = __ipv6_addr_src_scope(score.addr_type);
- if (hiscore.scope < score.scope) {
- if (hiscore.scope < daddr_scope) {
- score.rule = 2;
- goto record_it;
- } else
- continue;
- } else if (score.scope < hiscore.scope) {
- if (score.scope < daddr_scope)
- break; /* addresses sorted by scope */
- else {
- score.rule = 2;
- goto record_it;
- }
- }
+ tmp = hiscore;
+ hiscore = score;
+ score = tmp;
- /* Rule 3: Avoid deprecated and optimistic addresses */
- if (hiscore.rule < 3) {
- if (ipv6_saddr_preferred(hiscore.addr_type) ||
- (((ifa_result->flags &
- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
- hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
- hiscore.rule++;
- }
- if (ipv6_saddr_preferred(score.addr_type) ||
- (((ifa->flags &
- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
- score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
- score.rule = 3;
- goto record_it;
- }
- } else {
- if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)
- continue;
- }
+ /* restore our iterator */
+ score->ifa = hiscore->ifa;
- /* Rule 4: Prefer home address */
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- if (hiscore.rule < 4) {
- if (ifa_result->flags & IFA_F_HOMEADDRESS)
- hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
- hiscore.rule++;
- }
- if (ifa->flags & IFA_F_HOMEADDRESS) {
- score.attrs |= IPV6_SADDR_SCORE_HOA;
- if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
- score.rule = 4;
- goto record_it;
- }
- } else {
- if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
- continue;
- }
-#else
- if (hiscore.rule < 4)
- hiscore.rule++;
-#endif
-
- /* Rule 5: Prefer outgoing interface */
- if (hiscore.rule < 5) {
- if (daddr_dev == NULL ||
- daddr_dev == ifa_result->idev->dev)
- hiscore.attrs |= IPV6_SADDR_SCORE_OIF;
- hiscore.rule++;
- }
- if (daddr_dev == NULL ||
- daddr_dev == ifa->idev->dev) {
- score.attrs |= IPV6_SADDR_SCORE_OIF;
- if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) {
- score.rule = 5;
- goto record_it;
- }
- } else {
- if (hiscore.attrs & IPV6_SADDR_SCORE_OIF)
- continue;
- }
-
- /* Rule 6: Prefer matching label */
- if (hiscore.rule < 6) {
- if (ipv6_addr_label(&ifa_result->addr,
- hiscore.addr_type,
- ifa_result->idev->dev->ifindex) == daddr_label)
- hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
- hiscore.rule++;
- }
- if (ipv6_addr_label(&ifa->addr,
- score.addr_type,
- ifa->idev->dev->ifindex) == daddr_label) {
- score.attrs |= IPV6_SADDR_SCORE_LABEL;
- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
- score.rule = 6;
- goto record_it;
- }
- } else {
- if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL)
- continue;
- }
-
-#ifdef CONFIG_IPV6_PRIVACY
- /* Rule 7: Prefer public address
- * Note: prefer temprary address if use_tempaddr >= 2
- */
- if (hiscore.rule < 7) {
- if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^
- (ifa_result->idev->cnf.use_tempaddr >= 2))
- hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY;
- hiscore.rule++;
- }
- if ((!(ifa->flags & IFA_F_TEMPORARY)) ^
- (ifa->idev->cnf.use_tempaddr >= 2)) {
- score.attrs |= IPV6_SADDR_SCORE_PRIVACY;
- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) {
- score.rule = 7;
- goto record_it;
+ break;
}
- } else {
- if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
- continue;
}
-#else
- if (hiscore.rule < 7)
- hiscore.rule++;
-#endif
- /* Rule 8: Use longest matching prefix */
- if (hiscore.rule < 8) {
- hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr);
- hiscore.rule++;
- }
- score.matchlen = ipv6_addr_diff(&ifa->addr, daddr);
- if (score.matchlen > hiscore.matchlen) {
- score.rule = 8;
- goto record_it;
- }
-#if 0
- else if (score.matchlen < hiscore.matchlen)
- continue;
-#endif
-
- /* Final Rule: choose first available one */
- continue;
-record_it:
- if (ifa_result)
- in6_ifa_put(ifa_result);
- in6_ifa_hold(ifa);
- ifa_result = ifa;
- hiscore = score;
}
+try_nextdev:
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
read_unlock(&dev_base_lock);
- if (!ifa_result)
+ if (!hiscore->ifa)
return -EADDRNOTAVAIL;
- ipv6_addr_copy(saddr, &ifa_result->addr);
- in6_ifa_put(ifa_result);
+ ipv6_addr_copy(saddr, &hiscore->ifa->addr);
+ in6_ifa_put(hiscore->ifa);
return 0;
}
-
-int ipv6_get_saddr(struct dst_entry *dst,
- struct in6_addr *daddr, struct in6_addr *saddr)
-{
- return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
-}
-
-EXPORT_SYMBOL(ipv6_get_saddr);
+EXPORT_SYMBOL(ipv6_dev_get_saddr);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
unsigned char banned_flags)
@@ -1232,7 +1250,7 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
read_lock_bh(&addrconf_hash_lock);
for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
- if (ifp->idev->dev->nd_net != net)
+ if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
!(ifp->flags&IFA_F_TENTATIVE)) {
@@ -1254,7 +1272,7 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
u8 hash = ipv6_addr_hash(addr);
for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
- if (ifp->idev->dev->nd_net != net)
+ if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (dev == NULL || ifp->idev->dev == dev)
@@ -1264,7 +1282,32 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return ifp != NULL;
}
-struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr,
+int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ int onlink;
+
+ onlink = 0;
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+ onlink = ipv6_prefix_equal(addr, &ifa->addr,
+ ifa->prefix_len);
+ if (onlink)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+ return onlink;
+}
+
+EXPORT_SYMBOL(ipv6_chk_prefix);
+
+struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
struct net_device *dev, int strict)
{
struct inet6_ifaddr * ifp;
@@ -1272,7 +1315,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr,
read_lock_bh(&addrconf_hash_lock);
for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
- if (ifp->idev->dev->nd_net != net)
+ if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (dev == NULL || ifp->idev->dev == dev ||
@@ -1449,6 +1492,29 @@ static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
return 0;
}
+int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
+{
+ eui[0] = (ipv4_is_zeronet(addr) || ipv4_is_private_10(addr) ||
+ ipv4_is_loopback(addr) || ipv4_is_linklocal_169(addr) ||
+ ipv4_is_private_172(addr) || ipv4_is_test_192(addr) ||
+ ipv4_is_anycast_6to4(addr) || ipv4_is_private_192(addr) ||
+ ipv4_is_test_198(addr) || ipv4_is_multicast(addr) ||
+ ipv4_is_lbcast(addr)) ? 0x00 : 0x02;
+ eui[1] = 0;
+ eui[2] = 0x5E;
+ eui[3] = 0xFE;
+ memcpy(eui + 4, &addr, 4);
+ return 0;
+}
+EXPORT_SYMBOL(__ipv6_isatap_ifid);
+
+static int addrconf_ifid_sit(u8 *eui, struct net_device *dev)
+{
+ if (dev->priv_flags & IFF_ISATAP)
+ return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
+ return -1;
+}
+
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
switch (dev->type) {
@@ -1461,8 +1527,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
case ARPHRD_INFINIBAND:
return addrconf_ifid_infiniband(eui, dev);
case ARPHRD_SIT:
- if (dev->priv_flags & IFF_ISATAP)
- return ipv6_isatap_eui64(eui, *(__be32 *)dev->dev_addr);
+ return addrconf_ifid_sit(eui, dev);
}
return -1;
}
@@ -1574,7 +1639,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
.fc_expires = expires,
.fc_dst_len = plen,
.fc_flags = RTF_UP | flags,
- .fc_nlinfo.nl_net = &init_net,
+ .fc_nlinfo.nl_net = dev_net(dev),
};
ipv6_addr_copy(&cfg.fc_dst, pfx);
@@ -1601,7 +1666,7 @@ static void addrconf_add_mroute(struct net_device *dev)
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
.fc_flags = RTF_UP,
- .fc_nlinfo.nl_net = &init_net,
+ .fc_nlinfo.nl_net = dev_net(dev),
};
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
@@ -1618,7 +1683,7 @@ static void sit_route_add(struct net_device *dev)
.fc_ifindex = dev->ifindex,
.fc_dst_len = 96,
.fc_flags = RTF_UP | RTF_NONEXTHOP,
- .fc_nlinfo.nl_net = &init_net,
+ .fc_nlinfo.nl_net = dev_net(dev),
};
/* prefix length - 96 bits "::d.d.d.d" */
@@ -1719,7 +1784,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
if (pinfo->onlink) {
struct rt6_info *rt;
- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+ rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
+ dev->ifindex, 1);
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (rt->rt6i_flags&RTF_EXPIRES) {
@@ -1762,7 +1828,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
ok:
- ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1);
+ ifp = ipv6_get_ifaddr(dev_net(dev), &addr, dev, 1);
if (ifp == NULL && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
@@ -1888,7 +1954,7 @@ ok:
* Special case for SIT interfaces where we create a new "virtual"
* device.
*/
-int addrconf_set_dstaddr(void __user *arg)
+int addrconf_set_dstaddr(struct net *net, void __user *arg)
{
struct in6_ifreq ireq;
struct net_device *dev;
@@ -1900,7 +1966,7 @@ int addrconf_set_dstaddr(void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
goto err_exit;
- dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex);
+ dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
err = -ENODEV;
if (dev == NULL)
@@ -1931,7 +1997,8 @@ int addrconf_set_dstaddr(void __user *arg)
if (err == 0) {
err = -ENOBUFS;
- if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL)
+ dev = __dev_get_by_name(net, p.name);
+ if (!dev)
goto err_exit;
err = dev_open(dev);
}
@@ -1946,8 +2013,9 @@ err_exit:
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
- __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
+static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
+ int plen, __u8 ifa_flags, __u32 prefered_lft,
+ __u32 valid_lft)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -1961,7 +2029,8 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
if (!valid_lft || prefered_lft > valid_lft)
return -EINVAL;
- if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev)
return -ENODEV;
if ((idev = addrconf_add_dev(dev)) == NULL)
@@ -2006,13 +2075,15 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
return PTR_ERR(ifp);
}
-static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
+static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
+ int plen)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct net_device *dev;
- if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev)
return -ENODEV;
if ((idev = __in6_dev_get(dev)) == NULL)
@@ -2040,7 +2111,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
}
-int addrconf_add_ifaddr(void __user *arg)
+int addrconf_add_ifaddr(struct net *net, void __user *arg)
{
struct in6_ifreq ireq;
int err;
@@ -2052,13 +2123,14 @@ int addrconf_add_ifaddr(void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
- IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ ireq.ifr6_prefixlen, IFA_F_PERMANENT,
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
rtnl_unlock();
return err;
}
-int addrconf_del_ifaddr(void __user *arg)
+int addrconf_del_ifaddr(struct net *net, void __user *arg)
{
struct in6_ifreq ireq;
int err;
@@ -2070,7 +2142,8 @@ int addrconf_del_ifaddr(void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ ireq.ifr6_prefixlen);
rtnl_unlock();
return err;
}
@@ -2081,6 +2154,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
struct inet6_ifaddr * ifp;
struct in6_addr addr;
struct net_device *dev;
+ struct net *net = dev_net(idev->dev);
int scope;
ASSERT_RTNL();
@@ -2107,7 +2181,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
return;
}
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
struct in_device * in_dev = __in_dev_get_rtnl(dev);
if (in_dev && (dev->flags & IFF_UP)) {
struct in_ifaddr * ifa;
@@ -2270,15 +2344,16 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
{
struct net_device *link_dev;
+ struct net *net = dev_net(idev->dev);
/* first try to inherit the link-local address from the link device */
if (idev->dev->iflink &&
- (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) {
+ (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
if (!ipv6_inherit_linklocal(idev, link_dev))
return;
}
/* then try to inherit it from any device */
- for_each_netdev(&init_net, link_dev) {
+ for_each_netdev(net, link_dev) {
if (!ipv6_inherit_linklocal(idev, link_dev))
return;
}
@@ -2311,9 +2386,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
int run_pending = 0;
int err;
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
switch(event) {
case NETDEV_REGISTER:
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
@@ -2453,6 +2525,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
{
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, **bifa;
+ struct net *net = dev_net(dev);
int i;
ASSERT_RTNL();
@@ -2460,7 +2533,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
if ((dev->flags & IFF_LOOPBACK) && how == 1)
how = 0;
- rt6_ifdown(dev);
+ rt6_ifdown(net, dev);
neigh_ifdown(&nd_tbl, dev);
idev = __in6_dev_get(dev);
@@ -2579,8 +2652,6 @@ static void addrconf_rs_timer(unsigned long data)
spin_lock(&ifp->lock);
if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
- struct in6_addr all_routers;
-
/* The wait after the last probe can be shorter */
addrconf_mod_timer(ifp, AC_RS,
(ifp->probes == ifp->idev->cnf.rtr_solicits) ?
@@ -2588,9 +2659,7 @@ static void addrconf_rs_timer(unsigned long data)
ifp->idev->cnf.rtr_solicit_interval);
spin_unlock(&ifp->lock);
- ipv6_addr_all_routers(&all_routers);
-
- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
} else {
spin_unlock(&ifp->lock);
/*
@@ -2677,7 +2746,6 @@ static void addrconf_dad_timer(unsigned long data)
{
struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
struct inet6_dev *idev = ifp->idev;
- struct in6_addr unspec;
struct in6_addr mcaddr;
read_lock_bh(&idev->lock);
@@ -2706,9 +2774,8 @@ static void addrconf_dad_timer(unsigned long data)
read_unlock_bh(&idev->lock);
/* send a neighbour solicitation for our addr */
- memset(&unspec, 0, sizeof(unspec));
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
}
@@ -2731,16 +2798,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
ifp->idev->cnf.rtr_solicits > 0 &&
(dev->flags&IFF_LOOPBACK) == 0 &&
(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
- struct in6_addr all_routers;
-
- ipv6_addr_all_routers(&all_routers);
-
/*
* If a host as already performed a random delay
* [...] as part of DAD [...] there is no need
* to delay again before sending the first RS
*/
- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
spin_lock_bh(&ifp->lock);
ifp->probes = 1;
@@ -2776,12 +2839,12 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
{
struct inet6_ifaddr *ifa = NULL;
struct if6_iter_state *state = seq->private;
- struct net *net = state->p.net;
+ struct net *net = seq_file_net(seq);
for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
ifa = inet6_addr_lst[state->bucket];
- while (ifa && ifa->idev->dev->nd_net != net)
+ while (ifa && !net_eq(dev_net(ifa->idev->dev), net))
ifa = ifa->lst_next;
if (ifa)
break;
@@ -2792,12 +2855,12 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
{
struct if6_iter_state *state = seq->private;
- struct net *net = state->p.net;
+ struct net *net = seq_file_net(seq);
ifa = ifa->lst_next;
try_again:
if (ifa) {
- if (ifa->idev->dev->nd_net != net) {
+ if (!net_eq(dev_net(ifa->idev->dev), net)) {
ifa = ifa->lst_next;
goto try_again;
}
@@ -2915,9 +2978,9 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
u8 hash = ipv6_addr_hash(addr);
read_lock_bh(&addrconf_hash_lock);
for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
- if (ifp->idev->dev->nd_net != net)
+ if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
- if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
+ if (ipv6_addr_equal(&ifp->addr, addr) &&
(ifp->flags & IFA_F_HOMEADDRESS)) {
ret = 1;
break;
@@ -3064,15 +3127,12 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
static int
inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx;
int err;
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
if (err < 0)
return err;
@@ -3082,7 +3142,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
}
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
@@ -3125,7 +3185,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
static int
inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx;
@@ -3135,9 +3195,6 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
u8 ifa_flags;
int err;
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
if (err < 0)
return err;
@@ -3158,7 +3215,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
valid_lft = INFINITY_LIFE_TIME;
}
- dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+ dev = __dev_get_by_index(net, ifm->ifa_index);
if (dev == NULL)
return -ENODEV;
@@ -3171,8 +3228,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
* It would be best to check for !NLM_F_CREATE here but
* userspace alreay relies on not having to provide this.
*/
- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
- ifa_flags, preferred_lft, valid_lft);
+ return inet6_addr_add(net, ifm->ifa_index, pfx,
+ ifm->ifa_prefixlen, ifa_flags,
+ preferred_lft, valid_lft);
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -3337,12 +3395,13 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
struct inet6_ifaddr *ifa;
struct ifmcaddr6 *ifmca;
struct ifacaddr6 *ifaca;
+ struct net *net = sock_net(skb->sk);
s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
idx = 0;
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -3409,42 +3468,30 @@ cont:
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
enum addr_type_t type = UNICAST_ADDR;
- if (net != &init_net)
- return 0;
-
return inet6_dump_addr(skb, cb, type);
}
static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
enum addr_type_t type = MULTICAST_ADDR;
- if (net != &init_net)
- return 0;
-
return inet6_dump_addr(skb, cb, type);
}
static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
enum addr_type_t type = ANYCAST_ADDR;
- if (net != &init_net)
- return 0;
-
return inet6_dump_addr(skb, cb, type);
}
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
void *arg)
{
- struct net *net = in_skb->sk->sk_net;
+ struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL;
@@ -3453,9 +3500,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
struct sk_buff *skb;
int err;
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
if (err < 0)
goto errout;
@@ -3468,7 +3512,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
- dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+ dev = __dev_get_by_index(net, ifm->ifa_index);
if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) {
err = -EADDRNOTAVAIL;
@@ -3488,7 +3532,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
kfree_skb(skb);
goto errout_ifa;
}
- err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout_ifa:
in6_ifa_put(ifa);
errout:
@@ -3498,6 +3542,7 @@ errout:
static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
{
struct sk_buff *skb;
+ struct net *net = dev_net(ifa->idev->dev);
int err = -ENOBUFS;
skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
@@ -3511,10 +3556,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
errout:
if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3556,6 +3601,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
#endif
+#ifdef CONFIG_IPV6_MROUTE
+ array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+#endif
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -3673,18 +3721,15 @@ nla_put_failure:
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
int idx, err;
int s_idx = cb->args[0];
struct net_device *dev;
struct inet6_dev *idev;
- if (net != &init_net)
- return 0;
-
read_lock(&dev_base_lock);
idx = 0;
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if (idx < s_idx)
goto cont;
if ((idev = in6_dev_get(dev)) == NULL)
@@ -3706,6 +3751,7 @@ cont:
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
{
struct sk_buff *skb;
+ struct net *net = dev_net(idev->dev);
int err = -ENOBUFS;
skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
@@ -3719,10 +3765,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
errout:
if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
@@ -3775,6 +3821,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
struct prefix_info *pinfo)
{
struct sk_buff *skb;
+ struct net *net = dev_net(idev->dev);
int err = -ENOBUFS;
skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
@@ -3788,10 +3835,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+ err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
errout:
if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3887,7 +3934,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
- ctl_table addrconf_vars[__NET_IPV6_MAX];
+ ctl_table addrconf_vars[DEVCONF_MAX+1];
char *dev_name;
} addrconf_sysctl __read_mostly = {
.sysctl_header = NULL,
@@ -4105,6 +4152,16 @@ static struct addrconf_sysctl_table
},
#endif
+#ifdef CONFIG_IPV6_MROUTE
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "mc_forwarding",
+ .data = &ipv6_devconf.mc_forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
{
.ctl_name = 0, /* sentinel */
}
@@ -4186,7 +4243,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
NET_IPV6_NEIGH, "ipv6",
&ndisc_ifinfo_sysctl_change,
NULL);
- __addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name,
+ __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev->dev->ifindex, idev, &idev->cnf);
}
@@ -4281,6 +4338,32 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
+static int addrconf_net_init(struct net *net)
+{
+ return 0;
+}
+
+static void addrconf_net_exit(struct net *net)
+{
+ struct net_device *dev;
+
+ rtnl_lock();
+ /* clean dev list */
+ for_each_netdev(net, dev) {
+ if (__in6_dev_get(dev) == NULL)
+ continue;
+ addrconf_ifdown(dev, 1);
+ }
+ addrconf_ifdown(net->loopback_dev, 2);
+ rtnl_unlock();
+}
+
+static struct pernet_operations addrconf_net_ops = {
+ .init = addrconf_net_init,
+ .exit = addrconf_net_exit,
+};
+
/*
* Init / cleanup code
*/
@@ -4322,14 +4405,9 @@ int __init addrconf_init(void)
if (err)
goto errlo;
- ip6_null_entry.u.dst.dev = init_net.loopback_dev;
- ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev);
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- ip6_prohibit_entry.u.dst.dev = init_net.loopback_dev;
- ip6_prohibit_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev);
- ip6_blk_hole_entry.u.dst.dev = init_net.loopback_dev;
- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev);
-#endif
+ err = register_pernet_device(&addrconf_net_ops);
+ if (err)
+ return err;
register_netdevice_notifier(&ipv6_dev_notf);
@@ -4359,31 +4437,19 @@ errlo:
void addrconf_cleanup(void)
{
- struct net_device *dev;
struct inet6_ifaddr *ifa;
int i;
unregister_netdevice_notifier(&ipv6_dev_notf);
+ unregister_pernet_device(&addrconf_net_ops);
unregister_pernet_subsys(&addrconf_ops);
rtnl_lock();
/*
- * clean dev list.
- */
-
- for_each_netdev(&init_net, dev) {
- if (__in6_dev_get(dev) == NULL)
- continue;
- addrconf_ifdown(dev, 1);
- }
- addrconf_ifdown(init_net.loopback_dev, 2);
-
- /*
* Check hash table.
*/
-
write_lock_bh(&addrconf_hash_lock);
for (i=0; i < IN6_ADDR_HSIZE; i++) {
for (ifa=inet6_addr_lst[i]; ifa; ) {
@@ -4400,6 +4466,7 @@ void addrconf_cleanup(void)
write_unlock_bh(&addrconf_hash_lock);
del_timer(&addr_chk_timer);
-
rtnl_unlock();
+
+ unregister_pernet_subsys(&addrconf_net_ops);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index a3c5a72218f..9bfa8846f26 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -58,6 +58,7 @@ static struct ip6addrlbl_table
* ::ffff:0:0/96 V4MAPPED 4
* fc00::/7 N/A 5 ULA (RFC 4193)
* 2001::/32 N/A 6 Teredo (RFC 4380)
+ * 2001:10::/28 N/A 7 ORCHID (RFC 4843)
*
* Note: 0xffffffff is used if we do not have any policies.
*/
@@ -85,6 +86,10 @@ static const __initdata struct ip6addrlbl_init_table
.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
.prefixlen = 32,
.label = 6,
+ },{ /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ .prefixlen = 28,
+ .label = 7,
},{ /* ::ffff:0:0 */
.prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
.prefixlen = 96,
@@ -161,7 +166,7 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
rcu_read_unlock();
ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
- __FUNCTION__,
+ __func__,
NIP6(*addr), type, ifindex,
label);
@@ -177,7 +182,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
int addrtype;
ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
- __FUNCTION__,
+ __func__,
NIP6(*prefix), prefixlen,
ifindex,
(unsigned int)label);
@@ -221,7 +226,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
int ret = 0;
ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
- __FUNCTION__,
+ __func__,
newp, replace);
if (hlist_empty(&ip6addrlbl_table.head)) {
@@ -263,7 +268,7 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
int ret = 0;
ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
- __FUNCTION__,
+ __func__,
NIP6(*prefix), prefixlen,
ifindex,
(unsigned int)label,
@@ -289,7 +294,7 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
int ret = -ESRCH;
ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
- __FUNCTION__,
+ __func__,
NIP6(*prefix), prefixlen,
ifindex);
@@ -313,7 +318,7 @@ static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
int ret;
ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
- __FUNCTION__,
+ __func__,
NIP6(*prefix), prefixlen,
ifindex);
@@ -330,7 +335,7 @@ static __init int ip6addrlbl_init(void)
int err = 0;
int i;
- ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__);
+ ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
@@ -359,7 +364,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ifaddrlblmsg *ifal;
struct nlattr *tb[IFAL_MAX+1];
struct in6_addr *pfx;
@@ -447,7 +452,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb,
static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
struct hlist_node *pos;
int idx = 0, s_idx = cb->args[0];
@@ -485,7 +490,7 @@ static inline int ip6addrlbl_msgsize(void)
static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
void *arg)
{
- struct net *net = in_skb->sk->sk_net;
+ struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
struct nlattr *tb[IFAL_MAX+1];
struct in6_addr *addr;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index f0aa9773874..3c6aafb0218 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -61,6 +61,9 @@
#include <asm/uaccess.h>
#include <asm/system.h>
+#ifdef CONFIG_IPV6_MROUTE
+#include <linux/mroute6.h>
+#endif
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
@@ -92,9 +95,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
int try_loading_module = 0;
int err;
- if (net != &init_net)
- return -EAFNOSUPPORT;
-
if (sock->type != SOCK_RAW &&
sock->type != SOCK_DGRAM &&
!inet_ehash_secret)
@@ -248,6 +248,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
int addr_type = 0;
@@ -278,7 +279,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Check if the address belongs to the host. */
if (addr_type == IPV6_ADDR_MAPPED) {
v4addr = addr->sin6_addr.s6_addr32[3];
- if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
+ if (inet_addr_type(net, v4addr) != RTN_LOCAL) {
err = -EADDRNOTAVAIL;
goto out;
}
@@ -300,7 +301,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
err = -EINVAL;
goto out;
}
- dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ dev = dev_get_by_index(net, sk->sk_bound_dev_if);
if (!dev) {
err = -ENODEV;
goto out;
@@ -312,7 +313,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (!ipv6_chk_addr(&init_net, &addr->sin6_addr,
+ if (!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
if (dev)
dev_put(dev);
@@ -440,6 +441,7 @@ EXPORT_SYMBOL(inet6_getname);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
switch(cmd)
{
@@ -452,14 +454,14 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCADDRT:
case SIOCDELRT:
- return(ipv6_route_ioctl(cmd,(void __user *)arg));
+ return(ipv6_route_ioctl(net, cmd, (void __user *)arg));
case SIOCSIFADDR:
- return addrconf_add_ifaddr((void __user *) arg);
+ return addrconf_add_ifaddr(net, (void __user *) arg);
case SIOCDIFADDR:
- return addrconf_del_ifaddr((void __user *) arg);
+ return addrconf_del_ifaddr(net, (void __user *) arg);
case SIOCSIFDSTADDR:
- return addrconf_set_dstaddr((void __user *) arg);
+ return addrconf_set_dstaddr(net, (void __user *) arg);
default:
if (!sk->sk_prot->ioctl)
return -ENOIOCTLCMD;
@@ -678,6 +680,129 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
+static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb,
+ int proto)
+{
+ struct inet6_protocol *ops = NULL;
+
+ for (;;) {
+ struct ipv6_opt_hdr *opth;
+ int len;
+
+ if (proto != NEXTHDR_HOP) {
+ ops = rcu_dereference(inet6_protos[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, 8)))
+ break;
+
+ opth = (void *)skb->data;
+ len = ipv6_optlen(opth);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ break;
+
+ proto = opth->nexthdr;
+ __skb_pull(skb, len);
+ }
+
+ return ops;
+}
+
+static int ipv6_gso_send_check(struct sk_buff *skb)
+{
+ struct ipv6hdr *ipv6h;
+ struct inet6_protocol *ops;
+ int err = -EINVAL;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ err = -EPROTONOSUPPORT;
+
+ rcu_read_lock();
+ ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (likely(ops && ops->gso_send_check)) {
+ skb_reset_transport_header(skb);
+ err = ops->gso_send_check(skb);
+ }
+ rcu_read_unlock();
+
+out:
+ return err;
+}
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct ipv6hdr *ipv6h;
+ struct inet6_protocol *ops;
+
+ if (!(features & NETIF_F_V6_CSUM))
+ features &= ~NETIF_F_SG;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ 0)))
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ goto out;
+
+ ipv6h = ipv6_hdr(skb);
+ __skb_pull(skb, sizeof(*ipv6h));
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ rcu_read_lock();
+ ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (likely(ops && ops->gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->gso_segment(skb, features);
+ }
+ rcu_read_unlock();
+
+ if (unlikely(IS_ERR(segs)))
+ goto out;
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->payload_len = htons(skb->len - skb->mac_len -
+ sizeof(*ipv6h));
+ }
+
+out:
+ return segs;
+}
+
+static struct packet_type ipv6_packet_type = {
+ .type = __constant_htons(ETH_P_IPV6),
+ .func = ipv6_rcv,
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+};
+
+static int __init ipv6_packet_init(void)
+{
+ dev_add_pack(&ipv6_packet_type);
+ return 0;
+}
+
+static void ipv6_packet_cleanup(void)
+{
+ dev_remove_pack(&ipv6_packet_type);
+}
+
static int __init init_ipv6_mibs(void)
{
if (snmp_mib_init((void **)ipv6_statistics,
@@ -720,6 +845,8 @@ static void cleanup_ipv6_mibs(void)
static int inet6_net_init(struct net *net)
{
+ int err = 0;
+
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.flush_delay = 0;
net->ipv6.sysctl.ip6_rt_max_size = 4096;
@@ -731,12 +858,36 @@ static int inet6_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
- return 0;
+#ifdef CONFIG_PROC_FS
+ err = udp6_proc_init(net);
+ if (err)
+ goto out;
+ err = tcp6_proc_init(net);
+ if (err)
+ goto proc_tcp6_fail;
+ err = ac6_proc_init(net);
+ if (err)
+ goto proc_ac6_fail;
+out:
+#endif
+ return err;
+
+#ifdef CONFIG_PROC_FS
+proc_ac6_fail:
+ tcp6_proc_exit(net);
+proc_tcp6_fail:
+ udp6_proc_exit(net);
+ goto out;
+#endif
}
static void inet6_net_exit(struct net *net)
{
- return;
+#ifdef CONFIG_PROC_FS
+ udp6_proc_exit(net);
+ tcp6_proc_exit(net);
+ ac6_proc_exit(net);
+#endif
}
static struct pernet_operations inet6_net_ops = {
@@ -802,19 +953,16 @@ static int __init inet6_init(void)
err = register_pernet_subsys(&inet6_net_ops);
if (err)
goto register_pernet_fail;
-
-#ifdef CONFIG_SYSCTL
- err = ipv6_sysctl_register();
- if (err)
- goto sysctl_fail;
-#endif
- err = icmpv6_init(&inet6_family_ops);
+ err = icmpv6_init();
if (err)
goto icmp_fail;
- err = ndisc_init(&inet6_family_ops);
+#ifdef CONFIG_IPV6_MROUTE
+ ip6_mr_init();
+#endif
+ err = ndisc_init();
if (err)
goto ndisc_fail;
- err = igmp6_init(&inet6_family_ops);
+ err = igmp6_init();
if (err)
goto igmp_fail;
err = ipv6_netfilter_init();
@@ -825,17 +973,10 @@ static int __init inet6_init(void)
err = -ENOMEM;
if (raw6_proc_init())
goto proc_raw6_fail;
- if (tcp6_proc_init())
- goto proc_tcp6_fail;
- if (udp6_proc_init())
- goto proc_udp6_fail;
if (udplite6_proc_init())
goto proc_udplite6_fail;
if (ipv6_misc_proc_init())
goto proc_misc6_fail;
-
- if (ac6_proc_init())
- goto proc_anycast6_fail;
if (if6_proc_init())
goto proc_if6_fail;
#endif
@@ -874,9 +1015,19 @@ static int __init inet6_init(void)
err = ipv6_packet_init();
if (err)
goto ipv6_packet_fail;
+
+#ifdef CONFIG_SYSCTL
+ err = ipv6_sysctl_register();
+ if (err)
+ goto sysctl_fail;
+#endif
out:
return err;
+#ifdef CONFIG_SYSCTL
+sysctl_fail:
+ ipv6_packet_cleanup();
+#endif
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -897,16 +1048,10 @@ ip6_route_fail:
#ifdef CONFIG_PROC_FS
if6_proc_exit();
proc_if6_fail:
- ac6_proc_exit();
-proc_anycast6_fail:
ipv6_misc_proc_exit();
proc_misc6_fail:
udplite6_proc_exit();
proc_udplite6_fail:
- udp6_proc_exit();
-proc_udp6_fail:
- tcp6_proc_exit();
-proc_tcp6_fail:
raw6_proc_exit();
proc_raw6_fail:
#endif
@@ -918,10 +1063,6 @@ igmp_fail:
ndisc_fail:
icmpv6_cleanup();
icmp_fail:
-#ifdef CONFIG_SYSCTL
- ipv6_sysctl_unregister();
-sysctl_fail:
-#endif
unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
cleanup_ipv6_mibs();
@@ -949,6 +1090,9 @@ static void __exit inet6_exit(void)
/* Disallow any further netlink messages */
rtnl_unregister_all(PF_INET6);
+#ifdef CONFIG_SYSCTL
+ ipv6_sysctl_unregister();
+#endif
udpv6_exit();
udplitev6_exit();
tcpv6_exit();
@@ -964,11 +1108,8 @@ static void __exit inet6_exit(void)
/* Cleanup code parts. */
if6_proc_exit();
- ac6_proc_exit();
ipv6_misc_proc_exit();
udplite6_proc_exit();
- udp6_proc_exit();
- tcp6_proc_exit();
raw6_proc_exit();
#endif
ipv6_netfilter_fini();
@@ -976,9 +1117,7 @@ static void __exit inet6_exit(void)
ndisc_cleanup();
icmpv6_cleanup();
rawv6_exit();
-#ifdef CONFIG_SYSCTL
- ipv6_sysctl_unregister();
-#endif
+
unregister_pernet_subsys(&inet6_net_ops);
cleanup_ipv6_mibs();
proto_unregister(&rawv6_prot);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index e5f56c953b5..4e1b29fabdf 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -48,29 +48,6 @@ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
/* Big ac list lock for all the sockets */
static DEFINE_RWLOCK(ipv6_sk_ac_lock);
-static int
-ip6_onlink(struct in6_addr *addr, struct net_device *dev)
-{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
- int onlink;
-
- onlink = 0;
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev) {
- read_lock_bh(&idev->lock);
- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
- onlink = ipv6_prefix_equal(addr, &ifa->addr,
- ifa->prefix_len);
- if (onlink)
- break;
- }
- read_unlock_bh(&idev->lock);
- }
- rcu_read_unlock();
- return onlink;
-}
/*
* socket join an anycast group
@@ -82,6 +59,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
struct net_device *dev = NULL;
struct inet6_dev *idev;
struct ipv6_ac_socklist *pac;
+ struct net *net = sock_net(sk);
int ishost = !ipv6_devconf.forwarding;
int err = 0;
@@ -89,7 +67,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(&init_net, addr, NULL, 0))
+ if (ipv6_chk_addr(net, addr, NULL, 0))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -101,7 +79,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
dev = rt->rt6i_dev;
dev_hold(dev);
@@ -112,10 +90,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
} else {
/* router, no matching interface: just pick one */
- dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK);
+ dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK);
}
} else
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
if (dev == NULL) {
err = -ENODEV;
@@ -141,7 +119,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
* This obviates the need for propagating anycast routes while
* still allowing some non-router anycast participation.
*/
- if (!ip6_onlink(addr, dev)) {
+ if (!ipv6_chk_prefix(addr, dev)) {
if (ishost)
err = -EADDRNOTAVAIL;
if (err)
@@ -176,6 +154,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *dev;
struct ipv6_ac_socklist *pac, *prev_pac;
+ struct net *net = sock_net(sk);
write_lock_bh(&ipv6_sk_ac_lock);
prev_pac = NULL;
@@ -196,7 +175,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
write_unlock_bh(&ipv6_sk_ac_lock);
- dev = dev_get_by_index(&init_net, pac->acl_ifindex);
+ dev = dev_get_by_index(net, pac->acl_ifindex);
if (dev) {
ipv6_dev_ac_dec(dev, &pac->acl_addr);
dev_put(dev);
@@ -210,6 +189,7 @@ void ipv6_sock_ac_close(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk);
struct net_device *dev = NULL;
struct ipv6_ac_socklist *pac;
+ struct net *net = sock_net(sk);
int prev_index;
write_lock_bh(&ipv6_sk_ac_lock);
@@ -224,7 +204,7 @@ void ipv6_sock_ac_close(struct sock *sk)
if (pac->acl_ifindex != prev_index) {
if (dev)
dev_put(dev);
- dev = dev_get_by_index(&init_net, pac->acl_ifindex);
+ dev = dev_get_by_index(net, pac->acl_ifindex);
prev_index = pac->acl_ifindex;
}
if (dev)
@@ -417,14 +397,15 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
/*
* check if given interface (or any, if dev==0) has this anycast address
*/
-int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
+int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
+ struct in6_addr *addr)
{
int found = 0;
if (dev)
return ipv6_chk_acast_dev(dev, addr);
read_lock(&dev_base_lock);
- for_each_netdev(&init_net, dev)
+ for_each_netdev(net, dev)
if (ipv6_chk_acast_dev(dev, addr)) {
found = 1;
break;
@@ -436,6 +417,7 @@ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
#ifdef CONFIG_PROC_FS
struct ac6_iter_state {
+ struct seq_net_private p;
struct net_device *dev;
struct inet6_dev *idev;
};
@@ -446,9 +428,10 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
{
struct ifacaddr6 *im = NULL;
struct ac6_iter_state *state = ac6_seq_private(seq);
+ struct net *net = seq_file_net(seq);
state->idev = NULL;
- for_each_netdev(&init_net, state->dev) {
+ for_each_netdev(net, state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (!idev)
@@ -546,8 +529,8 @@ static const struct seq_operations ac6_seq_ops = {
static int ac6_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ac6_seq_ops,
- sizeof(struct ac6_iter_state));
+ return seq_open_net(inode, file, &ac6_seq_ops,
+ sizeof(struct ac6_iter_state));
}
static const struct file_operations ac6_seq_fops = {
@@ -555,20 +538,20 @@ static const struct file_operations ac6_seq_fops = {
.open = ac6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
-int __init ac6_proc_init(void)
+int ac6_proc_init(struct net *net)
{
- if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops))
+ if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops))
return -ENOMEM;
return 0;
}
-void ac6_proc_exit(void)
+void ac6_proc_exit(struct net *net)
{
- proc_net_remove(&init_net, "anycast6");
+ proc_net_remove(net, "anycast6");
}
#endif
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 695c0ca8a41..8d05527524e 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -29,24 +29,22 @@ struct fib6_rule
u8 tclass;
};
-static struct fib_rules_ops fib6_rules_ops;
-
-struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
- pol_lookup_t lookup)
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
+ int flags, pol_lookup_t lookup)
{
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
};
- fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg);
+ fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg);
if (arg.rule)
fib_rule_put(arg.rule);
if (arg.result)
return arg.result;
- dst_hold(&ip6_null_entry.u.dst);
- return &ip6_null_entry.u.dst;
+ dst_hold(&net->ipv6.ip6_null_entry->u.dst);
+ return &net->ipv6.ip6_null_entry->u.dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -54,28 +52,29 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
{
struct rt6_info *rt = NULL;
struct fib6_table *table;
+ struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
switch (rule->action) {
case FR_ACT_TO_TBL:
break;
case FR_ACT_UNREACHABLE:
- rt = &ip6_null_entry;
+ rt = net->ipv6.ip6_null_entry;
goto discard_pkt;
default:
case FR_ACT_BLACKHOLE:
- rt = &ip6_blk_hole_entry;
+ rt = net->ipv6.ip6_blk_hole_entry;
goto discard_pkt;
case FR_ACT_PROHIBIT:
- rt = &ip6_prohibit_entry;
+ rt = net->ipv6.ip6_prohibit_entry;
goto discard_pkt;
}
- table = fib6_get_table(rule->table);
+ table = fib6_get_table(net, rule->table);
if (table)
- rt = lookup(table, flp, flags);
+ rt = lookup(net, table, flp, flags);
- if (rt != &ip6_null_entry) {
+ if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
/*
@@ -85,8 +84,18 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
if ((rule->flags & FIB_RULE_FIND_SADDR) &&
r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
struct in6_addr saddr;
- if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst,
- &saddr))
+ unsigned int srcprefs = 0;
+
+ if (flags & RT6_LOOKUP_F_SRCPREF_TMP)
+ srcprefs |= IPV6_PREFER_SRC_TMP;
+ if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC)
+ srcprefs |= IPV6_PREFER_SRC_PUBLIC;
+ if (flags & RT6_LOOKUP_F_SRCPREF_COA)
+ srcprefs |= IPV6_PREFER_SRC_COA;
+
+ if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
+ &flp->fl6_dst, srcprefs,
+ &saddr))
goto again;
if (!ipv6_prefix_equal(&saddr, &r->src.addr,
r->src.plen))
@@ -145,13 +154,14 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb)
{
int err = -EINVAL;
+ struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (rule->action == FR_ACT_TO_TBL) {
if (rule->table == RT6_TABLE_UNSPEC)
goto errout;
- if (fib6_new_table(rule->table) == NULL) {
+ if (fib6_new_table(net, rule->table) == NULL) {
err = -ENOBUFS;
goto errout;
}
@@ -234,7 +244,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(16); /* src */
}
-static struct fib_rules_ops fib6_rules_ops = {
+static struct fib_rules_ops fib6_rules_ops_template = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
.addr_size = sizeof(struct in6_addr),
@@ -247,45 +257,64 @@ static struct fib_rules_ops fib6_rules_ops = {
.nlmsg_payload = fib6_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
- .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list),
.owner = THIS_MODULE,
.fro_net = &init_net,
};
-static int __init fib6_default_rules_init(void)
+static int fib6_rules_net_init(struct net *net)
{
- int err;
+ int err = -ENOMEM;
- err = fib_default_rule_add(&fib6_rules_ops, 0,
- RT6_TABLE_LOCAL, FIB_RULE_PERMANENT);
- if (err < 0)
- return err;
- err = fib_default_rule_add(&fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0);
- if (err < 0)
- return err;
- return 0;
-}
+ net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template,
+ sizeof(*net->ipv6.fib6_rules_ops),
+ GFP_KERNEL);
+ if (!net->ipv6.fib6_rules_ops)
+ goto out;
-int __init fib6_rules_init(void)
-{
- int ret;
+ net->ipv6.fib6_rules_ops->fro_net = net;
+ INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list);
- ret = fib6_default_rules_init();
- if (ret)
- goto out;
+ err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
+ RT6_TABLE_LOCAL, FIB_RULE_PERMANENT);
+ if (err)
+ goto out_fib6_rules_ops;
- ret = fib_rules_register(&fib6_rules_ops);
- if (ret)
- goto out_default_rules_init;
+ err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
+ 0x7FFE, RT6_TABLE_MAIN, 0);
+ if (err)
+ goto out_fib6_default_rule_add;
+
+ err = fib_rules_register(net->ipv6.fib6_rules_ops);
+ if (err)
+ goto out_fib6_default_rule_add;
out:
- return ret;
+ return err;
-out_default_rules_init:
- fib_rules_cleanup_ops(&fib6_rules_ops);
+out_fib6_default_rule_add:
+ fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops);
+out_fib6_rules_ops:
+ kfree(net->ipv6.fib6_rules_ops);
goto out;
}
+static void fib6_rules_net_exit(struct net *net)
+{
+ fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ kfree(net->ipv6.fib6_rules_ops);
+}
+
+static struct pernet_operations fib6_rules_net_ops = {
+ .init = fib6_rules_net_init,
+ .exit = fib6_rules_net_exit,
+};
+
+int __init fib6_rules_init(void)
+{
+ return register_pernet_subsys(&fib6_rules_net_ops);
+}
+
+
void fib6_rules_cleanup(void)
{
- fib_rules_unregister(&fib6_rules_ops);
+ unregister_pernet_subsys(&fib6_rules_net_ops);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 893287ecc62..d42dd16d348 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -64,6 +64,7 @@
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
+#include <net/inet_common.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -80,8 +81,10 @@ EXPORT_SYMBOL(icmpv6msg_statistics);
*
* On SMP we have one ICMP socket per-cpu.
*/
-static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
-#define icmpv6_socket __get_cpu_var(__icmpv6_socket)
+static inline struct sock *icmpv6_sk(struct net *net)
+{
+ return net->ipv6.icmp_sk[smp_processor_id()];
+}
static int icmpv6_rcv(struct sk_buff *skb);
@@ -90,11 +93,11 @@ static struct inet6_protocol icmpv6_protocol = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
-static __inline__ int icmpv6_xmit_lock(void)
+static __inline__ int icmpv6_xmit_lock(struct sock *sk)
{
local_bh_disable();
- if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
+ if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path (f.e. SIT or
* ip6ip6 tunnel) signals dst_link_failure() for an
* outgoing ICMP6 packet.
@@ -105,9 +108,9 @@ static __inline__ int icmpv6_xmit_lock(void)
return 0;
}
-static __inline__ void icmpv6_xmit_unlock(void)
+static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
- spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
+ spin_unlock_bh(&sk->sk_lock.slock);
}
/*
@@ -161,6 +164,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
struct flowi *fl)
{
struct dst_entry *dst;
+ struct net *net = sock_net(sk);
int res = 0;
/* Informational messages are not limited. */
@@ -176,7 +180,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
* XXX: perhaps the expire for routing entries cloned by
* this lookup should be more aggressive (not longer than timeout).
*/
- dst = ip6_route_output(sk, fl);
+ dst = ip6_route_output(net, sk, fl);
if (dst->error) {
IP6_INC_STATS(ip6_dst_idev(dst),
IPSTATS_MIB_OUTNOROUTES);
@@ -184,7 +188,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
res = 1;
} else {
struct rt6_info *rt = (struct rt6_info *)dst;
- int tmo = init_net.ipv6.sysctl.icmpv6_time;
+ int tmo = net->ipv6.sysctl.icmpv6_time;
/* Give more bandwidth to wider prefixes. */
if (rt->rt6i_dst.plen < 128)
@@ -303,6 +307,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {}
void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
struct net_device *dev)
{
+ struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct sock *sk;
@@ -332,7 +337,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
*/
addr_type = ipv6_addr_type(&hdr->daddr);
- if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
+ if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
saddr = &hdr->daddr;
/*
@@ -389,12 +394,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
fl.fl_icmp_code = code;
security_skb_classify_flow(skb, &fl);
- if (icmpv6_xmit_lock())
- return;
-
- sk = icmpv6_socket->sk;
+ sk = icmpv6_sk(net);
np = inet6_sk(sk);
+ if (icmpv6_xmit_lock(sk))
+ return;
+
if (!icmpv6_xrlim_allow(sk, type, &fl))
goto out;
@@ -462,9 +467,7 @@ route_done:
else
hlimit = np->hop_limit;
if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hlimit < 0)
- hlimit = ipv6_get_hoplimit(dst->dev);
+ hlimit = ip6_dst_hoplimit(dst);
tclass = np->tclass;
if (tclass < 0)
@@ -500,13 +503,14 @@ out_put:
out_dst_release:
dst_release(dst);
out:
- icmpv6_xmit_unlock();
+ icmpv6_xmit_unlock(sk);
}
EXPORT_SYMBOL(icmpv6_send);
static void icmpv6_echo_reply(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
@@ -537,12 +541,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
security_skb_classify_flow(skb, &fl);
- if (icmpv6_xmit_lock())
- return;
-
- sk = icmpv6_socket->sk;
+ sk = icmpv6_sk(net);
np = inet6_sk(sk);
+ if (icmpv6_xmit_lock(sk))
+ return;
+
if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
fl.oif = np->mcast_oif;
@@ -557,9 +561,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
else
hlimit = np->hop_limit;
if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hlimit < 0)
- hlimit = ipv6_get_hoplimit(dst->dev);
+ hlimit = ip6_dst_hoplimit(dst);
tclass = np->tclass;
if (tclass < 0)
@@ -586,7 +588,7 @@ out_put:
in6_dev_put(idev);
dst_release(dst);
out:
- icmpv6_xmit_unlock();
+ icmpv6_xmit_unlock(sk);
}
static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
@@ -777,19 +779,40 @@ drop_no_count:
return 0;
}
+void icmpv6_flow_init(struct sock *sk, struct flowi *fl,
+ u8 type,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ int oif)
+{
+ memset(fl, 0, sizeof(*fl));
+ ipv6_addr_copy(&fl->fl6_src, saddr);
+ ipv6_addr_copy(&fl->fl6_dst, daddr);
+ fl->proto = IPPROTO_ICMPV6;
+ fl->fl_icmp_type = type;
+ fl->fl_icmp_code = 0;
+ fl->oif = oif;
+ security_sk_classify_flow(sk, fl);
+}
+
/*
- * Special lock-class for __icmpv6_socket:
+ * Special lock-class for __icmpv6_sk:
*/
static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
-int __init icmpv6_init(struct net_proto_family *ops)
+static int __net_init icmpv6_sk_init(struct net *net)
{
struct sock *sk;
int err, i, j;
+ net->ipv6.icmp_sk =
+ kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
+ if (net->ipv6.icmp_sk == NULL)
+ return -ENOMEM;
+
for_each_possible_cpu(i) {
- err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
- &per_cpu(__icmpv6_socket, i));
+ err = inet_ctl_sock_create(&sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
printk(KERN_ERR
"Failed to initialize the ICMP6 control socket "
@@ -798,12 +821,12 @@ int __init icmpv6_init(struct net_proto_family *ops)
goto fail;
}
- sk = per_cpu(__icmpv6_socket, i)->sk;
- sk->sk_allocation = GFP_ATOMIC;
+ net->ipv6.icmp_sk[i] = sk;
+
/*
* Split off their lock-class, because sk->sk_dst_lock
* gets used from softirqs, which is safe for
- * __icmpv6_socket (because those never get directly used
+ * __icmpv6_sk (because those never get directly used
* via userspace syscalls), but unsafe for normal sockets.
*/
lockdep_set_class(&sk->sk_dst_lock,
@@ -814,39 +837,57 @@ int __init icmpv6_init(struct net_proto_family *ops)
*/
sk->sk_sndbuf =
(2 * ((64 * 1024) + sizeof(struct sk_buff)));
-
- sk->sk_prot->unhash(sk);
}
-
-
- if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
- printk(KERN_ERR "Failed to register ICMP6 protocol\n");
- err = -EAGAIN;
- goto fail;
- }
-
return 0;
fail:
- for (j = 0; j < i; j++) {
- if (!cpu_possible(j))
- continue;
- sock_release(per_cpu(__icmpv6_socket, j));
- }
-
+ for (j = 0; j < i; j++)
+ inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
+ kfree(net->ipv6.icmp_sk);
return err;
}
-void icmpv6_cleanup(void)
+static void __net_exit icmpv6_sk_exit(struct net *net)
{
int i;
for_each_possible_cpu(i) {
- sock_release(per_cpu(__icmpv6_socket, i));
+ inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
}
+ kfree(net->ipv6.icmp_sk);
+}
+
+static struct pernet_operations icmpv6_sk_ops = {
+ .init = icmpv6_sk_init,
+ .exit = icmpv6_sk_exit,
+};
+
+int __init icmpv6_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&icmpv6_sk_ops);
+ if (err < 0)
+ return err;
+
+ err = -EAGAIN;
+ if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
+ goto fail;
+ return 0;
+
+fail:
+ printk(KERN_ERR "Failed to register ICMP6 protocol\n");
+ unregister_pernet_subsys(&icmpv6_sk_ops);
+ return err;
+}
+
+void icmpv6_cleanup(void)
+{
+ unregister_pernet_subsys(&icmpv6_sk_ops);
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
+
static const struct icmp6_err {
int err;
int fatal;
@@ -927,6 +968,10 @@ struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
table = kmemdup(ipv6_icmp_table_template,
sizeof(ipv6_icmp_table_template),
GFP_KERNEL);
+
+ if (table)
+ table[0].data = &net->ipv6.sysctl.icmpv6_time;
+
return table;
}
#endif
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 78de42ada84..87801cc1b2f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -33,6 +33,10 @@ int inet6_csk_bind_conflict(const struct sock *sk,
const struct hlist_node *node;
/* We must walk the whole port owner list in this case. -DaveM */
+ /*
+ * See comment in inet_csk_bind_conflict about sock lookup
+ * vs net namespaces issues.
+ */
sk_for_each_bound(sk2, node, &tb->owners) {
if (sk != sk2 &&
(!sk->sk_bound_dev_if ||
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 99fd25f7f00..580014aea4d 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -24,7 +24,7 @@
void __inet6_hash(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_head *list;
rwlock_t *lock;
@@ -43,7 +43,7 @@ void __inet6_hash(struct sock *sk)
}
__sk_add_node(sk, list);
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock(lock);
}
EXPORT_SYMBOL(__inet6_hash);
@@ -105,7 +105,7 @@ struct sock *inet6_lookup_listener(struct net *net,
read_lock(&hashinfo->lhash_lock);
sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
- if (sk->sk_net == net && inet_sk(sk)->num == hnum &&
+ if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
sk->sk_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -172,7 +172,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
@@ -204,7 +204,7 @@ unique:
BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain);
sk->sk_hash = hash;
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock(lock);
if (twp != NULL) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bab72b6f144..50f3f8f8a59 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -48,8 +48,6 @@
#define RT6_TRACE(x...) do { ; } while (0)
#endif
-struct rt6_statistics rt6_stats;
-
static struct kmem_cache * fib6_node_kmem __read_mostly;
enum fib_walk_state_t
@@ -66,6 +64,7 @@ enum fib_walk_state_t
struct fib6_cleaner_t
{
struct fib6_walker_t w;
+ struct net *net;
int (*func)(struct rt6_info *, void *arg);
void *arg;
};
@@ -78,9 +77,10 @@ static DEFINE_RWLOCK(fib6_walker_lock);
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
-static struct rt6_info * fib6_find_prefix(struct fib6_node *fn);
-static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
+ struct rt6_info *rt);
+static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct fib6_walker_t *w);
static int fib6_walk_continue(struct fib6_walker_t *w);
@@ -93,7 +93,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w);
static __u32 rt_sernum;
-static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
+static void fib6_gc_timer_cb(unsigned long arg);
static struct fib6_walker_t fib6_walker_list = {
.prev = &fib6_walker_list,
@@ -166,22 +166,13 @@ static __inline__ void rt6_release(struct rt6_info *rt)
dst_free(&rt->u.dst);
}
-static struct fib6_table fib6_main_tbl = {
- .tb6_id = RT6_TABLE_MAIN,
- .tb6_root = {
- .leaf = &ip6_null_entry,
- .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
- },
-};
-
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
#else
#define FIB_TABLE_HASHSZ 1
#endif
-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
-static void fib6_link_table(struct fib6_table *tb)
+static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
unsigned int h;
@@ -197,52 +188,46 @@ static void fib6_link_table(struct fib6_table *tb)
* No protection necessary, this is the only list mutatation
* operation, tables never disappear once they exist.
*/
- hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+ hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-static struct fib6_table fib6_local_tbl = {
- .tb6_id = RT6_TABLE_LOCAL,
- .tb6_root = {
- .leaf = &ip6_null_entry,
- .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
- },
-};
-static struct fib6_table *fib6_alloc_table(u32 id)
+static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
{
struct fib6_table *table;
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (table != NULL) {
table->tb6_id = id;
- table->tb6_root.leaf = &ip6_null_entry;
+ table->tb6_root.leaf = net->ipv6.ip6_null_entry;
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
}
return table;
}
-struct fib6_table *fib6_new_table(u32 id)
+struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
struct fib6_table *tb;
if (id == 0)
id = RT6_TABLE_MAIN;
- tb = fib6_get_table(id);
+ tb = fib6_get_table(net, id);
if (tb)
return tb;
- tb = fib6_alloc_table(id);
+ tb = fib6_alloc_table(net, id);
if (tb != NULL)
- fib6_link_table(tb);
+ fib6_link_table(net, tb);
return tb;
}
-struct fib6_table *fib6_get_table(u32 id)
+struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
struct fib6_table *tb;
+ struct hlist_head *head;
struct hlist_node *node;
unsigned int h;
@@ -250,7 +235,8 @@ struct fib6_table *fib6_get_table(u32 id)
id = RT6_TABLE_MAIN;
h = id & (FIB_TABLE_HASHSZ - 1);
rcu_read_lock();
- hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
if (tb->tb6_id == id) {
rcu_read_unlock();
return tb;
@@ -261,33 +247,32 @@ struct fib6_table *fib6_get_table(u32 id)
return NULL;
}
-static void __init fib6_tables_init(void)
+static void fib6_tables_init(struct net *net)
{
- fib6_link_table(&fib6_main_tbl);
- fib6_link_table(&fib6_local_tbl);
+ fib6_link_table(net, net->ipv6.fib6_main_tbl);
+ fib6_link_table(net, net->ipv6.fib6_local_tbl);
}
-
#else
-struct fib6_table *fib6_new_table(u32 id)
+struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
- return fib6_get_table(id);
+ return fib6_get_table(net, id);
}
-struct fib6_table *fib6_get_table(u32 id)
+struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
- return &fib6_main_tbl;
+ return net->ipv6.fib6_main_tbl;
}
-struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
- pol_lookup_t lookup)
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
+ int flags, pol_lookup_t lookup)
{
- return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
+ return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags);
}
-static void __init fib6_tables_init(void)
+static void fib6_tables_init(struct net *net)
{
- fib6_link_table(&fib6_main_tbl);
+ fib6_link_table(net, net->ipv6.fib6_main_tbl);
}
#endif
@@ -361,18 +346,16 @@ end:
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct rt6_rtnl_dump_arg arg;
struct fib6_walker_t *w;
struct fib6_table *tb;
struct hlist_node *node;
+ struct hlist_head *head;
int res = 0;
- if (net != &init_net)
- return 0;
-
s_h = cb->args[0];
s_e = cb->args[1];
@@ -401,7 +384,8 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
- hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry(tb, node, head, tb6_hlist) {
if (e < s_e)
goto next;
res = fib6_dump_table(tb, skb, cb);
@@ -667,29 +651,29 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
inet6_rt_notify(RTM_NEWROUTE, rt, info);
- rt6_stats.fib_rt_entries++;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
if ((fn->fn_flags & RTN_RTINFO) == 0) {
- rt6_stats.fib_route_nodes++;
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
return 0;
}
-static __inline__ void fib6_start_gc(struct rt6_info *rt)
+static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
{
- if (ip6_fib_timer.expires == 0 &&
+ if (net->ipv6.ip6_fib_timer->expires == 0 &&
(rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
- mod_timer(&ip6_fib_timer, jiffies +
- init_net.ipv6.sysctl.ip6_rt_gc_interval);
+ mod_timer(net->ipv6.ip6_fib_timer, jiffies +
+ net->ipv6.sysctl.ip6_rt_gc_interval);
}
-void fib6_force_start_gc(void)
+void fib6_force_start_gc(struct net *net)
{
- if (ip6_fib_timer.expires == 0)
- mod_timer(&ip6_fib_timer, jiffies +
- init_net.ipv6.sysctl.ip6_rt_gc_interval);
+ if (net->ipv6.ip6_fib_timer->expires == 0)
+ mod_timer(net->ipv6.ip6_fib_timer, jiffies +
+ net->ipv6.sysctl.ip6_rt_gc_interval);
}
/*
@@ -733,8 +717,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (sfn == NULL)
goto st_failure;
- sfn->leaf = &ip6_null_entry;
- atomic_inc(&ip6_null_entry.rt6i_ref);
+ sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
sfn->fn_flags = RTN_ROOT;
sfn->fn_sernum = fib6_new_sernum();
@@ -776,9 +760,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
err = fib6_add_rt2node(fn, rt, info);
if (err == 0) {
- fib6_start_gc(rt);
+ fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags&RTF_CACHE))
- fib6_prune_clones(pn, rt);
+ fib6_prune_clones(info->nl_net, pn, rt);
}
out:
@@ -788,12 +772,16 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
+ if (pn != fn && pn->leaf == rt) {
+ pn->leaf = NULL;
+ atomic_dec(&rt->rt6i_ref);
+ }
if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn->leaf = fib6_find_prefix(pn);
+ pn->leaf = fib6_find_prefix(info->nl_net, pn);
#if RT6_DEBUG >= 2
if (!pn->leaf) {
BUG_TRAP(pn->leaf != NULL);
- pn->leaf = &ip6_null_entry;
+ pn->leaf = info->nl_net->ipv6.ip6_null_entry;
}
#endif
atomic_inc(&pn->leaf->rt6i_ref);
@@ -809,7 +797,7 @@ out:
*/
st_failure:
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
- fib6_repair_tree(fn);
+ fib6_repair_tree(info->nl_net, fn);
dst_free(&rt->u.dst);
return err;
#endif
@@ -975,10 +963,10 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
*
*/
-static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
{
if (fn->fn_flags&RTN_ROOT)
- return &ip6_null_entry;
+ return net->ipv6.ip6_null_entry;
while(fn) {
if(fn->left)
@@ -997,7 +985,8 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
* is the node we want to try and remove.
*/
-static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
+static struct fib6_node *fib6_repair_tree(struct net *net,
+ struct fib6_node *fn)
{
int children;
int nstate;
@@ -1024,11 +1013,11 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
|| (children && fn->fn_flags&RTN_ROOT)
#endif
) {
- fn->leaf = fib6_find_prefix(fn);
+ fn->leaf = fib6_find_prefix(net, fn);
#if RT6_DEBUG >= 2
if (fn->leaf==NULL) {
BUG_TRAP(fn->leaf);
- fn->leaf = &ip6_null_entry;
+ fn->leaf = net->ipv6.ip6_null_entry;
}
#endif
atomic_inc(&fn->leaf->rt6i_ref);
@@ -1101,14 +1090,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
{
struct fib6_walker_t *w;
struct rt6_info *rt = *rtp;
+ struct net *net = info->nl_net;
RT6_TRACE("fib6_del_route\n");
/* Unlink it */
*rtp = rt->u.dst.rt6_next;
rt->rt6i_node = NULL;
- rt6_stats.fib_rt_entries--;
- rt6_stats.fib_discarded_routes++;
+ net->ipv6.rt6_stats->fib_rt_entries--;
+ net->ipv6.rt6_stats->fib_discarded_routes++;
/* Reset round-robin state, if necessary */
if (fn->rr_ptr == rt)
@@ -1131,8 +1121,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
/* If it was last route, expunge its radix tree node */
if (fn->leaf == NULL) {
fn->fn_flags &= ~RTN_RTINFO;
- rt6_stats.fib_route_nodes--;
- fn = fib6_repair_tree(fn);
+ net->ipv6.rt6_stats->fib_route_nodes--;
+ fn = fib6_repair_tree(net, fn);
}
if (atomic_read(&rt->rt6i_ref) != 1) {
@@ -1144,7 +1134,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
*/
while (fn) {
if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
- fn->leaf = fib6_find_prefix(fn);
+ fn->leaf = fib6_find_prefix(net, fn);
atomic_inc(&fn->leaf->rt6i_ref);
rt6_release(rt);
}
@@ -1160,6 +1150,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
+ struct net *net = info->nl_net;
struct fib6_node *fn = rt->rt6i_node;
struct rt6_info **rtp;
@@ -1169,7 +1160,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
return -ENOENT;
}
#endif
- if (fn == NULL || rt == &ip6_null_entry)
+ if (fn == NULL || rt == net->ipv6.ip6_null_entry)
return -ENOENT;
BUG_TRAP(fn->fn_flags&RTN_RTINFO);
@@ -1184,7 +1175,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
pn = pn->parent;
}
#endif
- fib6_prune_clones(pn, rt);
+ fib6_prune_clones(info->nl_net, pn, rt);
}
/*
@@ -1314,12 +1305,12 @@ static int fib6_walk(struct fib6_walker_t *w)
static int fib6_clean_node(struct fib6_walker_t *w)
{
- struct nl_info info = {
- .nl_net = &init_net,
- };
int res;
struct rt6_info *rt;
struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
+ struct nl_info info = {
+ .nl_net = c->net,
+ };
for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
res = c->func(rt, c->arg);
@@ -1351,7 +1342,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
* ignoring pure split nodes) will be scanned.
*/
-static void fib6_clean_tree(struct fib6_node *root,
+static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
int prune, void *arg)
{
@@ -1362,23 +1353,26 @@ static void fib6_clean_tree(struct fib6_node *root,
c.w.prune = prune;
c.func = func;
c.arg = arg;
+ c.net = net;
fib6_walk(&c.w);
}
-void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
int prune, void *arg)
{
struct fib6_table *table;
struct hlist_node *node;
+ struct hlist_head *head;
unsigned int h;
rcu_read_lock();
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
- tb6_hlist) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
write_lock_bh(&table->tb6_lock);
- fib6_clean_tree(&table->tb6_root, func, prune, arg);
+ fib6_clean_tree(net, &table->tb6_root,
+ func, prune, arg);
write_unlock_bh(&table->tb6_lock);
}
}
@@ -1395,9 +1389,10 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
return 0;
}
-static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
+ struct rt6_info *rt)
{
- fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
+ fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
}
/*
@@ -1447,54 +1442,145 @@ static int fib6_age(struct rt6_info *rt, void *arg)
static DEFINE_SPINLOCK(fib6_gc_lock);
-void fib6_run_gc(unsigned long dummy)
+void fib6_run_gc(unsigned long expires, struct net *net)
{
- if (dummy != ~0UL) {
+ if (expires != ~0UL) {
spin_lock_bh(&fib6_gc_lock);
- gc_args.timeout = dummy ? (int)dummy :
- init_net.ipv6.sysctl.ip6_rt_gc_interval;
+ gc_args.timeout = expires ? (int)expires :
+ net->ipv6.sysctl.ip6_rt_gc_interval;
} else {
local_bh_disable();
if (!spin_trylock(&fib6_gc_lock)) {
- mod_timer(&ip6_fib_timer, jiffies + HZ);
+ mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ);
local_bh_enable();
return;
}
- gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval;
+ gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
}
gc_args.more = 0;
- ndisc_dst_gc(&gc_args.more);
- fib6_clean_all(fib6_age, 0, NULL);
+ icmp6_dst_gc(&gc_args.more);
+
+ fib6_clean_all(net, fib6_age, 0, NULL);
if (gc_args.more)
- mod_timer(&ip6_fib_timer, jiffies +
- init_net.ipv6.sysctl.ip6_rt_gc_interval);
+ mod_timer(net->ipv6.ip6_fib_timer, jiffies +
+ net->ipv6.sysctl.ip6_rt_gc_interval);
else {
- del_timer(&ip6_fib_timer);
- ip6_fib_timer.expires = 0;
+ del_timer(net->ipv6.ip6_fib_timer);
+ net->ipv6.ip6_fib_timer->expires = 0;
}
spin_unlock_bh(&fib6_gc_lock);
}
-int __init fib6_init(void)
+static void fib6_gc_timer_cb(unsigned long arg)
+{
+ fib6_run_gc(0, (struct net *)arg);
+}
+
+static int fib6_net_init(struct net *net)
{
int ret;
+ struct timer_list *timer;
+
+ ret = -ENOMEM;
+ timer = kzalloc(sizeof(*timer), GFP_KERNEL);
+ if (!timer)
+ goto out;
+
+ setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net);
+ net->ipv6.ip6_fib_timer = timer;
+
+ net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
+ if (!net->ipv6.rt6_stats)
+ goto out_timer;
+
+ net->ipv6.fib_table_hash =
+ kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ,
+ GFP_KERNEL);
+ if (!net->ipv6.fib_table_hash)
+ goto out_rt6_stats;
+
+ net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
+ GFP_KERNEL);
+ if (!net->ipv6.fib6_main_tbl)
+ goto out_fib_table_hash;
+
+ net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
+ net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
+ RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
+ GFP_KERNEL);
+ if (!net->ipv6.fib6_local_tbl)
+ goto out_fib6_main_tbl;
+ net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
+ net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
+ RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+#endif
+ fib6_tables_init(net);
+
+ ret = 0;
+out:
+ return ret;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+out_fib6_main_tbl:
+ kfree(net->ipv6.fib6_main_tbl);
+#endif
+out_fib_table_hash:
+ kfree(net->ipv6.fib_table_hash);
+out_rt6_stats:
+ kfree(net->ipv6.rt6_stats);
+out_timer:
+ kfree(timer);
+ goto out;
+ }
+
+static void fib6_net_exit(struct net *net)
+{
+ rt6_ifdown(net, NULL);
+ del_timer(net->ipv6.ip6_fib_timer);
+ kfree(net->ipv6.ip6_fib_timer);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ kfree(net->ipv6.fib6_local_tbl);
+#endif
+ kfree(net->ipv6.fib6_main_tbl);
+ kfree(net->ipv6.fib_table_hash);
+ kfree(net->ipv6.rt6_stats);
+}
+
+static struct pernet_operations fib6_net_ops = {
+ .init = fib6_net_init,
+ .exit = fib6_net_exit,
+};
+
+int __init fib6_init(void)
+{
+ int ret = -ENOMEM;
+
fib6_node_kmem = kmem_cache_create("fib6_nodes",
sizeof(struct fib6_node),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (!fib6_node_kmem)
- return -ENOMEM;
+ goto out;
- fib6_tables_init();
+ ret = register_pernet_subsys(&fib6_net_ops);
+ if (ret)
+ goto out_kmem_cache_create;
ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
if (ret)
- goto out_kmem_cache_create;
+ goto out_unregister_subsys;
out:
return ret;
+out_unregister_subsys:
+ unregister_pernet_subsys(&fib6_net_ops);
out_kmem_cache_create:
kmem_cache_destroy(fib6_node_kmem);
goto out;
@@ -1502,6 +1588,6 @@ out_kmem_cache_create:
void fib6_gc_cleanup(void)
{
- del_timer(&ip6_fib_timer);
+ unregister_pernet_subsys(&fib6_net_ops);
kmem_cache_destroy(fib6_node_kmem);
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 2b7d9ee9883..eb7a940310f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -62,23 +62,23 @@ static DEFINE_RWLOCK(ip6_fl_lock);
static DEFINE_RWLOCK(ip6_sk_fl_lock);
-static __inline__ struct ip6_flowlabel * __fl_lookup(__be32 label)
+static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
struct ip6_flowlabel *fl;
for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
- if (fl->label == label)
+ if (fl->label == label && fl->fl_net == net)
return fl;
}
return NULL;
}
-static struct ip6_flowlabel * fl_lookup(__be32 label)
+static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
struct ip6_flowlabel *fl;
read_lock_bh(&ip6_fl_lock);
- fl = __fl_lookup(label);
+ fl = __fl_lookup(net, label);
if (fl)
atomic_inc(&fl->users);
read_unlock_bh(&ip6_fl_lock);
@@ -88,8 +88,10 @@ static struct ip6_flowlabel * fl_lookup(__be32 label)
static void fl_free(struct ip6_flowlabel *fl)
{
- if (fl)
+ if (fl) {
+ release_net(fl->fl_net);
kfree(fl->opt);
+ }
kfree(fl);
}
@@ -112,7 +114,6 @@ static void fl_release(struct ip6_flowlabel *fl)
time_after(ip6_fl_gc_timer.expires, ttd))
mod_timer(&ip6_fl_gc_timer, ttd);
}
-
write_unlock_bh(&ip6_fl_lock);
}
@@ -148,13 +149,34 @@ static void ip6_fl_gc(unsigned long dummy)
if (!sched && atomic_read(&fl_size))
sched = now + FL_MAX_LINGER;
if (sched) {
- ip6_fl_gc_timer.expires = sched;
- add_timer(&ip6_fl_gc_timer);
+ mod_timer(&ip6_fl_gc_timer, sched);
+ }
+ write_unlock(&ip6_fl_lock);
+}
+
+static void ip6_fl_purge(struct net *net)
+{
+ int i;
+
+ write_lock(&ip6_fl_lock);
+ for (i = 0; i <= FL_HASH_MASK; i++) {
+ struct ip6_flowlabel *fl, **flp;
+ flp = &fl_ht[i];
+ while ((fl = *flp) != NULL) {
+ if (fl->fl_net == net && atomic_read(&fl->users) == 0) {
+ *flp = fl->next;
+ fl_free(fl);
+ atomic_dec(&fl_size);
+ continue;
+ }
+ flp = &fl->next;
+ }
}
write_unlock(&ip6_fl_lock);
}
-static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label)
+static struct ip6_flowlabel *fl_intern(struct net *net,
+ struct ip6_flowlabel *fl, __be32 label)
{
struct ip6_flowlabel *lfl;
@@ -165,7 +187,7 @@ static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label)
for (;;) {
fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
if (fl->label) {
- lfl = __fl_lookup(fl->label);
+ lfl = __fl_lookup(net, fl->label);
if (lfl == NULL)
break;
}
@@ -179,7 +201,7 @@ static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label)
* done in ipv6_flowlabel_opt - sock is locked, so new entry
* with the same label can only appear on another sock
*/
- lfl = __fl_lookup(fl->label);
+ lfl = __fl_lookup(net, fl->label);
if (lfl != NULL) {
atomic_inc(&lfl->users);
write_unlock_bh(&ip6_fl_lock);
@@ -298,7 +320,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
}
static struct ip6_flowlabel *
-fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p)
+fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
+ int optlen, int *err_p)
{
struct ip6_flowlabel *fl;
int olen;
@@ -343,6 +366,7 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *
}
}
+ fl->fl_net = hold_net(net);
fl->expires = jiffies;
err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
if (err)
@@ -441,6 +465,7 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
int err;
+ struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_flowlabel_req freq;
struct ipv6_fl_socklist *sfl1=NULL;
@@ -483,7 +508,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
read_unlock_bh(&ip6_sk_fl_lock);
if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
- fl = fl_lookup(freq.flr_label);
+ fl = fl_lookup(net, freq.flr_label);
if (fl) {
err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
fl_release(fl);
@@ -496,7 +521,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
- fl = fl_create(&freq, optval, optlen, &err);
+ fl = fl_create(net, &freq, optval, optlen, &err);
if (fl == NULL)
return err;
sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
@@ -518,7 +543,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
read_unlock_bh(&ip6_sk_fl_lock);
if (fl1 == NULL)
- fl1 = fl_lookup(freq.flr_label);
+ fl1 = fl_lookup(net, freq.flr_label);
if (fl1) {
recheck:
err = -EEXIST;
@@ -559,7 +584,7 @@ release:
if (sfl1 == NULL || (err = mem_check(sk)) != 0)
goto done;
- fl1 = fl_intern(fl, freq.flr_label);
+ fl1 = fl_intern(net, fl, freq.flr_label);
if (fl1 != NULL)
goto recheck;
@@ -586,6 +611,7 @@ done:
#ifdef CONFIG_PROC_FS
struct ip6fl_iter_state {
+ struct seq_net_private p;
int bucket;
};
@@ -595,12 +621,15 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
struct ip6_flowlabel *fl = NULL;
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ struct net *net = seq_file_net(seq);
for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
- if (fl_ht[state->bucket]) {
- fl = fl_ht[state->bucket];
+ fl = fl_ht[state->bucket];
+
+ while (fl && fl->fl_net != net)
+ fl = fl->next;
+ if (fl)
break;
- }
}
return fl;
}
@@ -608,12 +637,18 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ struct net *net = seq_file_net(seq);
fl = fl->next;
+try_again:
+ while (fl && fl->fl_net != net)
+ fl = fl->next;
+
while (!fl) {
- if (++state->bucket <= FL_HASH_MASK)
+ if (++state->bucket <= FL_HASH_MASK) {
fl = fl_ht[state->bucket];
- else
+ goto try_again;
+ } else
break;
}
return fl;
@@ -683,8 +718,8 @@ static const struct seq_operations ip6fl_seq_ops = {
static int ip6fl_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ip6fl_seq_ops,
- sizeof(struct ip6fl_iter_state));
+ return seq_open_net(inode, file, &ip6fl_seq_ops,
+ sizeof(struct ip6fl_iter_state));
}
static const struct file_operations ip6fl_seq_fops = {
@@ -692,12 +727,13 @@ static const struct file_operations ip6fl_seq_fops = {
.open = ip6fl_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
static int ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops))
+ if (!proc_net_fops_create(net, "ip6_flowlabel",
+ S_IRUGO, &ip6fl_seq_fops))
return -ENOMEM;
return 0;
}
@@ -717,13 +753,24 @@ static inline void ip6_flowlabel_proc_fini(struct net *net)
}
#endif
+static inline void ip6_flowlabel_net_exit(struct net *net)
+{
+ ip6_fl_purge(net);
+ ip6_flowlabel_proc_fini(net);
+}
+
+static struct pernet_operations ip6_flowlabel_net_ops = {
+ .init = ip6_flowlabel_proc_init,
+ .exit = ip6_flowlabel_net_exit,
+};
+
int ip6_flowlabel_init(void)
{
- return ip6_flowlabel_proc_init(&init_net);
+ return register_pernet_subsys(&ip6_flowlabel_net_ops);
}
void ip6_flowlabel_cleanup(void)
{
del_timer(&ip6_fl_gc_timer);
- ip6_flowlabel_proc_fini(&init_net);
+ unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 98ab4f45990..4e5c8615832 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -29,6 +29,7 @@
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/icmpv6.h>
+#include <linux/mroute6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
@@ -61,11 +62,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
u32 pkt_len;
struct inet6_dev *idev;
- if (dev->nd_net != &init_net) {
- kfree_skb(skb);
- return 0;
- }
-
if (skb->pkt_type == PACKET_OTHERHOST) {
kfree_skb(skb);
return 0;
@@ -241,38 +237,84 @@ int ip6_mc_input(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+#ifdef CONFIG_IPV6_MROUTE
/*
- * IPv6 multicast router mode isnt currently supported.
+ * IPv6 multicast router mode is now supported ;)
*/
-#if 0
- if (ipv6_config.multicast_route) {
- int addr_type;
-
- addr_type = ipv6_addr_type(&hdr->daddr);
-
- if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) {
- struct sk_buff *skb2;
- struct dst_entry *dst;
+ if (ipv6_devconf.mc_forwarding &&
+ likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
+ /*
+ * Okay, we try to forward - split and duplicate
+ * packets.
+ */
+ struct sk_buff *skb2;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+
+ /* Check for MLD */
+ if (unlikely(opt->ra)) {
+ /* Check if this is a mld message */
+ u8 *ptr = skb_network_header(skb) + opt->ra;
+ struct icmp6hdr *icmp6;
+ u8 nexthdr = hdr->nexthdr;
+ int offset;
+
+ /* Check if the value of Router Alert
+ * is for MLD (0x0000).
+ */
+ if ((ptr[2] | ptr[3]) == 0) {
+ deliver = 0;
+
+ if (!ipv6_ext_hdr(nexthdr)) {
+ /* BUG */
+ goto out;
+ }
+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
+ &nexthdr);
+ if (offset < 0)
+ goto out;
+
+ if (nexthdr != IPPROTO_ICMPV6)
+ goto out;
+
+ if (!pskb_may_pull(skb, (skb_network_header(skb) +
+ offset + 1 - skb->data)))
+ goto out;
+
+ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
+
+ switch (icmp6->icmp6_type) {
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_MLD2_REPORT:
+ deliver = 1;
+ break;
+ }
+ goto out;
+ }
+ /* unknown RA - process it normally */
+ }
- dst = skb->dst;
+ if (deliver)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ else {
+ skb2 = skb;
+ skb = NULL;
+ }
- if (deliver) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- dst_output(skb2);
- } else {
- dst_output(skb);
- return 0;
- }
+ if (skb2) {
+ skb2->dev = skb2->dst->dev;
+ ip6_mr_input(skb2);
}
}
+out:
#endif
-
- if (likely(deliver)) {
+ if (likely(deliver))
ip6_input(skb);
- return 0;
+ else {
+ /* discard */
+ kfree_skb(skb);
}
- /* discard */
- kfree_skb(skb);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8b67ca07467..0af2e055f88 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -55,6 +55,7 @@
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
+#include <linux/mroute6.h>
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
@@ -137,8 +138,9 @@ static int ip6_output2(struct sk_buff *skb)
struct inet6_dev *idev = ip6_dst_idev(skb->dst);
if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
- ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr)) {
+ ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
+ ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
@@ -237,9 +239,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
if (np)
hlimit = np->hop_limit;
if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hlimit < 0)
- hlimit = ipv6_get_hoplimit(dst->dev);
+ hlimit = ip6_dst_hoplimit(dst);
tclass = -1;
if (np)
@@ -286,7 +286,7 @@ EXPORT_SYMBOL(ip6_xmit);
*/
int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
int proto, int len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -404,6 +404,7 @@ int ip6_forward(struct sk_buff *skb)
struct dst_entry *dst = skb->dst;
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(dst->dev);
if (ipv6_devconf.forwarding == 0)
goto error;
@@ -450,7 +451,7 @@ int ip6_forward(struct sk_buff *skb)
/* XXX: idev->cnf.proxy_ndp? */
if (ipv6_devconf.proxy_ndp &&
- pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
+ pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0)
return ip6_input(skb);
@@ -596,7 +597,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
return offset;
}
-EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
@@ -912,15 +912,19 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct dst_entry **dst, struct flowi *fl)
{
int err;
+ struct net *net = sock_net(sk);
if (*dst == NULL)
- *dst = ip6_route_output(sk, fl);
+ *dst = ip6_route_output(net, sk, fl);
if ((err = (*dst)->error))
goto out_err_release;
if (ipv6_addr_any(&fl->fl6_src)) {
- err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
+ err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
+ &fl->fl6_dst,
+ sk ? inet6_sk(sk)->srcprefs : 0,
+ &fl->fl6_src);
if (err)
goto out_err_release;
}
@@ -939,7 +943,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct flowi fl_gw;
int redirect;
- ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
+ ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
(*dst)->dev, 1);
redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
@@ -954,7 +958,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
dst_release(*dst);
memcpy(&fl_gw, fl, sizeof(struct flowi));
memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
- *dst = ip6_route_output(sk, &fl_gw);
+ *dst = ip6_route_output(net, sk, &fl_gw);
if ((err = (*dst)->error))
goto out_err_release;
}
@@ -1113,7 +1117,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
/* need source address above miyazawa*/
}
dst_hold(&rt->u.dst);
- np->cork.rt = rt;
+ inet->cork.dst = &rt->u.dst;
inet->cork.fl = *fl;
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
@@ -1134,7 +1138,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
- rt = np->cork.rt;
+ rt = (struct rt6_info *)inet->cork.dst;
fl = &inet->cork.fl;
if (inet->cork.flags & IPCORK_OPT)
opt = np->cork.opt;
@@ -1379,9 +1383,9 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
inet->cork.flags &= ~IPCORK_OPT;
kfree(np->cork.opt);
np->cork.opt = NULL;
- if (np->cork.rt) {
- dst_release(&np->cork.rt->u.dst);
- np->cork.rt = NULL;
+ if (inet->cork.dst) {
+ dst_release(inet->cork.dst);
+ inet->cork.dst = NULL;
inet->cork.flags &= ~IPCORK_ALLFRAG;
}
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
@@ -1396,7 +1400,7 @@ int ip6_push_pending_frames(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = np->cork.opt;
- struct rt6_info *rt = np->cork.rt;
+ struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
struct flowi *fl = &inet->cork.fl;
unsigned char proto = fl->proto;
int err = 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 78f43888092..2bda3ba100b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -52,6 +52,8 @@
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -60,7 +62,7 @@ MODULE_LICENSE("GPL");
#define IPV6_TLV_TEL_DST_SIZE 8
#ifdef IP6_TNL_DEBUG
-#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__)
+#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif
@@ -78,14 +80,15 @@ static int ip6_fb_tnl_dev_init(struct net_device *dev);
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
-/* the IPv6 tunnel fallback device */
-static struct net_device *ip6_fb_tnl_dev;
-
-
-/* lists for storing tunnels in use */
-static struct ip6_tnl *tnls_r_l[HASH_SIZE];
-static struct ip6_tnl *tnls_wc[1];
-static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
+static int ip6_tnl_net_id;
+struct ip6_tnl_net {
+ /* the IPv6 tunnel fallback device */
+ struct net_device *fb_tnl_dev;
+ /* lists for storing tunnels in use */
+ struct ip6_tnl *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl *tnls_wc[1];
+ struct ip6_tnl **tnls[2];
+};
/* lock for the tunnel lists */
static DEFINE_RWLOCK(ip6_tnl_lock);
@@ -130,19 +133,20 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
**/
static struct ip6_tnl *
-ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
struct ip6_tnl *t;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- for (t = tnls_r_l[h0 ^ h1]; t; t = t->next) {
+ for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr) &&
(t->dev->flags & IFF_UP))
return t;
}
- if ((t = tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
+ if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
return t;
return NULL;
@@ -160,7 +164,7 @@ ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
**/
static struct ip6_tnl **
-ip6_tnl_bucket(struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
@@ -171,7 +175,7 @@ ip6_tnl_bucket(struct ip6_tnl_parm *p)
prio = 1;
h = HASH(remote) ^ HASH(local);
}
- return &tnls[prio][h];
+ return &ip6n->tnls[prio][h];
}
/**
@@ -180,9 +184,9 @@ ip6_tnl_bucket(struct ip6_tnl_parm *p)
**/
static void
-ip6_tnl_link(struct ip6_tnl *t)
+ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
- struct ip6_tnl **tp = ip6_tnl_bucket(&t->parms);
+ struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
t->next = *tp;
write_lock_bh(&ip6_tnl_lock);
@@ -196,11 +200,11 @@ ip6_tnl_link(struct ip6_tnl *t)
**/
static void
-ip6_tnl_unlink(struct ip6_tnl *t)
+ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
struct ip6_tnl **tp;
- for (tp = ip6_tnl_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+ for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
if (t == *tp) {
write_lock_bh(&ip6_tnl_lock);
*tp = t->next;
@@ -222,12 +226,13 @@ ip6_tnl_unlink(struct ip6_tnl *t)
* created tunnel or NULL
**/
-static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
{
struct net_device *dev;
struct ip6_tnl *t;
char name[IFNAMSIZ];
int err;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (p->name[0])
strlcpy(name, p->name, IFNAMSIZ);
@@ -238,6 +243,8 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
if (dev == NULL)
goto failed;
+ dev_net_set(dev, net);
+
if (strchr(name, '%')) {
if (dev_alloc_name(dev, name) < 0)
goto failed_free;
@@ -251,7 +258,7 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
goto failed_free;
dev_hold(dev);
- ip6_tnl_link(t);
+ ip6_tnl_link(ip6n, t);
return t;
failed_free:
@@ -274,20 +281,22 @@ failed:
* matching tunnel or NULL
**/
-static struct ip6_tnl *ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
+static struct ip6_tnl *ip6_tnl_locate(struct net *net,
+ struct ip6_tnl_parm *p, int create)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
struct ip6_tnl *t;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- for (t = *ip6_tnl_bucket(p); t; t = t->next) {
+ for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr))
return t;
}
if (!create)
return NULL;
- return ip6_tnl_create(p);
+ return ip6_tnl_create(net, p);
}
/**
@@ -302,13 +311,15 @@ static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- if (dev == ip6_fb_tnl_dev) {
+ if (dev == ip6n->fb_tnl_dev) {
write_lock_bh(&ip6_tnl_lock);
- tnls_wc[0] = NULL;
+ ip6n->tnls_wc[0] = NULL;
write_unlock_bh(&ip6_tnl_lock);
} else {
- ip6_tnl_unlink(t);
+ ip6_tnl_unlink(ip6n, t);
}
ip6_tnl_dst_reset(t);
dev_put(dev);
@@ -401,7 +412,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
processing of the error. */
read_lock(&ip6_tnl_lock);
- if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+ if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
+ &ipv6h->saddr)) == NULL)
goto out;
if (t->parms.proto != ipproto && t->parms.proto != 0)
@@ -533,7 +545,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
fl.fl4_dst = eiph->saddr;
fl.fl4_tos = RT_TOS(eiph->tos);
fl.proto = IPPROTO_IPIP;
- if (ip_route_output_key(&init_net, &rt, &fl))
+ if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
goto out;
skb2->dev = rt->u.dst.dev;
@@ -545,7 +557,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
fl.fl4_dst = eiph->daddr;
fl.fl4_src = eiph->saddr;
fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(&init_net, &rt, &fl) ||
+ if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
rt->u.dst.dev->type != ARPHRD_TUNNEL) {
ip_rt_put(rt);
goto out;
@@ -602,7 +614,8 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
skb_reset_network_header(skb2);
/* Try to guess incoming interface */
- rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
+ NULL, 0, 0);
if (rt && rt->rt6i_dev)
skb2->dev = rt->rt6i_dev;
@@ -646,16 +659,17 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
int ret = 0;
+ struct net *net = dev_net(t->dev);
if (p->flags & IP6_TNL_F_CAP_RCV) {
struct net_device *ldev = NULL;
if (p->link)
- ldev = dev_get_by_index(&init_net, p->link);
+ ldev = dev_get_by_index(net, p->link);
if ((ipv6_addr_is_multicast(&p->laddr) ||
- likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
+ likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
+ likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
ret = 1;
if (ldev)
@@ -684,7 +698,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
read_lock(&ip6_tnl_lock);
- if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+ if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+ &ipv6h->daddr)) != NULL) {
if (t->parms.proto != ipproto && t->parms.proto != 0) {
read_unlock(&ip6_tnl_lock);
goto discard;
@@ -782,19 +797,20 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
int ret = 0;
+ struct net *net = dev_net(t->dev);
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
if (p->link)
- ldev = dev_get_by_index(&init_net, p->link);
+ ldev = dev_get_by_index(net, p->link);
- if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
printk(KERN_WARNING
"%s xmit: Local address not yet configured!\n",
p->name);
else if (!ipv6_addr_is_multicast(&p->raddr) &&
- unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
printk(KERN_WARNING
"%s xmit: Routing loop! "
"Remote address found on this node!\n",
@@ -847,7 +863,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
if ((dst = ip6_tnl_dst_check(t)) != NULL)
dst_hold(dst);
else {
- dst = ip6_route_output(NULL, fl);
+ dst = ip6_route_output(dev_net(dev), NULL, fl);
if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
goto tx_err_link_failure;
@@ -1112,7 +1128,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
+ struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ &p->raddr, &p->laddr,
p->link, strict);
if (rt == NULL)
@@ -1191,15 +1208,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip6_tnl_parm p;
struct ip6_tnl *t = NULL;
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
- if (dev == ip6_fb_tnl_dev) {
+ if (dev == ip6n->fb_tnl_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
err = -EFAULT;
break;
}
- t = ip6_tnl_locate(&p, 0);
+ t = ip6_tnl_locate(net, &p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
@@ -1220,8 +1239,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
p.proto != 0)
break;
- t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
- if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
+ if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -1230,9 +1249,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- ip6_tnl_unlink(t);
+ ip6_tnl_unlink(ip6n, t);
err = ip6_tnl_change(t, &p);
- ip6_tnl_link(t);
+ ip6_tnl_link(ip6n, t);
netdev_state_change(dev);
}
if (t) {
@@ -1248,15 +1267,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
break;
- if (dev == ip6_fb_tnl_dev) {
+ if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -ENOENT;
- if ((t = ip6_tnl_locate(&p, 0)) == NULL)
+ if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
break;
err = -EPERM;
- if (t->dev == ip6_fb_tnl_dev)
+ if (t->dev == ip6n->fb_tnl_dev)
break;
dev = t->dev;
}
@@ -1324,6 +1343,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
@@ -1365,10 +1385,13 @@ static int
ip6_fb_tnl_dev_init(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
ip6_tnl_dev_init_gen(dev);
t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
- tnls_wc[0] = t;
+ ip6n->tnls_wc[0] = t;
return 0;
}
@@ -1384,6 +1407,78 @@ static struct xfrm6_tunnel ip6ip6_handler = {
.priority = 1,
};
+static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
+{
+ int h;
+ struct ip6_tnl *t;
+
+ for (h = 0; h < HASH_SIZE; h++) {
+ while ((t = ip6n->tnls_r_l[h]) != NULL)
+ unregister_netdevice(t->dev);
+ }
+
+ t = ip6n->tnls_wc[0];
+ unregister_netdevice(t->dev);
+}
+
+static int ip6_tnl_init_net(struct net *net)
+{
+ int err;
+ struct ip6_tnl_net *ip6n;
+
+ err = -ENOMEM;
+ ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL);
+ if (ip6n == NULL)
+ goto err_alloc;
+
+ err = net_assign_generic(net, ip6_tnl_net_id, ip6n);
+ if (err < 0)
+ goto err_assign;
+
+ ip6n->tnls[0] = ip6n->tnls_wc;
+ ip6n->tnls[1] = ip6n->tnls_r_l;
+
+ err = -ENOMEM;
+ ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+ ip6_tnl_dev_setup);
+
+ if (!ip6n->fb_tnl_dev)
+ goto err_alloc_dev;
+
+ ip6n->fb_tnl_dev->init = ip6_fb_tnl_dev_init;
+ dev_net_set(ip6n->fb_tnl_dev, net);
+
+ err = register_netdev(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+ return 0;
+
+err_register:
+ free_netdev(ip6n->fb_tnl_dev);
+err_alloc_dev:
+ /* nothing */
+err_assign:
+ kfree(ip6n);
+err_alloc:
+ return err;
+}
+
+static void ip6_tnl_exit_net(struct net *net)
+{
+ struct ip6_tnl_net *ip6n;
+
+ ip6n = net_generic(net, ip6_tnl_net_id);
+ rtnl_lock();
+ ip6_tnl_destroy_tunnels(ip6n);
+ rtnl_unlock();
+ kfree(ip6n);
+}
+
+static struct pernet_operations ip6_tnl_net_ops = {
+ .init = ip6_tnl_init_net,
+ .exit = ip6_tnl_exit_net,
+};
+
/**
* ip6_tunnel_init - register protocol and reserve needed resources
*
@@ -1405,21 +1500,12 @@ static int __init ip6_tunnel_init(void)
err = -EAGAIN;
goto unreg_ip4ip6;
}
- ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
- ip6_tnl_dev_setup);
- if (!ip6_fb_tnl_dev) {
- err = -ENOMEM;
- goto fail;
- }
- ip6_fb_tnl_dev->init = ip6_fb_tnl_dev_init;
-
- if ((err = register_netdev(ip6_fb_tnl_dev))) {
- free_netdev(ip6_fb_tnl_dev);
- goto fail;
- }
+ err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops);
+ if (err < 0)
+ goto err_pernet;
return 0;
-fail:
+err_pernet:
xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
unreg_ip4ip6:
xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
@@ -1427,20 +1513,6 @@ out:
return err;
}
-static void __exit ip6_tnl_destroy_tunnels(void)
-{
- int h;
- struct ip6_tnl *t;
-
- for (h = 0; h < HASH_SIZE; h++) {
- while ((t = tnls_r_l[h]) != NULL)
- unregister_netdevice(t->dev);
- }
-
- t = tnls_wc[0];
- unregister_netdevice(t->dev);
-}
-
/**
* ip6_tunnel_cleanup - free resources and unregister protocol
**/
@@ -1453,9 +1525,7 @@ static void __exit ip6_tunnel_cleanup(void)
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
- rtnl_lock();
- ip6_tnl_destroy_tunnels();
- rtnl_unlock();
+ unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops);
}
module_init(ip6_tunnel_init);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
new file mode 100644
index 00000000000..c8c6e33d116
--- /dev/null
+++ b/net/ipv6/ip6mr.c
@@ -0,0 +1,1643 @@
+/*
+ * Linux IPv6 multicast routing support for BSD pim6sd
+ * Based on net/ipv4/ipmr.c.
+ *
+ * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
+ * LSIIT Laboratory, Strasbourg, France
+ * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
+ * 6WIND, Paris, France
+ * Copyright (C)2007,2008 USAGI/WIDE Project
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <net/checksum.h>
+#include <net/netlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/mroute6.h>
+#include <linux/pim.h>
+#include <net/addrconf.h>
+#include <linux/netfilter_ipv6.h>
+
+struct sock *mroute6_socket;
+
+
+/* Big lock, protecting vif table, mrt cache and mroute socket state.
+ Note that the changes are semaphored via rtnl_lock.
+ */
+
+static DEFINE_RWLOCK(mrt_lock);
+
+/*
+ * Multicast router control variables
+ */
+
+static struct mif_device vif6_table[MAXMIFS]; /* Devices */
+static int maxvif;
+
+#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
+
+static int mroute_do_assert; /* Set in PIM assert */
+#ifdef CONFIG_IPV6_PIMSM_V2
+static int mroute_do_pim;
+#else
+#define mroute_do_pim 0
+#endif
+
+static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */
+
+static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
+static atomic_t cache_resolve_queue_len; /* Size of unresolved */
+
+/* Special spinlock for queue of unresolved entries */
+static DEFINE_SPINLOCK(mfc_unres_lock);
+
+/* We return to original Alan's scheme. Hash table of resolved
+ entries is changed only in process context and protected
+ with weak lock mrt_lock. Queue of unresolved entries is protected
+ with strong spinlock mfc_unres_lock.
+
+ In this case data path is free of exclusive locks at all.
+ */
+
+static struct kmem_cache *mrt_cachep __read_mostly;
+
+static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
+static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+static struct inet6_protocol pim6_protocol;
+#endif
+
+static struct timer_list ipmr_expire_timer;
+
+
+#ifdef CONFIG_PROC_FS
+
+struct ipmr_mfc_iter {
+ struct mfc6_cache **cache;
+ int ct;
+};
+
+
+static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
+{
+ struct mfc6_cache *mfc;
+
+ it->cache = mfc6_cache_array;
+ read_lock(&mrt_lock);
+ for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
+ for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
+ if (pos-- == 0)
+ return mfc;
+ read_unlock(&mrt_lock);
+
+ it->cache = &mfc_unres_queue;
+ spin_lock_bh(&mfc_unres_lock);
+ for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(&mfc_unres_lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+
+
+
+
+/*
+ * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
+struct ipmr_vif_iter {
+ int ct;
+};
+
+static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
+ loff_t pos)
+{
+ for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
+ if (!MIF_EXISTS(iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &vif6_table[iter->ct];
+ }
+ return NULL;
+}
+
+static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(mrt_lock)
+{
+ read_lock(&mrt_lock);
+ return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
+ : SEQ_START_TOKEN);
+}
+
+static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ipmr_vif_iter *iter = seq->private;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return ip6mr_vif_seq_idx(iter, 0);
+
+ while (++iter->ct < maxvif) {
+ if (!MIF_EXISTS(iter->ct))
+ continue;
+ return &vif6_table[iter->ct];
+ }
+ return NULL;
+}
+
+static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
+ __releases(mrt_lock)
+{
+ read_unlock(&mrt_lock);
+}
+
+static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
+ } else {
+ const struct mif_device *vif = v;
+ const char *name = vif->dev ? vif->dev->name : "none";
+
+ seq_printf(seq,
+ "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n",
+ vif - vif6_table,
+ name, vif->bytes_in, vif->pkt_in,
+ vif->bytes_out, vif->pkt_out,
+ vif->flags);
+ }
+ return 0;
+}
+
+static struct seq_operations ip6mr_vif_seq_ops = {
+ .start = ip6mr_vif_seq_start,
+ .next = ip6mr_vif_seq_next,
+ .stop = ip6mr_vif_seq_stop,
+ .show = ip6mr_vif_seq_show,
+};
+
+static int ip6mr_vif_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &ip6mr_vif_seq_ops,
+ sizeof(struct ipmr_vif_iter));
+}
+
+static struct file_operations ip6mr_vif_fops = {
+ .owner = THIS_MODULE,
+ .open = ip6mr_vif_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
+ : SEQ_START_TOKEN);
+}
+
+static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mfc6_cache *mfc = v;
+ struct ipmr_mfc_iter *it = seq->private;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return ipmr_mfc_seq_idx(seq->private, 0);
+
+ if (mfc->next)
+ return mfc->next;
+
+ if (it->cache == &mfc_unres_queue)
+ goto end_of_list;
+
+ BUG_ON(it->cache != mfc6_cache_array);
+
+ while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
+ mfc = mfc6_cache_array[it->ct];
+ if (mfc)
+ return mfc;
+ }
+
+ /* exhausted cache_array, show unresolved */
+ read_unlock(&mrt_lock);
+ it->cache = &mfc_unres_queue;
+ it->ct = 0;
+
+ spin_lock_bh(&mfc_unres_lock);
+ mfc = mfc_unres_queue;
+ if (mfc)
+ return mfc;
+
+ end_of_list:
+ spin_unlock_bh(&mfc_unres_lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+
+static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct ipmr_mfc_iter *it = seq->private;
+
+ if (it->cache == &mfc_unres_queue)
+ spin_unlock_bh(&mfc_unres_lock);
+ else if (it->cache == mfc6_cache_array)
+ read_unlock(&mrt_lock);
+}
+
+static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
+{
+ int n;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Group "
+ "Origin "
+ "Iif Pkts Bytes Wrong Oifs\n");
+ } else {
+ const struct mfc6_cache *mfc = v;
+ const struct ipmr_mfc_iter *it = seq->private;
+
+ seq_printf(seq,
+ NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
+ NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
+ mfc->mf6c_parent,
+ mfc->mfc_un.res.pkt,
+ mfc->mfc_un.res.bytes,
+ mfc->mfc_un.res.wrong_if);
+
+ if (it->cache != &mfc_unres_queue) {
+ for (n = mfc->mfc_un.res.minvif;
+ n < mfc->mfc_un.res.maxvif; n++) {
+ if (MIF_EXISTS(n) &&
+ mfc->mfc_un.res.ttls[n] < 255)
+ seq_printf(seq,
+ " %2d:%-3d",
+ n, mfc->mfc_un.res.ttls[n]);
+ }
+ }
+ seq_putc(seq, '\n');
+ }
+ return 0;
+}
+
+static struct seq_operations ipmr_mfc_seq_ops = {
+ .start = ipmr_mfc_seq_start,
+ .next = ipmr_mfc_seq_next,
+ .stop = ipmr_mfc_seq_stop,
+ .show = ipmr_mfc_seq_show,
+};
+
+static int ipmr_mfc_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &ipmr_mfc_seq_ops,
+ sizeof(struct ipmr_mfc_iter));
+}
+
+static struct file_operations ip6mr_mfc_fops = {
+ .owner = THIS_MODULE,
+ .open = ipmr_mfc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+#endif
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+static int reg_vif_num = -1;
+
+static int pim6_rcv(struct sk_buff *skb)
+{
+ struct pimreghdr *pim;
+ struct ipv6hdr *encap;
+ struct net_device *reg_dev = NULL;
+
+ if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
+ goto drop;
+
+ pim = (struct pimreghdr *)skb_transport_header(skb);
+ if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
+ (pim->flags & PIM_NULL_REGISTER) ||
+ (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
+ (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))))
+ goto drop;
+
+ /* check if the inner packet is destined to mcast group */
+ encap = (struct ipv6hdr *)(skb_transport_header(skb) +
+ sizeof(*pim));
+
+ if (!ipv6_addr_is_multicast(&encap->daddr) ||
+ encap->payload_len == 0 ||
+ ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
+ goto drop;
+
+ read_lock(&mrt_lock);
+ if (reg_vif_num >= 0)
+ reg_dev = vif6_table[reg_vif_num].dev;
+ if (reg_dev)
+ dev_hold(reg_dev);
+ read_unlock(&mrt_lock);
+
+ if (reg_dev == NULL)
+ goto drop;
+
+ skb->mac_header = skb->network_header;
+ skb_pull(skb, (u8 *)encap - skb->data);
+ skb_reset_network_header(skb);
+ skb->dev = reg_dev;
+ skb->protocol = htons(ETH_P_IP);
+ skb->ip_summed = 0;
+ skb->pkt_type = PACKET_HOST;
+ dst_release(skb->dst);
+ ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
+ ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
+ skb->dst = NULL;
+ nf_reset(skb);
+ netif_rx(skb);
+ dev_put(reg_dev);
+ return 0;
+ drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct inet6_protocol pim6_protocol = {
+ .handler = pim6_rcv,
+};
+
+/* Service routines creating virtual interfaces: PIMREG */
+
+static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ read_lock(&mrt_lock);
+ ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
+ ((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
+ ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
+ read_unlock(&mrt_lock);
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
+{
+ return (struct net_device_stats *)netdev_priv(dev);
+}
+
+static void reg_vif_setup(struct net_device *dev)
+{
+ dev->type = ARPHRD_PIMREG;
+ dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
+ dev->flags = IFF_NOARP;
+ dev->hard_start_xmit = reg_vif_xmit;
+ dev->get_stats = reg_vif_get_stats;
+ dev->destructor = free_netdev;
+}
+
+static struct net_device *ip6mr_reg_vif(void)
+{
+ struct net_device *dev;
+
+ dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
+ reg_vif_setup);
+
+ if (dev == NULL)
+ return NULL;
+
+ if (register_netdevice(dev)) {
+ free_netdev(dev);
+ return NULL;
+ }
+ dev->iflink = 0;
+
+ if (dev_open(dev))
+ goto failure;
+
+ return dev;
+
+failure:
+ /* allow the register to be completed before unregistering. */
+ rtnl_unlock();
+ rtnl_lock();
+
+ unregister_netdevice(dev);
+ return NULL;
+}
+#endif
+
+/*
+ * Delete a VIF entry
+ */
+
+static int mif6_delete(int vifi)
+{
+ struct mif_device *v;
+ struct net_device *dev;
+ if (vifi < 0 || vifi >= maxvif)
+ return -EADDRNOTAVAIL;
+
+ v = &vif6_table[vifi];
+
+ write_lock_bh(&mrt_lock);
+ dev = v->dev;
+ v->dev = NULL;
+
+ if (!dev) {
+ write_unlock_bh(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ }
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (vifi == reg_vif_num)
+ reg_vif_num = -1;
+#endif
+
+ if (vifi + 1 == maxvif) {
+ int tmp;
+ for (tmp = vifi - 1; tmp >= 0; tmp--) {
+ if (MIF_EXISTS(tmp))
+ break;
+ }
+ maxvif = tmp + 1;
+ }
+
+ write_unlock_bh(&mrt_lock);
+
+ dev_set_allmulti(dev, -1);
+
+ if (v->flags & MIFF_REGISTER)
+ unregister_netdevice(dev);
+
+ dev_put(dev);
+ return 0;
+}
+
+/* Destroy an unresolved cache entry, killing queued skbs
+ and reporting error to netlink readers.
+ */
+
+static void ip6mr_destroy_unres(struct mfc6_cache *c)
+{
+ struct sk_buff *skb;
+
+ atomic_dec(&cache_resolve_queue_len);
+
+ while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ if (ipv6_hdr(skb)->version == 0) {
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+ nlh->nlmsg_type = NLMSG_ERROR;
+ nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ skb_trim(skb, nlh->nlmsg_len);
+ ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ } else
+ kfree_skb(skb);
+ }
+
+ kmem_cache_free(mrt_cachep, c);
+}
+
+
+/* Single timer process for all the unresolved queue. */
+
+static void ipmr_do_expire_process(unsigned long dummy)
+{
+ unsigned long now = jiffies;
+ unsigned long expires = 10 * HZ;
+ struct mfc6_cache *c, **cp;
+
+ cp = &mfc_unres_queue;
+
+ while ((c = *cp) != NULL) {
+ if (time_after(c->mfc_un.unres.expires, now)) {
+ /* not yet... */
+ unsigned long interval = c->mfc_un.unres.expires - now;
+ if (interval < expires)
+ expires = interval;
+ cp = &c->next;
+ continue;
+ }
+
+ *cp = c->next;
+ ip6mr_destroy_unres(c);
+ }
+
+ if (atomic_read(&cache_resolve_queue_len))
+ mod_timer(&ipmr_expire_timer, jiffies + expires);
+}
+
+static void ipmr_expire_process(unsigned long dummy)
+{
+ if (!spin_trylock(&mfc_unres_lock)) {
+ mod_timer(&ipmr_expire_timer, jiffies + 1);
+ return;
+ }
+
+ if (atomic_read(&cache_resolve_queue_len))
+ ipmr_do_expire_process(dummy);
+
+ spin_unlock(&mfc_unres_lock);
+}
+
+/* Fill oifs list. It is called under write locked mrt_lock. */
+
+static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
+{
+ int vifi;
+
+ cache->mfc_un.res.minvif = MAXMIFS;
+ cache->mfc_un.res.maxvif = 0;
+ memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
+
+ for (vifi = 0; vifi < maxvif; vifi++) {
+ if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
+ cache->mfc_un.res.ttls[vifi] = ttls[vifi];
+ if (cache->mfc_un.res.minvif > vifi)
+ cache->mfc_un.res.minvif = vifi;
+ if (cache->mfc_un.res.maxvif <= vifi)
+ cache->mfc_un.res.maxvif = vifi + 1;
+ }
+ }
+}
+
+static int mif6_add(struct mif6ctl *vifc, int mrtsock)
+{
+ int vifi = vifc->mif6c_mifi;
+ struct mif_device *v = &vif6_table[vifi];
+ struct net_device *dev;
+
+ /* Is vif busy ? */
+ if (MIF_EXISTS(vifi))
+ return -EADDRINUSE;
+
+ switch (vifc->mif6c_flags) {
+#ifdef CONFIG_IPV6_PIMSM_V2
+ case MIFF_REGISTER:
+ /*
+ * Special Purpose VIF in PIM
+ * All the packets will be sent to the daemon
+ */
+ if (reg_vif_num >= 0)
+ return -EADDRINUSE;
+ dev = ip6mr_reg_vif();
+ if (!dev)
+ return -ENOBUFS;
+ break;
+#endif
+ case 0:
+ dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ dev_put(dev);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dev_set_allmulti(dev, 1);
+
+ /*
+ * Fill in the VIF structures
+ */
+ v->rate_limit = vifc->vifc_rate_limit;
+ v->flags = vifc->mif6c_flags;
+ if (!mrtsock)
+ v->flags |= VIFF_STATIC;
+ v->threshold = vifc->vifc_threshold;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->link = dev->ifindex;
+ if (v->flags & MIFF_REGISTER)
+ v->link = dev->iflink;
+
+ /* And finish update writing critical data */
+ write_lock_bh(&mrt_lock);
+ dev_hold(dev);
+ v->dev = dev;
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (v->flags & MIFF_REGISTER)
+ reg_vif_num = vifi;
+#endif
+ if (vifi + 1 > maxvif)
+ maxvif = vifi + 1;
+ write_unlock_bh(&mrt_lock);
+ return 0;
+}
+
+static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
+{
+ int line = MFC6_HASH(mcastgrp, origin);
+ struct mfc6_cache *c;
+
+ for (c = mfc6_cache_array[line]; c; c = c->next) {
+ if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
+ break;
+ }
+ return c;
+}
+
+/*
+ * Allocate a multicast cache entry
+ */
+static struct mfc6_cache *ip6mr_cache_alloc(void)
+{
+ struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
+ if (c == NULL)
+ return NULL;
+ memset(c, 0, sizeof(*c));
+ c->mfc_un.res.minvif = MAXMIFS;
+ return c;
+}
+
+static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
+{
+ struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
+ if (c == NULL)
+ return NULL;
+ memset(c, 0, sizeof(*c));
+ skb_queue_head_init(&c->mfc_un.unres.unresolved);
+ c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ return c;
+}
+
+/*
+ * A cache entry has gone into a resolved state from queued
+ */
+
+static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
+{
+ struct sk_buff *skb;
+
+ /*
+ * Play the pending entries through our router
+ */
+
+ while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ if (ipv6_hdr(skb)->version == 0) {
+ int err;
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
+
+ if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
+ nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
+ } else {
+ nlh->nlmsg_type = NLMSG_ERROR;
+ nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ skb_trim(skb, nlh->nlmsg_len);
+ ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+ }
+ err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ } else
+ ip6_mr_forward(skb, c);
+ }
+}
+
+/*
+ * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
+ * expects the following bizarre scheme.
+ *
+ * Called under mrt_lock.
+ */
+
+static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
+{
+ struct sk_buff *skb;
+ struct mrt6msg *msg;
+ int ret;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (assert == MRT6MSG_WHOLEPKT)
+ skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
+ +sizeof(*msg));
+ else
+#endif
+ skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
+
+ if (!skb)
+ return -ENOBUFS;
+
+ /* I suppose that internal messages
+ * do not require checksums */
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (assert == MRT6MSG_WHOLEPKT) {
+ /* Ugly, but we have no choice with this interface.
+ Duplicate old header, fix length etc.
+ And all this only to mangle msg->im6_msgtype and
+ to set msg->im6_mbz to "mbz" :-)
+ */
+ skb_push(skb, -skb_network_offset(pkt));
+
+ skb_push(skb, sizeof(*msg));
+ skb_reset_transport_header(skb);
+ msg = (struct mrt6msg *)skb_transport_header(skb);
+ msg->im6_mbz = 0;
+ msg->im6_msgtype = MRT6MSG_WHOLEPKT;
+ msg->im6_mif = reg_vif_num;
+ msg->im6_pad = 0;
+ ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
+ ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ } else
+#endif
+ {
+ /*
+ * Copy the IP header
+ */
+
+ skb_put(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
+
+ /*
+ * Add our header
+ */
+ skb_put(skb, sizeof(*msg));
+ skb_reset_transport_header(skb);
+ msg = (struct mrt6msg *)skb_transport_header(skb);
+
+ msg->im6_mbz = 0;
+ msg->im6_msgtype = assert;
+ msg->im6_mif = mifi;
+ msg->im6_pad = 0;
+ ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
+ ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+
+ skb->dst = dst_clone(pkt->dst);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ skb_pull(skb, sizeof(struct ipv6hdr));
+ }
+
+ if (mroute6_socket == NULL) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ /*
+ * Deliver to user space multicast routing algorithms
+ */
+ if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
+ kfree_skb(skb);
+ }
+
+ return ret;
+}
+
+/*
+ * Queue a packet for resolution. It gets locked cache entry!
+ */
+
+static int
+ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
+{
+ int err;
+ struct mfc6_cache *c;
+
+ spin_lock_bh(&mfc_unres_lock);
+ for (c = mfc_unres_queue; c; c = c->next) {
+ if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
+ ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
+ break;
+ }
+
+ if (c == NULL) {
+ /*
+ * Create a new entry if allowable
+ */
+
+ if (atomic_read(&cache_resolve_queue_len) >= 10 ||
+ (c = ip6mr_cache_alloc_unres()) == NULL) {
+ spin_unlock_bh(&mfc_unres_lock);
+
+ kfree_skb(skb);
+ return -ENOBUFS;
+ }
+
+ /*
+ * Fill in the new cache entry
+ */
+ c->mf6c_parent = -1;
+ c->mf6c_origin = ipv6_hdr(skb)->saddr;
+ c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
+
+ /*
+ * Reflect first query at pim6sd
+ */
+ if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
+ /* If the report failed throw the cache entry
+ out - Brad Parker
+ */
+ spin_unlock_bh(&mfc_unres_lock);
+
+ kmem_cache_free(mrt_cachep, c);
+ kfree_skb(skb);
+ return err;
+ }
+
+ atomic_inc(&cache_resolve_queue_len);
+ c->next = mfc_unres_queue;
+ mfc_unres_queue = c;
+
+ ipmr_do_expire_process(1);
+ }
+
+ /*
+ * See if we can append the packet
+ */
+ if (c->mfc_un.unres.unresolved.qlen > 3) {
+ kfree_skb(skb);
+ err = -ENOBUFS;
+ } else {
+ skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ err = 0;
+ }
+
+ spin_unlock_bh(&mfc_unres_lock);
+ return err;
+}
+
+/*
+ * MFC6 cache manipulation by user space
+ */
+
+static int ip6mr_mfc_delete(struct mf6cctl *mfc)
+{
+ int line;
+ struct mfc6_cache *c, **cp;
+
+ line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+
+ for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
+ if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+ write_lock_bh(&mrt_lock);
+ *cp = c->next;
+ write_unlock_bh(&mrt_lock);
+
+ kmem_cache_free(mrt_cachep, c);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int ip6mr_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct mif_device *v;
+ int ct;
+
+ if (dev_net(dev) != &init_net)
+ return NOTIFY_DONE;
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ v = &vif6_table[0];
+ for (ct = 0; ct < maxvif; ct++, v++) {
+ if (v->dev == dev)
+ mif6_delete(ct);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ip6_mr_notifier = {
+ .notifier_call = ip6mr_device_event
+};
+
+/*
+ * Setup for IP multicast routing
+ */
+
+void __init ip6_mr_init(void)
+{
+ mrt_cachep = kmem_cache_create("ip6_mrt_cache",
+ sizeof(struct mfc6_cache),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!mrt_cachep)
+ panic("cannot allocate ip6_mrt_cache");
+
+ setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
+ register_netdevice_notifier(&ip6_mr_notifier);
+#ifdef CONFIG_PROC_FS
+ proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
+ proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
+#endif
+}
+
+
+static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
+{
+ int line;
+ struct mfc6_cache *uc, *c, **cp;
+ unsigned char ttls[MAXMIFS];
+ int i;
+
+ memset(ttls, 255, MAXMIFS);
+ for (i = 0; i < MAXMIFS; i++) {
+ if (IF_ISSET(i, &mfc->mf6cc_ifset))
+ ttls[i] = 1;
+
+ }
+
+ line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+
+ for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
+ if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
+ ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
+ break;
+ }
+
+ if (c != NULL) {
+ write_lock_bh(&mrt_lock);
+ c->mf6c_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(c, ttls);
+ if (!mrtsock)
+ c->mfc_flags |= MFC_STATIC;
+ write_unlock_bh(&mrt_lock);
+ return 0;
+ }
+
+ if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
+ return -EINVAL;
+
+ c = ip6mr_cache_alloc();
+ if (c == NULL)
+ return -ENOMEM;
+
+ c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
+ c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
+ c->mf6c_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(c, ttls);
+ if (!mrtsock)
+ c->mfc_flags |= MFC_STATIC;
+
+ write_lock_bh(&mrt_lock);
+ c->next = mfc6_cache_array[line];
+ mfc6_cache_array[line] = c;
+ write_unlock_bh(&mrt_lock);
+
+ /*
+ * Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
+ */
+ spin_lock_bh(&mfc_unres_lock);
+ for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
+ cp = &uc->next) {
+ if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
+ ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
+ *cp = uc->next;
+ if (atomic_dec_and_test(&cache_resolve_queue_len))
+ del_timer(&ipmr_expire_timer);
+ break;
+ }
+ }
+ spin_unlock_bh(&mfc_unres_lock);
+
+ if (uc) {
+ ip6mr_cache_resolve(uc, c);
+ kmem_cache_free(mrt_cachep, uc);
+ }
+ return 0;
+}
+
+/*
+ * Close the multicast socket, and clear the vif tables etc
+ */
+
+static void mroute_clean_tables(struct sock *sk)
+{
+ int i;
+
+ /*
+ * Shut down all active vif entries
+ */
+ for (i = 0; i < maxvif; i++) {
+ if (!(vif6_table[i].flags & VIFF_STATIC))
+ mif6_delete(i);
+ }
+
+ /*
+ * Wipe the cache
+ */
+ for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
+ struct mfc6_cache *c, **cp;
+
+ cp = &mfc6_cache_array[i];
+ while ((c = *cp) != NULL) {
+ if (c->mfc_flags & MFC_STATIC) {
+ cp = &c->next;
+ continue;
+ }
+ write_lock_bh(&mrt_lock);
+ *cp = c->next;
+ write_unlock_bh(&mrt_lock);
+
+ kmem_cache_free(mrt_cachep, c);
+ }
+ }
+
+ if (atomic_read(&cache_resolve_queue_len) != 0) {
+ struct mfc6_cache *c;
+
+ spin_lock_bh(&mfc_unres_lock);
+ while (mfc_unres_queue != NULL) {
+ c = mfc_unres_queue;
+ mfc_unres_queue = c->next;
+ spin_unlock_bh(&mfc_unres_lock);
+
+ ip6mr_destroy_unres(c);
+
+ spin_lock_bh(&mfc_unres_lock);
+ }
+ spin_unlock_bh(&mfc_unres_lock);
+ }
+}
+
+static int ip6mr_sk_init(struct sock *sk)
+{
+ int err = 0;
+
+ rtnl_lock();
+ write_lock_bh(&mrt_lock);
+ if (likely(mroute6_socket == NULL))
+ mroute6_socket = sk;
+ else
+ err = -EADDRINUSE;
+ write_unlock_bh(&mrt_lock);
+
+ rtnl_unlock();
+
+ return err;
+}
+
+int ip6mr_sk_done(struct sock *sk)
+{
+ int err = 0;
+
+ rtnl_lock();
+ if (sk == mroute6_socket) {
+ write_lock_bh(&mrt_lock);
+ mroute6_socket = NULL;
+ write_unlock_bh(&mrt_lock);
+
+ mroute_clean_tables(sk);
+ } else
+ err = -EACCES;
+ rtnl_unlock();
+
+ return err;
+}
+
+/*
+ * Socket options and virtual interface manipulation. The whole
+ * virtual interface system is a complete heap, but unfortunately
+ * that's how BSD mrouted happens to think. Maybe one day with a proper
+ * MOSPF/PIM router set up we can clean this up.
+ */
+
+int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
+{
+ int ret;
+ struct mif6ctl vif;
+ struct mf6cctl mfc;
+ mifi_t mifi;
+
+ if (optname != MRT6_INIT) {
+ if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
+ return -EACCES;
+ }
+
+ switch (optname) {
+ case MRT6_INIT:
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+
+ return ip6mr_sk_init(sk);
+
+ case MRT6_DONE:
+ return ip6mr_sk_done(sk);
+
+ case MRT6_ADD_MIF:
+ if (optlen < sizeof(vif))
+ return -EINVAL;
+ if (copy_from_user(&vif, optval, sizeof(vif)))
+ return -EFAULT;
+ if (vif.mif6c_mifi >= MAXMIFS)
+ return -ENFILE;
+ rtnl_lock();
+ ret = mif6_add(&vif, sk == mroute6_socket);
+ rtnl_unlock();
+ return ret;
+
+ case MRT6_DEL_MIF:
+ if (optlen < sizeof(mifi_t))
+ return -EINVAL;
+ if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
+ return -EFAULT;
+ rtnl_lock();
+ ret = mif6_delete(mifi);
+ rtnl_unlock();
+ return ret;
+
+ /*
+ * Manipulate the forwarding caches. These live
+ * in a sort of kernel/user symbiosis.
+ */
+ case MRT6_ADD_MFC:
+ case MRT6_DEL_MFC:
+ if (optlen < sizeof(mfc))
+ return -EINVAL;
+ if (copy_from_user(&mfc, optval, sizeof(mfc)))
+ return -EFAULT;
+ rtnl_lock();
+ if (optname == MRT6_DEL_MFC)
+ ret = ip6mr_mfc_delete(&mfc);
+ else
+ ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
+ rtnl_unlock();
+ return ret;
+
+ /*
+ * Control PIM assert (to activate pim will activate assert)
+ */
+ case MRT6_ASSERT:
+ {
+ int v;
+ if (get_user(v, (int __user *)optval))
+ return -EFAULT;
+ mroute_do_assert = !!v;
+ return 0;
+ }
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ case MRT6_PIM:
+ {
+ int v;
+ if (get_user(v, (int __user *)optval))
+ return -EFAULT;
+ v = !!v;
+ rtnl_lock();
+ ret = 0;
+ if (v != mroute_do_pim) {
+ mroute_do_pim = v;
+ mroute_do_assert = v;
+ if (mroute_do_pim)
+ ret = inet6_add_protocol(&pim6_protocol,
+ IPPROTO_PIM);
+ else
+ ret = inet6_del_protocol(&pim6_protocol,
+ IPPROTO_PIM);
+ if (ret < 0)
+ ret = -EAGAIN;
+ }
+ rtnl_unlock();
+ return ret;
+ }
+
+#endif
+ /*
+ * Spurious command, or MRT_VERSION which you cannot
+ * set.
+ */
+ default:
+ return -ENOPROTOOPT;
+ }
+}
+
+/*
+ * Getsock opt support for the multicast routing system.
+ */
+
+int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int olr;
+ int val;
+
+ switch (optname) {
+ case MRT6_VERSION:
+ val = 0x0305;
+ break;
+#ifdef CONFIG_IPV6_PIMSM_V2
+ case MRT6_PIM:
+ val = mroute_do_pim;
+ break;
+#endif
+ case MRT6_ASSERT:
+ val = mroute_do_assert;
+ break;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ if (get_user(olr, optlen))
+ return -EFAULT;
+
+ olr = min_t(int, olr, sizeof(int));
+ if (olr < 0)
+ return -EINVAL;
+
+ if (put_user(olr, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, olr))
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * The IP multicast ioctl support routines.
+ */
+
+int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
+{
+ struct sioc_sg_req6 sr;
+ struct sioc_mif_req6 vr;
+ struct mif_device *vif;
+ struct mfc6_cache *c;
+
+ switch (cmd) {
+ case SIOCGETMIFCNT_IN6:
+ if (copy_from_user(&vr, arg, sizeof(vr)))
+ return -EFAULT;
+ if (vr.mifi >= maxvif)
+ return -EINVAL;
+ read_lock(&mrt_lock);
+ vif = &vif6_table[vr.mifi];
+ if (MIF_EXISTS(vr.mifi)) {
+ vr.icount = vif->pkt_in;
+ vr.ocount = vif->pkt_out;
+ vr.ibytes = vif->bytes_in;
+ vr.obytes = vif->bytes_out;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &vr, sizeof(vr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ case SIOCGETSGCNT_IN6:
+ if (copy_from_user(&sr, arg, sizeof(sr)))
+ return -EFAULT;
+
+ read_lock(&mrt_lock);
+ c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
+ if (c) {
+ sr.pktcnt = c->mfc_un.res.pkt;
+ sr.bytecnt = c->mfc_un.res.bytes;
+ sr.wrong_if = c->mfc_un.res.wrong_if;
+ read_unlock(&mrt_lock);
+
+ if (copy_to_user(arg, &sr, sizeof(sr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+
+static inline int ip6mr_forward2_finish(struct sk_buff *skb)
+{
+ IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ return dst_output(skb);
+}
+
+/*
+ * Processing handlers for ip6mr_forward
+ */
+
+static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+{
+ struct ipv6hdr *ipv6h;
+ struct mif_device *vif = &vif6_table[vifi];
+ struct net_device *dev;
+ struct dst_entry *dst;
+ struct flowi fl;
+
+ if (vif->dev == NULL)
+ goto out_free;
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+ if (vif->flags & MIFF_REGISTER) {
+ vif->pkt_out++;
+ vif->bytes_out += skb->len;
+ ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
+ ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
+ ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
+ kfree_skb(skb);
+ return 0;
+ }
+#endif
+
+ ipv6h = ipv6_hdr(skb);
+
+ fl = (struct flowi) {
+ .oif = vif->link,
+ .nl_u = { .ip6_u =
+ { .daddr = ipv6h->daddr, }
+ }
+ };
+
+ dst = ip6_route_output(&init_net, NULL, &fl);
+ if (!dst)
+ goto out_free;
+
+ dst_release(skb->dst);
+ skb->dst = dst;
+
+ /*
+ * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+ * not only before forwarding, but after forwarding on all output
+ * interfaces. It is clear, if mrouter runs a multicasting
+ * program, it should receive packets not depending to what interface
+ * program is joined.
+ * If we will not make it, the program will have to join on all
+ * interfaces. On the other hand, multihoming host (or router, but
+ * not mrouter) cannot join to more than one interface - it will
+ * result in receiving multiple packets.
+ */
+ dev = vif->dev;
+ skb->dev = dev;
+ vif->pkt_out++;
+ vif->bytes_out += skb->len;
+
+ /* We are about to write */
+ /* XXX: extension headers? */
+ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
+ goto out_free;
+
+ ipv6h = ipv6_hdr(skb);
+ ipv6h->hop_limit--;
+
+ IP6CB(skb)->flags |= IP6SKB_FORWARDED;
+
+ return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
+ ip6mr_forward2_finish);
+
+out_free:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int ip6mr_find_vif(struct net_device *dev)
+{
+ int ct;
+ for (ct = maxvif - 1; ct >= 0; ct--) {
+ if (vif6_table[ct].dev == dev)
+ break;
+ }
+ return ct;
+}
+
+static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
+{
+ int psend = -1;
+ int vif, ct;
+
+ vif = cache->mf6c_parent;
+ cache->mfc_un.res.pkt++;
+ cache->mfc_un.res.bytes += skb->len;
+
+ /*
+ * Wrong interface: drop packet and (maybe) send PIM assert.
+ */
+ if (vif6_table[vif].dev != skb->dev) {
+ int true_vifi;
+
+ cache->mfc_un.res.wrong_if++;
+ true_vifi = ip6mr_find_vif(skb->dev);
+
+ if (true_vifi >= 0 && mroute_do_assert &&
+ /* pimsm uses asserts, when switching from RPT to SPT,
+ so that we cannot check that packet arrived on an oif.
+ It is bad, but otherwise we would need to move pretty
+ large chunk of pimd to kernel. Ough... --ANK
+ */
+ (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ time_after(jiffies,
+ cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
+ cache->mfc_un.res.last_assert = jiffies;
+ ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
+ }
+ goto dont_forward;
+ }
+
+ vif6_table[vif].pkt_in++;
+ vif6_table[vif].bytes_in += skb->len;
+
+ /*
+ * Forward the frame
+ */
+ for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if (psend != -1) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2)
+ ip6mr_forward2(skb2, cache, psend);
+ }
+ psend = ct;
+ }
+ }
+ if (psend != -1) {
+ ip6mr_forward2(skb, cache, psend);
+ return 0;
+ }
+
+dont_forward:
+ kfree_skb(skb);
+ return 0;
+}
+
+
+/*
+ * Multicast packets for forwarding arrive here
+ */
+
+int ip6_mr_input(struct sk_buff *skb)
+{
+ struct mfc6_cache *cache;
+
+ read_lock(&mrt_lock);
+ cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+
+ /*
+ * No usable cache entry
+ */
+ if (cache == NULL) {
+ int vif;
+
+ vif = ip6mr_find_vif(skb->dev);
+ if (vif >= 0) {
+ int err = ip6mr_cache_unresolved(vif, skb);
+ read_unlock(&mrt_lock);
+
+ return err;
+ }
+ read_unlock(&mrt_lock);
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+
+ ip6_mr_forward(skb, cache);
+
+ read_unlock(&mrt_lock);
+
+ return 0;
+}
+
+
+static int
+ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
+{
+ int ct;
+ struct rtnexthop *nhp;
+ struct net_device *dev = vif6_table[c->mf6c_parent].dev;
+ u8 *b = skb_tail_pointer(skb);
+ struct rtattr *mp_head;
+
+ if (dev)
+ RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
+
+ mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (c->mfc_un.res.ttls[ct] < 255) {
+ if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+ goto rtattr_failure;
+ nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+ mp_head->rta_type = RTA_MULTIPATH;
+ mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+
+rtattr_failure:
+ nlmsg_trim(skb, b);
+ return -EMSGSIZE;
+}
+
+int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+{
+ int err;
+ struct mfc6_cache *cache;
+ struct rt6_info *rt = (struct rt6_info *)skb->dst;
+
+ read_lock(&mrt_lock);
+ cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+
+ if (!cache) {
+ struct sk_buff *skb2;
+ struct ipv6hdr *iph;
+ struct net_device *dev;
+ int vif;
+
+ if (nowait) {
+ read_unlock(&mrt_lock);
+ return -EAGAIN;
+ }
+
+ dev = skb->dev;
+ if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
+ read_unlock(&mrt_lock);
+ return -ENODEV;
+ }
+
+ /* really correct? */
+ skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+ if (!skb2) {
+ read_unlock(&mrt_lock);
+ return -ENOMEM;
+ }
+
+ skb_reset_transport_header(skb2);
+
+ skb_put(skb2, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb2);
+
+ iph = ipv6_hdr(skb2);
+ iph->version = 0;
+ iph->priority = 0;
+ iph->flow_lbl[0] = 0;
+ iph->flow_lbl[1] = 0;
+ iph->flow_lbl[2] = 0;
+ iph->payload_len = 0;
+ iph->nexthdr = IPPROTO_NONE;
+ iph->hop_limit = 0;
+ ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
+ ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
+
+ err = ip6mr_cache_unresolved(vif, skb2);
+ read_unlock(&mrt_lock);
+
+ return err;
+ }
+
+ if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
+ cache->mfc_flags |= MFC_NOTIFY;
+
+ err = ip6mr_fill_mroute(skb, cache, rtm);
+ read_unlock(&mrt_lock);
+ return err;
+}
+
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index bf2a686aa13..06de9d0e1f6 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -16,7 +16,6 @@
*
* FIXME: Make the setsockopt code POSIX compliant: That is
*
- * o Return -EINVAL for setsockopt of short lengths
* o Truncate getsockopt returns
* o Return an optlen of the truncated length if need be
*
@@ -33,6 +32,7 @@
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
+#include <linux/mroute6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/init.h>
@@ -57,118 +57,6 @@
DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
-static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb,
- int proto)
-{
- struct inet6_protocol *ops = NULL;
-
- for (;;) {
- struct ipv6_opt_hdr *opth;
- int len;
-
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_protos[proto]);
-
- if (unlikely(!ops))
- break;
-
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
-
- if (unlikely(!pskb_may_pull(skb, 8)))
- break;
-
- opth = (void *)skb->data;
- len = opth->hdrlen * 8 + 8;
-
- if (unlikely(!pskb_may_pull(skb, len)))
- break;
-
- proto = opth->nexthdr;
- __skb_pull(skb, len);
- }
-
- return ops;
-}
-
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
- struct ipv6hdr *ipv6h;
- struct inet6_protocol *ops;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- err = -EPROTONOSUPPORT;
-
- rcu_read_lock();
- ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- if (likely(ops && ops->gso_send_check)) {
- skb_reset_transport_header(skb);
- err = ops->gso_send_check(skb);
- }
- rcu_read_unlock();
-
-out:
- return err;
-}
-
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct ipv6hdr *ipv6h;
- struct inet6_protocol *ops;
-
- if (!(features & NETIF_F_V6_CSUM))
- features &= ~NETIF_F_SG;
-
- if (unlikely(skb_shinfo(skb)->gso_type &
- ~(SKB_GSO_UDP |
- SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_TCPV6 |
- 0)))
- goto out;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- segs = ERR_PTR(-EPROTONOSUPPORT);
-
- rcu_read_lock();
- ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- if (likely(ops && ops->gso_segment)) {
- skb_reset_transport_header(skb);
- segs = ops->gso_segment(skb, features);
- }
- rcu_read_unlock();
-
- if (unlikely(IS_ERR(segs)))
- goto out;
-
- for (skb = segs; skb; skb = skb->next) {
- ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(skb->len - skb->mac_len -
- sizeof(*ipv6h));
- }
-
-out:
- return segs;
-}
-
-static struct packet_type ipv6_packet_type = {
- .type = __constant_htons(ETH_P_IPV6),
- .func = ipv6_rcv,
- .gso_send_check = ipv6_gso_send_check,
- .gso_segment = ipv6_gso_segment,
-};
-
struct ip6_ra_chain *ip6_ra_chain;
DEFINE_RWLOCK(ip6_ra_lock);
@@ -215,25 +103,59 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
return 0;
}
+static
+struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
+ struct ipv6_txoptions *opt)
+{
+ if (inet_sk(sk)->is_icsk) {
+ if (opt &&
+ !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
+ inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ }
+ opt = xchg(&inet6_sk(sk)->opt, opt);
+ } else {
+ write_lock(&sk->sk_dst_lock);
+ opt = xchg(&inet6_sk(sk)->opt, opt);
+ write_unlock(&sk->sk_dst_lock);
+ }
+ sk_dst_reset(sk);
+
+ return opt;
+}
+
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
int val, valbool;
int retv = -ENOPROTOOPT;
if (optval == NULL)
val=0;
- else if (get_user(val, (int __user *) optval))
- return -EFAULT;
+ else {
+ if (optlen >= sizeof(int)) {
+ if (get_user(val, (int __user *) optval))
+ return -EFAULT;
+ } else
+ val = 0;
+ }
valbool = (val!=0);
+ if (ip6_mroute_opt(optname))
+ return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+
lock_sock(sk);
switch (optname) {
case IPV6_ADDRFORM:
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val == PF_INET) {
struct ipv6_txoptions *opt;
struct sk_buff *pktopt;
@@ -266,10 +188,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (sk->sk_protocol == IPPROTO_TCP) {
struct inet_connection_sock *icsk = inet_csk(sk);
-
local_bh_disable();
- sock_prot_inuse_add(sk->sk_prot, -1);
- sock_prot_inuse_add(&tcp_prot, 1);
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+ sock_prot_inuse_add(net, &tcp_prot, 1);
local_bh_enable();
sk->sk_prot = &tcp_prot;
icsk->icsk_af_ops = &ipv4_specific;
@@ -282,8 +203,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (sk->sk_protocol == IPPROTO_UDPLITE)
prot = &udplite_prot;
local_bh_disable();
- sock_prot_inuse_add(sk->sk_prot, -1);
- sock_prot_inuse_add(prot, 1);
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+ sock_prot_inuse_add(net, prot, 1);
local_bh_enable();
sk->sk_prot = prot;
sk->sk_socket->ops = &inet_dgram_ops;
@@ -309,63 +230,86 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
goto e_inval;
case IPV6_V6ONLY:
- if (inet_sk(sk)->num)
+ if (optlen < sizeof(int) ||
+ inet_sk(sk)->num)
goto e_inval;
np->ipv6only = valbool;
retv = 0;
break;
case IPV6_RECVPKTINFO:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.rxinfo = valbool;
retv = 0;
break;
case IPV6_2292PKTINFO:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.rxoinfo = valbool;
retv = 0;
break;
case IPV6_RECVHOPLIMIT:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.rxhlim = valbool;
retv = 0;
break;
case IPV6_2292HOPLIMIT:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.rxohlim = valbool;
retv = 0;
break;
case IPV6_RECVRTHDR:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.srcrt = valbool;
retv = 0;
break;
case IPV6_2292RTHDR:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.osrcrt = valbool;
retv = 0;
break;
case IPV6_RECVHOPOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.hopopts = valbool;
retv = 0;
break;
case IPV6_2292HOPOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.ohopopts = valbool;
retv = 0;
break;
case IPV6_RECVDSTOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.dstopts = valbool;
retv = 0;
break;
case IPV6_2292DSTOPTS:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.odstopts = valbool;
retv = 0;
break;
case IPV6_TCLASS:
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val < -1 || val > 0xff)
goto e_inval;
np->tclass = val;
@@ -373,11 +317,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_RECVTCLASS:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.rxtclass = valbool;
retv = 0;
break;
case IPV6_FLOWINFO:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->rxopt.bits.rxflow = valbool;
retv = 0;
break;
@@ -396,9 +344,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
break;
- retv = -EINVAL;
- if (optlen & 0x7 || optlen > 8 * 255)
- break;
+ if (optlen < sizeof(struct ipv6_opt_hdr) ||
+ optlen & 0x7 || optlen > 8 * 255)
+ goto e_inval;
opt = ipv6_renew_options(sk, np->opt, optname,
(struct ipv6_opt_hdr __user *)optval,
@@ -426,25 +374,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
retv = 0;
- if (inet_sk(sk)->is_icsk) {
- if (opt) {
- struct inet_connection_sock *icsk = inet_csk(sk);
- if (!((1 << sk->sk_state) &
- (TCPF_LISTEN | TCPF_CLOSE))
- && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
- icsk->icsk_ext_hdr_len =
- opt->opt_flen + opt->opt_nflen;
- icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
- }
- }
- opt = xchg(&np->opt, opt);
- sk_dst_reset(sk);
- } else {
- write_lock(&sk->sk_dst_lock);
- opt = xchg(&np->opt, opt);
- write_unlock(&sk->sk_dst_lock);
- sk_dst_reset(sk);
- }
+ opt = ipv6_update_options(sk, opt);
sticky_done:
if (opt)
sock_kfree_s(sk, opt, opt->tot_len);
@@ -490,32 +420,15 @@ sticky_done:
goto done;
update:
retv = 0;
- if (inet_sk(sk)->is_icsk) {
- if (opt) {
- struct inet_connection_sock *icsk = inet_csk(sk);
- if (!((1 << sk->sk_state) &
- (TCPF_LISTEN | TCPF_CLOSE))
- && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
- icsk->icsk_ext_hdr_len =
- opt->opt_flen + opt->opt_nflen;
- icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
- }
- }
- opt = xchg(&np->opt, opt);
- sk_dst_reset(sk);
- } else {
- write_lock(&sk->sk_dst_lock);
- opt = xchg(&np->opt, opt);
- write_unlock(&sk->sk_dst_lock);
- sk_dst_reset(sk);
- }
-
+ opt = ipv6_update_options(sk, opt);
done:
if (opt)
sock_kfree_s(sk, opt, opt->tot_len);
break;
}
case IPV6_UNICAST_HOPS:
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val > 255 || val < -1)
goto e_inval;
np->hop_limit = val;
@@ -525,6 +438,8 @@ done:
case IPV6_MULTICAST_HOPS:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val > 255 || val < -1)
goto e_inval;
np->mcast_hops = val;
@@ -532,6 +447,8 @@ done:
break;
case IPV6_MULTICAST_LOOP:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->mc_loop = valbool;
retv = 0;
break;
@@ -539,12 +456,14 @@ done:
case IPV6_MULTICAST_IF:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val) {
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
goto e_inval;
- if (__dev_get_by_index(&init_net, val) == NULL) {
+ if (__dev_get_by_index(net, val) == NULL) {
retv = -ENODEV;
break;
}
@@ -557,6 +476,9 @@ done:
{
struct ipv6_mreq mreq;
+ if (optlen < sizeof(struct ipv6_mreq))
+ goto e_inval;
+
retv = -EPROTO;
if (inet_sk(sk)->is_icsk)
break;
@@ -576,7 +498,7 @@ done:
{
struct ipv6_mreq mreq;
- if (optlen != sizeof(struct ipv6_mreq))
+ if (optlen < sizeof(struct ipv6_mreq))
goto e_inval;
retv = -EFAULT;
@@ -595,6 +517,9 @@ done:
struct group_req greq;
struct sockaddr_in6 *psin6;
+ if (optlen < sizeof(struct group_req))
+ goto e_inval;
+
retv = -EFAULT;
if (copy_from_user(&greq, optval, sizeof(struct group_req)))
break;
@@ -619,7 +544,7 @@ done:
struct group_source_req greqs;
int omode, add;
- if (optlen != sizeof(struct group_source_req))
+ if (optlen < sizeof(struct group_source_req))
goto e_inval;
if (copy_from_user(&greqs, optval, sizeof(greqs))) {
retv = -EFAULT;
@@ -693,27 +618,37 @@ done:
break;
}
case IPV6_ROUTER_ALERT:
+ if (optlen < sizeof(int))
+ goto e_inval;
retv = ip6_ra_control(sk, val, NULL);
break;
case IPV6_MTU_DISCOVER:
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val<0 || val>3)
goto e_inval;
np->pmtudisc = val;
retv = 0;
break;
case IPV6_MTU:
+ if (optlen < sizeof(int))
+ goto e_inval;
if (val && val < IPV6_MIN_MTU)
goto e_inval;
np->frag_size = val;
retv = 0;
break;
case IPV6_RECVERR:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->recverr = valbool;
if (!val)
skb_queue_purge(&sk->sk_error_queue);
retv = 0;
break;
case IPV6_FLOWINFO_SEND:
+ if (optlen < sizeof(int))
+ goto e_inval;
np->sndflow = valbool;
retv = 0;
break;
@@ -728,7 +663,70 @@ done:
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
+ case IPV6_ADDR_PREFERENCES:
+ {
+ unsigned int pref = 0;
+ unsigned int prefmask = ~0;
+
+ if (optlen < sizeof(int))
+ goto e_inval;
+
+ retv = -EINVAL;
+
+ /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+ switch (val & (IPV6_PREFER_SRC_PUBLIC|
+ IPV6_PREFER_SRC_TMP|
+ IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+ case IPV6_PREFER_SRC_PUBLIC:
+ pref |= IPV6_PREFER_SRC_PUBLIC;
+ break;
+ case IPV6_PREFER_SRC_TMP:
+ pref |= IPV6_PREFER_SRC_TMP;
+ break;
+ case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+ break;
+ case 0:
+ goto pref_skip_pubtmp;
+ default:
+ goto e_inval;
+ }
+
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
+ IPV6_PREFER_SRC_TMP);
+pref_skip_pubtmp:
+
+ /* check HOME/COA conflicts */
+ switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
+ case IPV6_PREFER_SRC_HOME:
+ break;
+ case IPV6_PREFER_SRC_COA:
+ pref |= IPV6_PREFER_SRC_COA;
+ case 0:
+ goto pref_skip_coa;
+ default:
+ goto e_inval;
+ }
+
+ prefmask &= ~IPV6_PREFER_SRC_COA;
+pref_skip_coa:
+
+ /* check CGA/NONCGA conflicts */
+ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+ case IPV6_PREFER_SRC_CGA:
+ case IPV6_PREFER_SRC_NONCGA:
+ case 0:
+ break;
+ default:
+ goto e_inval;
+ }
+
+ np->srcprefs = (np->srcprefs & prefmask) | pref;
+ retv = 0;
+
+ break;
+ }
}
+
release_sock(sk);
return retv;
@@ -839,6 +837,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
int len;
int val;
+ if (ip6_mroute_opt(optname))
+ return ip6_mroute_getsockopt(sk, optname, optval, optlen);
+
if (get_user(len, optlen))
return -EFAULT;
switch (optname) {
@@ -1015,9 +1016,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
dst = sk_dst_get(sk);
if (dst) {
if (val < 0)
- val = dst_metric(dst, RTAX_HOPLIMIT);
- if (val < 0)
- val = ipv6_get_hoplimit(dst->dev);
+ val = ip6_dst_hoplimit(dst);
dst_release(dst);
}
if (val < 0)
@@ -1045,6 +1044,24 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->sndflow;
break;
+ case IPV6_ADDR_PREFERENCES:
+ val = 0;
+
+ if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+ val |= IPV6_PREFER_SRC_TMP;
+ else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+ val |= IPV6_PREFER_SRC_PUBLIC;
+ else {
+ /* XXX: should we return system default? */
+ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
+ }
+
+ if (np->srcprefs & IPV6_PREFER_SRC_COA)
+ val |= IPV6_PREFER_SRC_COA;
+ else
+ val |= IPV6_PREFER_SRC_HOME;
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -1128,13 +1145,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-int __init ipv6_packet_init(void)
-{
- dev_add_pack(&ipv6_packet_type);
- return 0;
-}
-
-void ipv6_packet_cleanup(void)
-{
- dev_remove_pack(&ipv6_packet_type);
-}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index ab228d1ea11..54f91efdae5 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -59,6 +59,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
+#include <net/inet_common.h>
#include <net/ip6_checksum.h>
@@ -126,10 +127,6 @@ static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
/* Big mc list lock for all the sockets */
static DEFINE_RWLOCK(ipv6_sk_mc_lock);
-static struct socket *igmp6_socket;
-
-int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr);
-
static void igmp6_join_group(struct ifmcaddr6 *ma);
static void igmp6_leave_group(struct ifmcaddr6 *ma);
static void igmp6_timer_handler(unsigned long data);
@@ -178,11 +175,12 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
* socket join on multicast group
*/
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
int err;
if (!ipv6_addr_is_multicast(addr))
@@ -208,14 +206,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
dev = rt->rt6i_dev;
dev_hold(dev);
dst_release(&rt->u.dst);
}
} else
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
if (dev == NULL) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
@@ -252,10 +250,11 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
/*
* socket leave on multicast group
*/
-int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc_lst, **lnk;
+ struct net *net = sock_net(sk);
write_lock_bh(&ipv6_sk_mc_lock);
for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
@@ -266,7 +265,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
*lnk = mc_lst->next;
write_unlock_bh(&ipv6_sk_mc_lock);
- if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) {
+ dev = dev_get_by_index(net, mc_lst->ifindex);
+ if (dev != NULL) {
struct inet6_dev *idev = in6_dev_get(dev);
(void) ip6_mc_leave_src(sk, mc_lst, idev);
@@ -286,7 +286,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
return -EADDRNOTAVAIL;
}
-static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
+static struct inet6_dev *ip6_mc_find_dev(struct net *net,
+ struct in6_addr *group,
+ int ifindex)
{
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
@@ -294,14 +296,14 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(group, NULL, 0, 0);
+ rt = rt6_lookup(net, group, NULL, 0, 0);
if (rt) {
dev = rt->rt6i_dev;
dev_hold(dev);
dst_release(&rt->u.dst);
}
} else
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
if (!dev)
return NULL;
@@ -324,6 +326,7 @@ void ipv6_sock_mc_close(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc_lst;
+ struct net *net = sock_net(sk);
write_lock_bh(&ipv6_sk_mc_lock);
while ((mc_lst = np->ipv6_mc_list) != NULL) {
@@ -332,7 +335,7 @@ void ipv6_sock_mc_close(struct sock *sk)
np->ipv6_mc_list = mc_lst->next;
write_unlock_bh(&ipv6_sk_mc_lock);
- dev = dev_get_by_index(&init_net, mc_lst->ifindex);
+ dev = dev_get_by_index(net, mc_lst->ifindex);
if (dev) {
struct inet6_dev *idev = in6_dev_get(dev);
@@ -361,6 +364,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
struct inet6_dev *idev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *psl;
+ struct net *net = sock_net(sk);
int i, j, rv;
int leavegroup = 0;
int pmclocked = 0;
@@ -376,7 +380,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (!ipv6_addr_is_multicast(group))
return -EINVAL;
- idev = ip6_mc_find_dev(group, pgsr->gsr_interface);
+ idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface);
if (!idev)
return -ENODEV;
dev = idev->dev;
@@ -500,6 +504,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
struct inet6_dev *idev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *newpsl, *psl;
+ struct net *net = sock_net(sk);
int leavegroup = 0;
int i, err;
@@ -511,7 +516,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
gsf->gf_fmode != MCAST_EXCLUDE)
return -EINVAL;
- idev = ip6_mc_find_dev(group, gsf->gf_interface);
+ idev = ip6_mc_find_dev(net, group, gsf->gf_interface);
if (!idev)
return -ENODEV;
@@ -592,13 +597,14 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
struct net_device *dev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *psl;
+ struct net *net = sock_net(sk);
group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
if (!ipv6_addr_is_multicast(group))
return -EINVAL;
- idev = ip6_mc_find_dev(group, gsf->gf_interface);
+ idev = ip6_mc_find_dev(net, group, gsf->gf_interface);
if (!idev)
return -ENODEV;
@@ -656,8 +662,8 @@ done:
return err;
}
-int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
- struct in6_addr *src_addr)
+int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+ const struct in6_addr *src_addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc;
@@ -863,7 +869,7 @@ static void mld_clear_delrec(struct inet6_dev *idev)
/*
* device multicast group inc (add if not found)
*/
-int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
{
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
@@ -934,7 +940,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
/*
* device multicast group del
*/
-int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifmcaddr6 *ma, **map;
@@ -959,7 +965,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
return -ENOENT;
}
-int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr)
+int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
{
struct inet6_dev *idev = in6_dev_get(dev);
int err;
@@ -1004,8 +1010,8 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
/*
* check if the interface/address pair is valid
*/
-int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *group,
- struct in6_addr *src_addr)
+int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
+ const struct in6_addr *src_addr)
{
struct inet6_dev *idev;
struct ifmcaddr6 *mc;
@@ -1393,10 +1399,12 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
static struct sk_buff *mld_newpack(struct net_device *dev, int size)
{
- struct sock *sk = igmp6_socket->sk;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.igmp_sk;
struct sk_buff *skb;
struct mld2_report *pmr;
struct in6_addr addr_buf;
+ const struct in6_addr *saddr;
int err;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
@@ -1415,10 +1423,11 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
* use unspecified address as the source address
* when a valid link-local address is not available.
*/
- memset(&addr_buf, 0, sizeof(addr_buf));
- }
+ saddr = &in6addr_any;
+ } else
+ saddr = &addr_buf;
- ip6_nd_hdr(sk, skb, dev, &addr_buf, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ ip6_nd_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
@@ -1433,25 +1442,6 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
return skb;
}
-static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned char ha[MAX_ADDR_LEN];
-
- ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1);
- if (dev_hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len) < 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
- return dev_queue_xmit(skb);
-}
-
-static inline int mld_dev_queue_xmit(struct sk_buff *skb)
-{
- return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
- mld_dev_queue_xmit2);
-}
-
static void mld_sendpack(struct sk_buff *skb)
{
struct ipv6hdr *pip6 = ipv6_hdr(skb);
@@ -1459,7 +1449,9 @@ static void mld_sendpack(struct sk_buff *skb)
(struct mld2_report *)skb_transport_header(skb);
int payload_len, mldlen;
struct inet6_dev *idev = in6_dev_get(skb->dev);
+ struct net *net = dev_net(skb->dev);
int err;
+ struct flowi fl;
IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
@@ -1469,8 +1461,25 @@ static void mld_sendpack(struct sk_buff *skb)
pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
mldlen, 0));
+
+ skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
+
+ if (!skb->dst) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT,
+ &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->dev->ifindex);
+
+ err = xfrm_lookup(&skb->dst, &fl, NULL, 0);
+ if (err)
+ goto err_out;
+
err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
- mld_dev_queue_xmit);
+ dst_output);
+out:
if (!err) {
ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
@@ -1480,6 +1489,11 @@ static void mld_sendpack(struct sk_buff *skb)
if (likely(idev != NULL))
in6_dev_put(idev);
+ return;
+
+err_out:
+ kfree_skb(skb);
+ goto out;
}
static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
@@ -1749,28 +1763,28 @@ static void mld_send_cr(struct inet6_dev *idev)
static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
- struct sock *sk = igmp6_socket->sk;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.igmp_sk;
struct inet6_dev *idev;
struct sk_buff *skb;
struct icmp6hdr *hdr;
- struct in6_addr *snd_addr;
+ const struct in6_addr *snd_addr, *saddr;
struct in6_addr *addrp;
struct in6_addr addr_buf;
- struct in6_addr all_routers;
int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
+ struct flowi fl;
rcu_read_lock();
IP6_INC_STATS(__in6_dev_get(dev),
IPSTATS_MIB_OUTREQUESTS);
rcu_read_unlock();
- snd_addr = addr;
- if (type == ICMPV6_MGM_REDUCTION) {
- snd_addr = &all_routers;
- ipv6_addr_all_routers(&all_routers);
- }
+ if (type == ICMPV6_MGM_REDUCTION)
+ snd_addr = &in6addr_linklocal_allrouters;
+ else
+ snd_addr = addr;
len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
payload_len = len + sizeof(ra);
@@ -1793,10 +1807,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
* use unspecified address as the source address
* when a valid link-local address is not available.
*/
- memset(&addr_buf, 0, sizeof(addr_buf));
- }
+ saddr = &in6addr_any;
+ } else
+ saddr = &addr_buf;
- ip6_nd_hdr(sk, skb, dev, &addr_buf, snd_addr, NEXTHDR_HOP, payload_len);
+ ip6_nd_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len);
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
@@ -1807,14 +1822,29 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
ipv6_addr_copy(addrp, addr);
- hdr->icmp6_cksum = csum_ipv6_magic(&addr_buf, snd_addr, len,
+ hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len,
IPPROTO_ICMPV6,
csum_partial((__u8 *) hdr, len, 0));
idev = in6_dev_get(skb->dev);
+ skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
+ if (!skb->dst) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ icmpv6_flow_init(sk, &fl, type,
+ &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->dev->ifindex);
+
+ err = xfrm_lookup(&skb->dst, &fl, NULL, 0);
+ if (err)
+ goto err_out;
+
err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
- mld_dev_queue_xmit);
+ dst_output);
+out:
if (!err) {
ICMP6MSGOUT_INC_STATS(idev, type);
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
@@ -1825,6 +1855,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
if (likely(idev != NULL))
in6_dev_put(idev);
return;
+
+err_out:
+ kfree_skb(skb);
+ goto out;
}
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2276,24 +2310,19 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
void ipv6_mc_destroy_dev(struct inet6_dev *idev)
{
struct ifmcaddr6 *i;
- struct in6_addr maddr;
/* Deactivate timers */
ipv6_mc_down(idev);
/* Delete all-nodes address. */
- ipv6_addr_all_nodes(&maddr);
-
/* We cannot call ipv6_dev_mc_dec() directly, our caller in
* addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will
* fail.
*/
- __ipv6_dev_mc_dec(idev, &maddr);
+ __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allnodes);
- if (idev->cnf.forwarding) {
- ipv6_addr_all_routers(&maddr);
- __ipv6_dev_mc_dec(idev, &maddr);
- }
+ if (idev->cnf.forwarding)
+ __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters);
write_lock_bh(&idev->lock);
while ((i = idev->mc_list) != NULL) {
@@ -2310,6 +2339,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
#ifdef CONFIG_PROC_FS
struct igmp6_mc_iter_state {
+ struct seq_net_private p;
struct net_device *dev;
struct inet6_dev *idev;
};
@@ -2320,9 +2350,10 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
{
struct ifmcaddr6 *im = NULL;
struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+ struct net *net = seq_file_net(seq);
state->idev = NULL;
- for_each_netdev(&init_net, state->dev) {
+ for_each_netdev(net, state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (!idev)
@@ -2424,8 +2455,8 @@ static const struct seq_operations igmp6_mc_seq_ops = {
static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &igmp6_mc_seq_ops,
- sizeof(struct igmp6_mc_iter_state));
+ return seq_open_net(inode, file, &igmp6_mc_seq_ops,
+ sizeof(struct igmp6_mc_iter_state));
}
static const struct file_operations igmp6_mc_seq_fops = {
@@ -2433,10 +2464,11 @@ static const struct file_operations igmp6_mc_seq_fops = {
.open = igmp6_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
struct igmp6_mcf_iter_state {
+ struct seq_net_private p;
struct net_device *dev;
struct inet6_dev *idev;
struct ifmcaddr6 *im;
@@ -2449,10 +2481,11 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
struct ip6_sf_list *psf = NULL;
struct ifmcaddr6 *im = NULL;
struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+ struct net *net = seq_file_net(seq);
state->idev = NULL;
state->im = NULL;
- for_each_netdev(&init_net, state->dev) {
+ for_each_netdev(net, state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (unlikely(idev == NULL))
@@ -2584,8 +2617,8 @@ static const struct seq_operations igmp6_mcf_seq_ops = {
static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &igmp6_mcf_seq_ops,
- sizeof(struct igmp6_mcf_iter_state));
+ return seq_open_net(inode, file, &igmp6_mcf_seq_ops,
+ sizeof(struct igmp6_mcf_iter_state));
}
static const struct file_operations igmp6_mcf_seq_fops = {
@@ -2593,47 +2626,88 @@ static const struct file_operations igmp6_mcf_seq_fops = {
.open = igmp6_mcf_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
+
+static int igmp6_proc_init(struct net *net)
+{
+ int err;
+
+ err = -ENOMEM;
+ if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops))
+ goto out;
+ if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO,
+ &igmp6_mcf_seq_fops))
+ goto out_proc_net_igmp6;
+
+ err = 0;
+out:
+ return err;
+
+out_proc_net_igmp6:
+ proc_net_remove(net, "igmp6");
+ goto out;
+}
+
+static void igmp6_proc_exit(struct net *net)
+{
+ proc_net_remove(net, "mcfilter6");
+ proc_net_remove(net, "igmp6");
+}
+#else
+static int igmp6_proc_init(struct net *net)
+{
+ return 0;
+}
+static void igmp6_proc_exit(struct net *net)
+{
+ ;
+}
#endif
-int __init igmp6_init(struct net_proto_family *ops)
+static int igmp6_net_init(struct net *net)
{
- struct ipv6_pinfo *np;
- struct sock *sk;
int err;
- err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket);
+ err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
printk(KERN_ERR
"Failed to initialize the IGMP6 control socket (err %d).\n",
err);
- igmp6_socket = NULL; /* For safety. */
- return err;
+ goto out;
}
- sk = igmp6_socket->sk;
- sk->sk_allocation = GFP_ATOMIC;
- sk->sk_prot->unhash(sk);
+ inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
- np = inet6_sk(sk);
- np->hop_limit = 1;
+ err = igmp6_proc_init(net);
+ if (err)
+ goto out_sock_create;
+out:
+ return err;
-#ifdef CONFIG_PROC_FS
- proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops);
- proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
-#endif
+out_sock_create:
+ inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+ goto out;
+}
- return 0;
+static void igmp6_net_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+ igmp6_proc_exit(net);
}
-void igmp6_cleanup(void)
+static struct pernet_operations igmp6_net_ops = {
+ .init = igmp6_net_init,
+ .exit = igmp6_net_exit,
+};
+
+int __init igmp6_init(void)
{
- sock_release(igmp6_socket);
- igmp6_socket = NULL; /* for safety */
+ return register_pernet_subsys(&igmp6_net_ops);
+}
-#ifdef CONFIG_PROC_FS
- proc_net_remove(&init_net, "mcfilter6");
- proc_net_remove(&init_net, "igmp6");
-#endif
+void igmp6_cleanup(void)
+{
+ unregister_pernet_subsys(&igmp6_net_ops);
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index cd8a5bda13c..ad1cc5bbf97 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -44,9 +44,9 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
if (!data)
return NULL;
if (padlen == 1) {
- data[0] = MIP6_OPT_PAD_1;
+ data[0] = IPV6_TLV_PAD0;
} else if (padlen > 1) {
- data[0] = MIP6_OPT_PAD_N;
+ data[0] = IPV6_TLV_PADN;
data[1] = padlen - 2;
if (padlen > 2)
memset(data+2, 0, data[1]);
@@ -304,13 +304,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
static int mip6_destopt_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
- printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+ printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
x->id.spi);
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
printk(KERN_INFO "%s: state's mode is not %u: %u\n",
- __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
return -EINVAL;
}
@@ -439,13 +439,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
static int mip6_rthdr_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
- printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+ printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
x->id.spi);
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
printk(KERN_INFO "%s: state's mode is not %u: %u\n",
- __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
return -EINVAL;
}
@@ -480,15 +480,15 @@ static int __init mip6_init(void)
printk(KERN_INFO "Mobile IPv6\n");
if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
- printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__);
+ printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __func__);
goto mip6_destopt_xfrm_fail;
}
if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
- printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
+ printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__);
goto mip6_rthdr_xfrm_fail;
}
if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
- printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__);
+ printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__);
goto mip6_rawv6_mh_fail;
}
@@ -506,11 +506,11 @@ static int __init mip6_init(void)
static void __exit mip6_fini(void)
{
if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
- printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__);
+ printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __func__);
if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
- printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+ printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__);
if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
- printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
+ printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __func__);
}
module_init(mip6_init);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 452a2ac4eec..2c74885f835 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -84,13 +84,12 @@
#include <net/flow.h>
#include <net/ip6_checksum.h>
+#include <net/inet_common.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
-static struct socket *ndisc_socket;
-
static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
@@ -270,7 +269,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
ND_PRINTK2(KERN_WARNING
"%s(): duplicated ND6 option found: type=%d\n",
- __FUNCTION__,
+ __func__,
nd_opt->nd_opt_type);
} else {
ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
@@ -301,7 +300,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
*/
ND_PRINTK2(KERN_NOTICE
"%s(): ignored unsupported option; type=%d, len=%d\n",
- __FUNCTION__,
+ __func__,
nd_opt->nd_opt_type, nd_opt->nd_opt_len);
}
}
@@ -441,30 +440,17 @@ static void pndisc_destructor(struct pneigh_entry *n)
/*
* Send a Neighbour Advertisement
*/
-
-static inline void ndisc_flow_init(struct flowi *fl, u8 type,
- struct in6_addr *saddr, struct in6_addr *daddr,
- int oif)
-{
- memset(fl, 0, sizeof(*fl));
- ipv6_addr_copy(&fl->fl6_src, saddr);
- ipv6_addr_copy(&fl->fl6_dst, daddr);
- fl->proto = IPPROTO_ICMPV6;
- fl->fl_icmp_type = type;
- fl->fl_icmp_code = 0;
- fl->oif = oif;
- security_sk_classify_flow(ndisc_socket->sk, fl);
-}
-
static void __ndisc_send(struct net_device *dev,
struct neighbour *neigh,
- struct in6_addr *daddr, struct in6_addr *saddr,
- struct icmp6hdr *icmp6h, struct in6_addr *target,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ struct icmp6hdr *icmp6h, const struct in6_addr *target,
int llinfo)
{
struct flowi fl;
struct dst_entry *dst;
- struct sock *sk = ndisc_socket->sk;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.ndisc_sk;
struct sk_buff *skb;
struct icmp6hdr *hdr;
struct inet6_dev *idev;
@@ -474,10 +460,9 @@ static void __ndisc_send(struct net_device *dev,
type = icmp6h->icmp6_type;
- ndisc_flow_init(&fl, type, saddr, daddr,
- dev->ifindex);
+ icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
- dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
+ dst = icmp6_dst_alloc(dev, neigh, daddr);
if (!dst)
return;
@@ -499,7 +484,7 @@ static void __ndisc_send(struct net_device *dev,
if (!skb) {
ND_PRINTK0(KERN_ERR
"ICMPv6 ND: %s() failed to allocate an skb.\n",
- __FUNCTION__);
+ __func__);
dst_release(dst);
return;
}
@@ -545,25 +530,28 @@ static void __ndisc_send(struct net_device *dev,
}
static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- struct in6_addr *daddr, struct in6_addr *solicited_addr,
- int router, int solicited, int override, int inc_opt)
+ const struct in6_addr *daddr,
+ const struct in6_addr *solicited_addr,
+ int router, int solicited, int override, int inc_opt)
{
struct in6_addr tmpaddr;
struct inet6_ifaddr *ifp;
- struct in6_addr *src_addr;
+ const struct in6_addr *src_addr;
struct icmp6hdr icmp6h = {
.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
};
/* for anycast or proxy, solicited_addr != src_addr */
- ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1);
+ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
if (ifp) {
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
override = 0;
in6_ifa_put(ifp);
} else {
- if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+ if (ipv6_dev_get_saddr(dev, daddr,
+ inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
+ &tmpaddr))
return;
src_addr = &tmpaddr;
}
@@ -578,8 +566,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
}
void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
- struct in6_addr *solicit,
- struct in6_addr *daddr, struct in6_addr *saddr)
+ const struct in6_addr *solicit,
+ const struct in6_addr *daddr, const struct in6_addr *saddr)
{
struct in6_addr addr_buf;
struct icmp6hdr icmp6h = {
@@ -598,8 +586,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
!ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
}
-void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr)
+void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
struct icmp6hdr icmp6h = {
.icmp6_type = NDISC_ROUTER_SOLICITATION,
@@ -616,7 +604,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
* suppress the inclusion of the sllao.
*/
if (send_sllao) {
- struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr,
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
dev, 1);
if (ifp) {
if (ifp->flags & IFA_F_OPTIMISTIC) {
@@ -654,7 +642,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
int probes = atomic_read(&neigh->probes);
- if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1))
+ if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
saddr = &ipv6_hdr(skb)->saddr;
if ((probes -= neigh->parms->ucast_probes) < 0) {
@@ -662,7 +650,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
ND_PRINTK1(KERN_DEBUG
"%s(): trying to ucast probe in NUD_INVALID: "
NIP6_FMT "\n",
- __FUNCTION__,
+ __func__,
NIP6(*target));
}
ndisc_send_ns(dev, neigh, target, target, saddr);
@@ -676,18 +664,19 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
}
}
-static struct pneigh_entry *pndisc_check_router(struct net_device *dev,
- struct in6_addr *addr, int *is_router)
+static int pndisc_is_router(const void *pkey,
+ struct net_device *dev)
{
struct pneigh_entry *n;
+ int ret = -1;
read_lock_bh(&nd_tbl.lock);
- n = __pneigh_lookup(&nd_tbl, &init_net, addr, dev);
- if (n != NULL)
- *is_router = (n->flags & NTF_ROUTER);
+ n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
+ if (n)
+ ret = !!(n->flags & NTF_ROUTER);
read_unlock_bh(&nd_tbl.lock);
- return n;
+ return ret;
}
static void ndisc_recv_ns(struct sk_buff *skb)
@@ -703,10 +692,9 @@ static void ndisc_recv_ns(struct sk_buff *skb)
struct inet6_ifaddr *ifp;
struct inet6_dev *idev = NULL;
struct neighbour *neigh;
- struct pneigh_entry *pneigh = NULL;
int dad = ipv6_addr_any(saddr);
int inc;
- int is_router = 0;
+ int is_router = -1;
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK2(KERN_WARNING
@@ -756,7 +744,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
inc = ipv6_addr_is_multicast(daddr);
- if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) {
+ ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
+ if (ifp) {
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
@@ -801,11 +790,10 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
- if (ipv6_chk_acast_addr(dev, &msg->target) ||
+ if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) ||
(idev->cnf.forwarding &&
(ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
- (pneigh = pndisc_check_router(dev, &msg->target,
- &is_router)) != NULL)) {
+ (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
inc != 0 &&
@@ -826,13 +814,11 @@ static void ndisc_recv_ns(struct sk_buff *skb)
goto out;
}
- is_router = !!(pneigh ? is_router : idev->cnf.forwarding);
+ if (is_router < 0)
+ is_router = !!idev->cnf.forwarding;
if (dad) {
- struct in6_addr maddr;
-
- ipv6_addr_all_nodes(&maddr);
- ndisc_send_na(dev, NULL, &maddr, &msg->target,
+ ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
is_router, 0, (ifp != NULL), 1);
goto out;
}
@@ -914,7 +900,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
return;
}
}
- if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) {
+ ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
+ if (ifp) {
if (ifp->flags & IFA_F_TENTATIVE) {
addrconf_dad_failure(ifp);
return;
@@ -945,7 +932,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
*/
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
- pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
+ pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) {
/* XXX: idev->cnf.prixy_ndp */
goto out;
}
@@ -1035,6 +1022,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
struct sk_buff *skb;
struct nlmsghdr *nlh;
struct nduseroptmsg *ndmsg;
+ struct net *net = dev_net(ra->dev);
int err;
int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
+ (opt->nd_opt_len << 3));
@@ -1064,7 +1052,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
&ipv6_hdr(ra)->saddr);
nlmsg_end(skb, nlh);
- err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL,
+ err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
GFP_ATOMIC);
if (err < 0)
goto errout;
@@ -1075,7 +1063,7 @@ nla_put_failure:
nlmsg_free(skb);
err = -EMSGSIZE;
errout:
- rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err);
+ rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
}
static void ndisc_router_discovery(struct sk_buff *skb)
@@ -1104,6 +1092,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: from host or unauthorized router\n");
+ return;
+ }
+#endif
+
/*
* set the RA_RECV flag in the interface
*/
@@ -1127,6 +1123,12 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ /* skip link-specific parameters from interior routers */
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ goto skip_linkparms;
+#endif
+
if (in6_dev->if_flags & IF_RS_SENT) {
/*
* flag that an RA was received after an RS was sent
@@ -1178,7 +1180,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: %s() failed to add default route.\n",
- __FUNCTION__);
+ __func__);
in6_dev_put(in6_dev);
return;
}
@@ -1187,7 +1189,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (neigh == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: %s() got default router without neighbour.\n",
- __FUNCTION__);
+ __func__);
dst_release(&rt->u.dst);
in6_dev_put(in6_dev);
return;
@@ -1241,6 +1243,10 @@ skip_defrtr:
}
}
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+skip_linkparms:
+#endif
+
/*
* Process options.
*/
@@ -1272,7 +1278,13 @@ skip_defrtr:
for (p = ndopts.nd_opts_ri;
p;
p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
- if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
+ struct route_info *ri = (struct route_info *)p;
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
+ ri->prefix_len == 0)
+ continue;
+#endif
+ if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
@@ -1280,6 +1292,12 @@ skip_defrtr:
}
#endif
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ /* skip link-specific ndopts from interior routers */
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ goto out;
+#endif
+
if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_pi;
@@ -1343,6 +1361,16 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
int optlen;
u8 *lladdr = NULL;
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ switch (skb->ndisc_nodetype) {
+ case NDISC_NODETYPE_HOST:
+ case NDISC_NODETYPE_NODEFAULT:
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: from host or unauthorized router\n");
+ return;
+ }
+#endif
+
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: source address is not link-local.\n");
@@ -1418,15 +1446,16 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
}
void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
- struct in6_addr *target)
+ const struct in6_addr *target)
{
- struct sock *sk = ndisc_socket->sk;
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.ndisc_sk;
int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
struct sk_buff *buff;
struct icmp6hdr *icmph;
struct in6_addr saddr_buf;
struct in6_addr *addrp;
- struct net_device *dev;
struct rt6_info *rt;
struct dst_entry *dst;
struct inet6_dev *idev;
@@ -1436,8 +1465,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
int err;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
- dev = skb->dev;
-
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: no link-local address on %s\n",
@@ -1452,10 +1479,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
return;
}
- ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr,
- dev->ifindex);
+ icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
+ &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
- dst = ip6_route_output(NULL, &fl);
+ dst = ip6_route_output(net, NULL, &fl);
if (dst == NULL)
return;
@@ -1499,12 +1526,11 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
if (buff == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 Redirect: %s() failed to allocate an skb.\n",
- __FUNCTION__);
+ __func__);
dst_release(dst);
return;
}
-
skb_reserve(buff, LL_RESERVED_SPACE(dev));
ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
IPPROTO_ICMPV6, len);
@@ -1625,18 +1651,16 @@ int ndisc_rcv(struct sk_buff *skb)
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
-
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
+ struct net *net = dev_net(dev);
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
- fib6_run_gc(~0UL);
+ fib6_run_gc(~0UL, net);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
- fib6_run_gc(~0UL);
+ fib6_run_gc(~0UL, net);
break;
default:
break;
@@ -1745,44 +1769,74 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
#endif
-int __init ndisc_init(struct net_proto_family *ops)
+static int ndisc_net_init(struct net *net)
{
struct ipv6_pinfo *np;
struct sock *sk;
int err;
- err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket);
+ err = inet_ctl_sock_create(&sk, PF_INET6,
+ SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
ND_PRINTK0(KERN_ERR
"ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
err);
- ndisc_socket = NULL; /* For safety. */
return err;
}
- sk = ndisc_socket->sk;
+ net->ipv6.ndisc_sk = sk;
+
np = inet6_sk(sk);
- sk->sk_allocation = GFP_ATOMIC;
np->hop_limit = 255;
/* Do not loopback ndisc messages */
np->mc_loop = 0;
- sk->sk_prot->unhash(sk);
+ return 0;
+}
+
+static void ndisc_net_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
+}
+
+static struct pernet_operations ndisc_net_ops = {
+ .init = ndisc_net_init,
+ .exit = ndisc_net_exit,
+};
+
+int __init ndisc_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&ndisc_net_ops);
+ if (err)
+ return err;
/*
* Initialize the neighbour table
*/
-
neigh_table_init(&nd_tbl);
#ifdef CONFIG_SYSCTL
- neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH,
- "ipv6",
- &ndisc_ifinfo_sysctl_change,
- &ndisc_ifinfo_sysctl_strategy);
+ err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
+ NET_IPV6_NEIGH, "ipv6",
+ &ndisc_ifinfo_sysctl_change,
+ &ndisc_ifinfo_sysctl_strategy);
+ if (err)
+ goto out_unregister_pernet;
#endif
+ err = register_netdevice_notifier(&ndisc_netdev_notifier);
+ if (err)
+ goto out_unregister_sysctl;
+out:
+ return err;
- register_netdevice_notifier(&ndisc_netdev_notifier);
- return 0;
+out_unregister_sysctl:
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(&nd_tbl.parms);
+out_unregister_pernet:
+#endif
+ unregister_pernet_subsys(&ndisc_net_ops);
+ goto out;
}
void ndisc_cleanup(void)
@@ -1792,6 +1846,5 @@ void ndisc_cleanup(void)
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
neigh_table_clear(&nd_tbl);
- sock_release(ndisc_socket);
- ndisc_socket = NULL; /* For safety. */
+ unregister_pernet_subsys(&ndisc_net_ops);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 2e06724dc34..8c6c5e71f21 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
.saddr = iph->saddr, } },
};
- dst = ip6_route_output(skb->sk, &fl);
+ dst = ip6_route_output(&init_net, skb->sk, &fl);
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -86,7 +86,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
{
- *dst = ip6_route_output(NULL, fl);
+ *dst = ip6_route_output(&init_net, NULL, fl);
return (*dst)->error;
}
@@ -121,16 +121,44 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
}
return csum;
}
-
EXPORT_SYMBOL(nf_ip6_checksum);
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum hsum;
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip6_checksum(skb, hook, dataoff, protocol);
+ /* fall through */
+ case CHECKSUM_NONE:
+ hsum = skb_checksum(skb, 0, dataoff, 0);
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+ &ip6h->daddr,
+ skb->len - dataoff,
+ protocol,
+ csum_sub(0, hsum)));
+ skb->ip_summed = CHECKSUM_NONE;
+ csum = __skb_checksum_complete_head(skb, dataoff + len);
+ if (!csum)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ return csum;
+};
+
static const struct nf_afinfo nf_ip6_afinfo = {
- .family = AF_INET6,
- .checksum = nf_ip6_checksum,
- .route = nf_ip6_route,
- .saveroute = nf_ip6_saveroute,
- .reroute = nf_ip6_reroute,
- .route_key_size = sizeof(struct ip6_rt_info),
+ .family = AF_INET6,
+ .checksum = nf_ip6_checksum,
+ .checksum_partial = nf_ip6_checksum_partial,
+ .route = nf_ip6_route,
+ .saveroute = nf_ip6_saveroute,
+ .reroute = nf_ip6_reroute,
+ .route_key_size = sizeof(struct ip6_rt_info),
};
int __init ipv6_netfilter_init(void)
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 8d366f7f2a9..92a36c9e540 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -484,7 +484,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
{
struct net_device *dev = ptr;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
return NOTIFY_DONE;
/* Drop any packets associated with the downed device */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index bf9bb6e55bb..0b4557e0343 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -55,7 +55,7 @@ MODULE_DESCRIPTION("IPv6 packet filter");
do { \
if (!(x)) \
printk("IP_NF_ASSERT: %s:%s:%u\n", \
- __FUNCTION__, __FILE__, __LINE__); \
+ __func__, __FILE__, __LINE__); \
} while(0)
#else
#define IP_NF_ASSERT(x)
@@ -325,7 +325,7 @@ static void trace_packet(struct sk_buff *skb,
struct ip6t_entry *e)
{
void *table_base;
- struct ip6t_entry *root;
+ const struct ip6t_entry *root;
char *hookname, *chainname, *comment;
unsigned int rulenum = 0;
@@ -952,7 +952,7 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -979,9 +979,9 @@ copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
struct ip6t_entry *e;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
int ret = 0;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
counters = alloc_counters(table);
if (IS_ERR(counters))
@@ -1001,8 +1001,8 @@ copy_entries_to_user(unsigned int total_size,
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
unsigned int i;
- struct ip6t_entry_match *m;
- struct ip6t_entry_target *t;
+ const struct ip6t_entry_match *m;
+ const struct ip6t_entry_target *t;
e = (struct ip6t_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -1142,7 +1142,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
"ip6table_%s", name);
if (t && !IS_ERR(t)) {
struct ip6t_getinfo info;
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
if (compat) {
@@ -1206,7 +1206,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, int *len)
else {
duprintf("get_entries: I've got %u not %u!\n",
private->size, get.size);
- ret = -EINVAL;
+ ret = -EAGAIN;
}
module_put(t->me);
xt_table_unlock(t);
@@ -1225,7 +1225,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
- void *loc_cpu_old_entry;
+ const void *loc_cpu_old_entry;
ret = 0;
counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
@@ -1369,9 +1369,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
int size;
void *ptmp;
struct xt_table *t;
- struct xt_table_info *private;
+ const struct xt_table_info *private;
int ret = 0;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
#ifdef CONFIG_COMPAT
struct compat_xt_counters_info compat_tmp;
@@ -1879,11 +1879,11 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case IP6T_SO_SET_REPLACE:
- ret = compat_do_replace(sk->sk_net, user, len);
+ ret = compat_do_replace(sock_net(sk), user, len);
break;
case IP6T_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sk->sk_net, user, len, 1);
+ ret = do_add_counters(sock_net(sk), user, len, 1);
break;
default:
@@ -1905,11 +1905,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *userptr)
{
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
unsigned int i = 0;
counters = alloc_counters(table);
@@ -1956,7 +1956,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
@@ -1966,7 +1966,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
} else if (!ret) {
duprintf("compat_get_entries: I've got %u not %u!\n",
private->size, get.size);
- ret = -EINVAL;
+ ret = -EAGAIN;
}
xt_compat_flush_offsets(AF_INET6);
module_put(t->me);
@@ -1990,10 +1990,10 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IP6T_SO_GET_INFO:
- ret = get_info(sk->sk_net, user, len, 1);
+ ret = get_info(sock_net(sk), user, len, 1);
break;
case IP6T_SO_GET_ENTRIES:
- ret = compat_get_entries(sk->sk_net, user, len);
+ ret = compat_get_entries(sock_net(sk), user, len);
break;
default:
ret = do_ip6t_get_ctl(sk, cmd, user, len);
@@ -2012,11 +2012,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
switch (cmd) {
case IP6T_SO_SET_REPLACE:
- ret = do_replace(sk->sk_net, user, len);
+ ret = do_replace(sock_net(sk), user, len);
break;
case IP6T_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sk->sk_net, user, len, 0);
+ ret = do_add_counters(sock_net(sk), user, len, 0);
break;
default:
@@ -2037,11 +2037,11 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IP6T_SO_GET_INFO:
- ret = get_info(sk->sk_net, user, len, 0);
+ ret = get_info(sock_net(sk), user, len, 0);
break;
case IP6T_SO_GET_ENTRIES:
- ret = get_entries(sk->sk_net, user, len);
+ ret = get_entries(sock_net(sk), user, len);
break;
case IP6T_SO_GET_REVISION_MATCH:
@@ -2155,7 +2155,8 @@ icmp6_match(const struct sk_buff *skb,
unsigned int protoff,
bool *hotdrop)
{
- struct icmp6hdr _icmph, *ic;
+ const struct icmp6hdr *ic;
+ struct icmp6hdr _icmph;
const struct ip6t_icmp *icmpinfo = matchinfo;
/* Must not be a fragment. */
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 86a613810b6..3a2316974f8 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -363,11 +363,15 @@ static void dump_packet(const struct nf_loginfo *info,
if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
read_lock_bh(&skb->sk->sk_callback_lock);
if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- printk("UID=%u GID=%u",
+ printk("UID=%u GID=%u ",
skb->sk->sk_socket->file->f_uid,
skb->sk->sk_socket->file->f_gid);
read_unlock_bh(&skb->sk->sk_callback_lock);
}
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (!recurse && skb->mark)
+ printk("MARK=0x%x ", skb->mark);
}
static struct nf_loginfo default_loginfo = {
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index b23baa635fe..44c8d65a243 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -41,7 +41,8 @@ static void send_reset(struct sk_buff *oldskb)
struct tcphdr otcph, *tcph;
unsigned int otcplen, hh_len;
int tcphoff, needs_ack;
- struct ipv6hdr *oip6h = ipv6_hdr(oldskb), *ip6h;
+ const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
+ struct ipv6hdr *ip6h;
struct dst_entry *dst = NULL;
u8 proto;
struct flowi fl;
@@ -93,7 +94,7 @@ static void send_reset(struct sk_buff *oldskb)
fl.fl_ip_sport = otcph.dest;
fl.fl_ip_dport = otcph.source;
security_skb_classify_flow(oldskb, &fl);
- dst = ip6_route_output(NULL, &fl);
+ dst = ip6_route_output(&init_net, NULL, &fl);
if (dst == NULL)
return;
if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0))
@@ -177,7 +178,7 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in,
{
const struct ip6t_reject_info *reject = targinfo;
- pr_debug("%s: medium point\n", __FUNCTION__);
+ pr_debug("%s: medium point\n", __func__);
/* WARNING: This code causes reentry within ip6tables.
This means that the ip6tables jump stack is now crap. We
must return an absolute verdict. --RR */
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 3a940171f82..317a8960a75 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -49,7 +49,8 @@ ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in,
temp = 0;
while (ip6t_ext_hdr(nexthdr)) {
- struct ipv6_opt_hdr _hdr, *hp;
+ const struct ipv6_opt_hdr *hp;
+ struct ipv6_opt_hdr _hdr;
int hdrlen;
/* Is there enough space for the next ext header? */
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 12a9efe9886..81aaf7aaaab 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -110,7 +110,8 @@ rt_mt6(const struct sk_buff *skb, const struct net_device *in,
!!(rtinfo->invflags & IP6T_RT_INV_TYP)));
if (ret && (rtinfo->flags & IP6T_RT_RES)) {
- u_int32_t *rp, _reserved;
+ const u_int32_t *rp;
+ u_int32_t _reserved;
rp = skb_header_pointer(skb,
ptr + offsetof(struct rt0_hdr,
reserved),
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 2d9cd095a72..f979e48b469 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -54,7 +54,7 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 035343a90ff..27a5e8b48d9 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -60,7 +60,7 @@ static struct
static struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5cd84203abf..92b91077ac2 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -38,7 +38,7 @@ static struct
static struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3717bdf34f6..85050c072ab 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -27,8 +27,8 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
- struct nf_conntrack_tuple *tuple)
+static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+ struct nf_conntrack_tuple *tuple)
{
const u_int32_t *ap;
u_int32_t _addrs[8];
@@ -36,21 +36,21 @@ static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
sizeof(_addrs), _addrs);
if (ap == NULL)
- return 0;
+ return false;
memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
- return 1;
+ return true;
}
-static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
- return 1;
+ return true;
}
static int ipv6_print_tuple(struct seq_file *s,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 0897d0f4c4a..ee713b03e9e 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -28,21 +28,21 @@
static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
-static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
{
const struct icmp6hdr *hp;
struct icmp6hdr _hdr;
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return 0;
+ return false;
tuple->dst.u.icmp.type = hp->icmp6_type;
tuple->src.u.icmp.id = hp->icmp6_identifier;
tuple->dst.u.icmp.code = hp->icmp6_code;
- return 1;
+ return true;
}
/* Add 1; spaces filled with 0. */
@@ -53,17 +53,17 @@ static const u_int8_t invmap[] = {
[ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
};
-static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
int type = orig->dst.u.icmp.type - 128;
if (type < 0 || type >= sizeof(invmap) || !invmap[type])
- return 0;
+ return false;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
tuple->dst.u.icmp.type = invmap[type] - 1;
tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
- return 1;
+ return true;
}
/* Print out the per-protocol part of the tuple. */
@@ -102,9 +102,8 @@ static int icmpv6_packet(struct nf_conn *ct,
}
/* Called when a new connection for this protocol found. */
-static int icmpv6_new(struct nf_conn *ct,
- const struct sk_buff *skb,
- unsigned int dataoff)
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMPV6_ECHO_REQUEST - 128] = 1,
@@ -116,11 +115,11 @@ static int icmpv6_new(struct nf_conn *ct,
/* Can't create a new ICMPv6 `conn' with this. */
pr_debug("icmpv6: can't create new conn with type %u\n",
type + 128);
- NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple);
- return 0;
+ nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+ return false;
}
atomic_set(&ct->proto.icmp.count, 0);
- return 1;
+ return true;
}
static int
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 24c0d03095b..2dccad48058 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -103,8 +103,8 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
};
#endif
-static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
- struct in6_addr *daddr)
+static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
u32 a, b, c;
@@ -132,7 +132,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
- struct nf_ct_frag6_queue *nq;
+ const struct nf_ct_frag6_queue *nq;
nq = container_of(q, struct nf_ct_frag6_queue, q);
return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
@@ -185,7 +185,7 @@ static void nf_ct_frag6_expire(unsigned long data)
spin_lock(&fq->q.lock);
- if (fq->q.last_in & COMPLETE)
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
goto out;
fq_kill(fq);
@@ -222,12 +222,12 @@ oom:
static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
- struct frag_hdr *fhdr, int nhoff)
+ const struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
int offset, end;
- if (fq->q.last_in & COMPLETE) {
+ if (fq->q.last_in & INET_FRAG_COMPLETE) {
pr_debug("Allready completed\n");
goto err;
}
@@ -254,11 +254,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & LAST_IN) && end != fq->q.len)) {
+ ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
- fq->q.last_in |= LAST_IN;
+ fq->q.last_in |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -273,7 +273,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & LAST_IN) {
+ if (fq->q.last_in & INET_FRAG_LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
@@ -385,7 +385,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= FIRST_IN;
+ fq->q.last_in |= INET_FRAG_FIRST_IN;
}
write_lock(&nf_frags.lock);
list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
@@ -647,7 +647,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
goto ret_orig;
}
- if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) {
+ if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
pr_debug("Can't reassemble fragmented packets\n");
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 199ef379e50..ca8b82f96fe 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -35,16 +35,18 @@ static struct proc_dir_entry *proc_net_devsnmp6;
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
+ struct net *net = seq->private;
+
seq_printf(seq, "TCP6: inuse %d\n",
- sock_prot_inuse_get(&tcpv6_prot));
+ sock_prot_inuse_get(net, &tcpv6_prot));
seq_printf(seq, "UDP6: inuse %d\n",
- sock_prot_inuse_get(&udpv6_prot));
+ sock_prot_inuse_get(net, &udpv6_prot));
seq_printf(seq, "UDPLITE6: inuse %d\n",
- sock_prot_inuse_get(&udplitev6_prot));
+ sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
- sock_prot_inuse_get(&rawv6_prot));
+ sock_prot_inuse_get(net, &rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net));
+ ip6_frag_nqueues(net), ip6_frag_mem(net));
return 0;
}
@@ -183,7 +185,32 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
static int sockstat6_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, sockstat6_seq_show, NULL);
+ int err;
+ struct net *net;
+
+ err = -ENXIO;
+ net = get_proc_net(inode);
+ if (net == NULL)
+ goto err_net;
+
+ err = single_open(file, sockstat6_seq_show, net);
+ if (err < 0)
+ goto err_open;
+
+ return 0;
+
+err_open:
+ put_net(net);
+err_net:
+ return err;
+}
+
+static int sockstat6_seq_release(struct inode *inode, struct file *file)
+{
+ struct net *net = ((struct seq_file *)file->private_data)->private;
+
+ put_net(net);
+ return single_release(inode, file);
}
static const struct file_operations sockstat6_seq_fops = {
@@ -191,7 +218,7 @@ static const struct file_operations sockstat6_seq_fops = {
.open = sockstat6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = sockstat6_seq_release,
};
static int snmp6_seq_open(struct inode *inode, struct file *file)
@@ -214,6 +241,9 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!idev || !idev->dev)
return -EINVAL;
+ if (dev_net(idev->dev) != &init_net)
+ return 0;
+
if (!proc_net_devsnmp6)
return -ENOENT;
@@ -240,27 +270,45 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
return 0;
}
+static int ipv6_proc_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "sockstat6", S_IRUGO,
+ &sockstat6_seq_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static void ipv6_proc_exit_net(struct net *net)
+{
+ proc_net_remove(net, "sockstat6");
+}
+
+static struct pernet_operations ipv6_proc_ops = {
+ .init = ipv6_proc_init_net,
+ .exit = ipv6_proc_exit_net,
+};
+
int __init ipv6_misc_proc_init(void)
{
int rc = 0;
+ if (register_pernet_subsys(&ipv6_proc_ops))
+ goto proc_net_fail;
+
if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops))
goto proc_snmp6_fail;
proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net);
if (!proc_net_devsnmp6)
goto proc_dev_snmp6_fail;
-
- if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops))
- goto proc_sockstat6_fail;
out:
return rc;
-proc_sockstat6_fail:
- proc_net_remove(&init_net, "dev_snmp6");
proc_dev_snmp6_fail:
proc_net_remove(&init_net, "snmp6");
proc_snmp6_fail:
+ unregister_pernet_subsys(&ipv6_proc_ops);
+proc_net_fail:
rc = -ENOMEM;
goto out;
}
@@ -270,5 +318,6 @@ void ipv6_misc_proc_exit(void)
proc_net_remove(&init_net, "sockstat6");
proc_net_remove(&init_net, "dev_snmp6");
proc_net_remove(&init_net, "snmp6");
+ unregister_pernet_subsys(&ipv6_proc_ops);
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0a6fbc1d1a5..6193b124cbc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -53,6 +53,7 @@
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
#include <net/mip6.h>
#endif
+#include <linux/mroute6.h>
#include <net/raw.h>
#include <net/rawv6.h>
@@ -62,20 +63,9 @@
#include <linux/seq_file.h>
static struct raw_hashinfo raw_v6_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(),
+ .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
};
-static void raw_v6_hash(struct sock *sk)
-{
- raw_hash_sk(sk, &raw_v6_hashinfo);
-}
-
-static void raw_v6_unhash(struct sock *sk)
-{
- raw_unhash_sk(sk, &raw_v6_hashinfo);
-}
-
-
static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, struct in6_addr *loc_addr,
struct in6_addr *rmt_addr, int dif)
@@ -87,7 +77,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
if (inet_sk(sk)->num == num) {
struct ipv6_pinfo *np = inet6_sk(sk);
- if (sk->sk_net != net)
+ if (!net_eq(sock_net(sk), net))
continue;
if (!ipv6_addr_any(&np->daddr) &&
@@ -179,15 +169,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
read_lock(&raw_v6_hashinfo.lock);
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
- /*
- * The first socket found will be delivered after
- * delivery to transport protocols.
- */
-
if (sk == NULL)
goto out;
- net = skb->dev->nd_net;
+ net = dev_net(skb->dev);
sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
while (sk) {
@@ -291,7 +276,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!sk->sk_bound_dev_if)
goto out;
- dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if);
+ dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
if (!dev) {
err = -ENODEV;
goto out;
@@ -304,7 +289,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
err = -EADDRNOTAVAIL;
- if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr,
+ if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
if (dev)
dev_put(dev);
@@ -372,11 +357,11 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
read_lock(&raw_v6_hashinfo.lock);
sk = sk_head(&raw_v6_hashinfo.ht[hash]);
if (sk != NULL) {
- struct ipv6hdr *hdr = (struct ipv6hdr *) skb->data;
-
- saddr = &hdr->saddr;
- daddr = &hdr->daddr;
- net = skb->dev->nd_net;
+ /* Note: ipv6_hdr(skb) != skb->data */
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
+ net = dev_net(skb->dev);
while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
IP6CB(skb)->iif))) {
@@ -822,15 +807,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
fl.fl6_flowlabel = np->flow_label;
}
- if (ipv6_addr_any(daddr)) {
- /*
- * unspecified destination address
- * treated as error... is this correct ?
- */
- fl6_sock_release(flowlabel);
- return(-EINVAL);
- }
-
if (fl.oif == 0)
fl.oif = sk->sk_bound_dev_if;
@@ -863,7 +839,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (err)
goto out;
- ipv6_addr_copy(&fl.fl6_dst, daddr);
+ if (!ipv6_addr_any(daddr))
+ ipv6_addr_copy(&fl.fl6_dst, daddr);
+ else
+ fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
@@ -898,9 +877,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
else
hlimit = np->hop_limit;
if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hlimit < 0)
- hlimit = ipv6_get_hoplimit(dst->dev);
+ hlimit = ip6_dst_hoplimit(dst);
}
if (tclass < 0) {
@@ -1155,7 +1132,11 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
default:
+#ifdef CONFIG_IPV6_MROUTE
+ return ip6mr_ioctl(sk, cmd, (void __user *)arg);
+#else
return -ENOIOCTLCMD;
+#endif
}
}
@@ -1163,7 +1144,7 @@ static void rawv6_close(struct sock *sk, long timeout)
{
if (inet_sk(sk)->num == IPPROTO_RAW)
ip6_ra_control(sk, -1, NULL);
-
+ ip6mr_sk_done(sk);
sk_common_release(sk);
}
@@ -1186,8 +1167,6 @@ static int rawv6_init_sk(struct sock *sk)
return(0);
}
-DEFINE_PROTO_INUSE(rawv6)
-
struct proto rawv6_prot = {
.name = "RAWv6",
.owner = THIS_MODULE,
@@ -1203,14 +1182,14 @@ struct proto rawv6_prot = {
.recvmsg = rawv6_recvmsg,
.bind = rawv6_bind,
.backlog_rcv = rawv6_rcv_skb,
- .hash = raw_v6_hash,
- .unhash = raw_v6_unhash,
+ .hash = raw_hash_sk,
+ .unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .h.raw_hash = &raw_v6_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_rawv6_setsockopt,
.compat_getsockopt = compat_rawv6_getsockopt,
#endif
- REF_PROTO_INUSE(rawv6)
};
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f936d045a39..7b247e3a16f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -202,7 +202,7 @@ static void ip6_frag_expire(unsigned long data)
spin_lock(&fq->q.lock);
- if (fq->q.last_in & COMPLETE)
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
goto out;
fq_kill(fq);
@@ -217,7 +217,7 @@ static void ip6_frag_expire(unsigned long data)
rcu_read_unlock();
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments)
+ if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
goto out;
/*
@@ -265,7 +265,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net_device *dev;
int offset, end;
- if (fq->q.last_in & COMPLETE)
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -294,9 +294,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & LAST_IN) && end != fq->q.len))
+ ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err;
- fq->q.last_in |= LAST_IN;
+ fq->q.last_in |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -314,7 +314,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & LAST_IN)
+ if (fq->q.last_in & INET_FRAG_LAST_IN)
goto err;
fq->q.len = end;
}
@@ -417,10 +417,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= FIRST_IN;
+ fq->q.last_in |= INET_FRAG_FIRST_IN;
}
- if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
+ if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len)
return ip6_frag_reasm(fq, prev, dev);
write_lock(&ip6_frags.lock);
@@ -600,7 +601,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- net = skb->dev->nd_net;
+ net = dev_net(skb->dev);
if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
ip6_evictor(net, ip6_dst_idev(skb->dst));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e8b241cb60b..210a079cfc6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -36,10 +36,12 @@
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
+#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -87,14 +89,16 @@ static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
#ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
+static struct rt6_info *rt6_add_route_info(struct net *net,
+ struct in6_addr *prefix, int prefixlen,
struct in6_addr *gwaddr, int ifindex,
unsigned pref);
-static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
+static struct rt6_info *rt6_get_route_info(struct net *net,
+ struct in6_addr *prefix, int prefixlen,
struct in6_addr *gwaddr, int ifindex);
#endif
-static struct dst_ops ip6_dst_ops = {
+static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
.protocol = __constant_htons(ETH_P_IPV6),
.gc = ip6_dst_gc,
@@ -124,7 +128,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.entries = ATOMIC_INIT(0),
};
-struct rt6_info ip6_null_entry = {
+static struct rt6_info ip6_null_entry_template = {
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
@@ -134,8 +138,6 @@ struct rt6_info ip6_null_entry = {
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
- .ops = &ip6_dst_ops,
- .path = (struct dst_entry*)&ip6_null_entry,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
@@ -148,7 +150,7 @@ struct rt6_info ip6_null_entry = {
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-struct rt6_info ip6_prohibit_entry = {
+struct rt6_info ip6_prohibit_entry_template = {
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
@@ -158,8 +160,6 @@ struct rt6_info ip6_prohibit_entry = {
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
- .ops = &ip6_dst_ops,
- .path = (struct dst_entry*)&ip6_prohibit_entry,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
@@ -167,7 +167,7 @@ struct rt6_info ip6_prohibit_entry = {
.rt6i_ref = ATOMIC_INIT(1),
};
-struct rt6_info ip6_blk_hole_entry = {
+static struct rt6_info ip6_blk_hole_entry_template = {
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
@@ -177,8 +177,6 @@ struct rt6_info ip6_blk_hole_entry = {
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = dst_discard,
.output = dst_discard,
- .ops = &ip6_dst_ops,
- .path = (struct dst_entry*)&ip6_blk_hole_entry,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
@@ -189,9 +187,9 @@ struct rt6_info ip6_blk_hole_entry = {
#endif
/* allocate dst with ip6_dst_ops */
-static __inline__ struct rt6_info *ip6_dst_alloc(void)
+static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
- return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
+ return (struct rt6_info *)dst_alloc(ops);
}
static void ip6_dst_destroy(struct dst_entry *dst)
@@ -211,7 +209,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
struct net_device *loopback_dev =
- dev->nd_net->loopback_dev;
+ dev_net(dev)->loopback_dev;
if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
struct inet6_dev *loopback_idev =
@@ -239,7 +237,8 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
* Route lookup. Any table->tb6_lock is implied.
*/
-static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
+static inline struct rt6_info *rt6_device_match(struct net *net,
+ struct rt6_info *rt,
int oif,
int strict)
{
@@ -268,7 +267,7 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
return local;
if (strict)
- return &ip6_null_entry;
+ return net->ipv6.ip6_null_entry;
}
return rt;
}
@@ -409,9 +408,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
struct rt6_info *match, *rt0;
+ struct net *net;
RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
- __FUNCTION__, fn->leaf, oif);
+ __func__, fn->leaf, oif);
rt0 = fn->rr_ptr;
if (!rt0)
@@ -432,15 +432,17 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
}
RT6_TRACE("%s() => %p\n",
- __FUNCTION__, match);
+ __func__, match);
- return (match ? match : &ip6_null_entry);
+ net = dev_net(rt0->rt6i_dev);
+ return (match ? match : net->ipv6.ip6_null_entry);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct in6_addr *gwaddr)
{
+ struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
unsigned int pref;
@@ -488,7 +490,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
prefix = &prefix_buf;
}
- rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
+ rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
+ dev->ifindex);
if (rt && !lifetime) {
ip6_del_rt(rt);
@@ -496,7 +499,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
if (!rt && lifetime)
- rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
+ rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
pref);
else if (rt)
rt->rt6i_flags = RTF_ROUTEINFO |
@@ -515,9 +518,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
#endif
-#define BACKTRACK(saddr) \
+#define BACKTRACK(__net, saddr) \
do { \
- if (rt == &ip6_null_entry) { \
+ if (rt == __net->ipv6.ip6_null_entry) { \
struct fib6_node *pn; \
while (1) { \
if (fn->fn_flags & RTN_TL_ROOT) \
@@ -533,7 +536,8 @@ do { \
} \
} while(0)
-static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route_lookup(struct net *net,
+ struct fib6_table *table,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
@@ -543,8 +547,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = fn->leaf;
- rt = rt6_device_match(rt, fl->oif, flags);
- BACKTRACK(&fl->fl6_src);
+ rt = rt6_device_match(net, rt, fl->oif, flags);
+ BACKTRACK(net, &fl->fl6_src);
out:
dst_use(&rt->u.dst, jiffies);
read_unlock_bh(&table->tb6_lock);
@@ -552,8 +556,8 @@ out:
}
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
- int oif, int strict)
+struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int oif, int strict)
{
struct flowi fl = {
.oif = oif,
@@ -571,7 +575,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -604,7 +608,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
int ip6_ins_rt(struct rt6_info *rt)
{
struct nl_info info = {
- .nl_net = &init_net,
+ .nl_net = dev_net(rt->rt6i_dev),
};
return __ip6_ins_rt(rt, &info);
}
@@ -660,8 +664,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
return rt;
}
-static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
- struct flowi *fl, int flags)
+static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
+ struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
@@ -680,8 +684,9 @@ restart_2:
restart:
rt = rt6_select(fn, oif, strict | reachable);
- BACKTRACK(&fl->fl6_src);
- if (rt == &ip6_null_entry ||
+
+ BACKTRACK(net, &fl->fl6_src);
+ if (rt == net->ipv6.ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
@@ -699,7 +704,7 @@ restart:
}
dst_release(&rt->u.dst);
- rt = nrt ? : &ip6_null_entry;
+ rt = nrt ? : net->ipv6.ip6_null_entry;
dst_hold(&rt->u.dst);
if (nrt) {
@@ -732,15 +737,16 @@ out2:
return rt;
}
-static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi *fl, int flags)
{
- return ip6_pol_route(table, fl->iif, fl, flags);
+ return ip6_pol_route(net, table, fl->iif, fl, flags);
}
void ip6_route_input(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct net *net = dev_net(skb->dev);
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
@@ -758,16 +764,17 @@ void ip6_route_input(struct sk_buff *skb)
if (rt6_need_strict(&iph->daddr))
flags |= RT6_LOOKUP_F_IFACE;
- skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
+ skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
}
-static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
struct flowi *fl, int flags)
{
- return ip6_pol_route(table, fl->oif, fl, flags);
+ return ip6_pol_route(net, table, fl->oif, fl, flags);
}
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
+ struct flowi *fl)
{
int flags = 0;
@@ -776,8 +783,17 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
if (!ipv6_addr_any(&fl->fl6_src))
flags |= RT6_LOOKUP_F_HAS_SADDR;
+ else if (sk) {
+ unsigned int prefs = inet6_sk(sk)->srcprefs;
+ if (prefs & IPV6_PREFER_SRC_TMP)
+ flags |= RT6_LOOKUP_F_SRCPREF_TMP;
+ if (prefs & IPV6_PREFER_SRC_PUBLIC)
+ flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
+ if (prefs & IPV6_PREFER_SRC_COA)
+ flags |= RT6_LOOKUP_F_SRCPREF_COA;
+ }
- return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);
@@ -886,12 +902,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
static int ipv6_get_mtu(struct net_device *dev);
-static inline unsigned int ipv6_advmss(unsigned int mtu)
+static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
{
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
- if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
- mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
+ if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
+ mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
/*
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
@@ -904,21 +920,21 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
return mtu;
}
-static struct dst_entry *ndisc_dst_gc_list;
-static DEFINE_SPINLOCK(ndisc_lock);
+static struct dst_entry *icmp6_dst_gc_list;
+static DEFINE_SPINLOCK(icmp6_dst_lock);
-struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
+struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct neighbour *neigh,
- struct in6_addr *addr,
- int (*output)(struct sk_buff *))
+ const struct in6_addr *addr)
{
struct rt6_info *rt;
struct inet6_dev *idev = in6_dev_get(dev);
+ struct net *net = dev_net(dev);
if (unlikely(idev == NULL))
return NULL;
- rt = ip6_dst_alloc();
+ rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
if (unlikely(rt == NULL)) {
in6_dev_put(idev);
goto out;
@@ -936,8 +952,8 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
atomic_set(&rt->u.dst.__refcnt, 1);
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
- rt->u.dst.output = output;
+ rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
+ rt->u.dst.output = ip6_output;
#if 0 /* there's no chance to use these for ndisc */
rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
@@ -947,18 +963,18 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
rt->rt6i_dst.plen = 128;
#endif
- spin_lock_bh(&ndisc_lock);
- rt->u.dst.next = ndisc_dst_gc_list;
- ndisc_dst_gc_list = &rt->u.dst;
- spin_unlock_bh(&ndisc_lock);
+ spin_lock_bh(&icmp6_dst_lock);
+ rt->u.dst.next = icmp6_dst_gc_list;
+ icmp6_dst_gc_list = &rt->u.dst;
+ spin_unlock_bh(&icmp6_dst_lock);
- fib6_force_start_gc();
+ fib6_force_start_gc(net);
out:
return &rt->u.dst;
}
-int ndisc_dst_gc(int *more)
+int icmp6_dst_gc(int *more)
{
struct dst_entry *dst, *next, **pprev;
int freed;
@@ -966,8 +982,8 @@ int ndisc_dst_gc(int *more)
next = NULL;
freed = 0;
- spin_lock_bh(&ndisc_lock);
- pprev = &ndisc_dst_gc_list;
+ spin_lock_bh(&icmp6_dst_lock);
+ pprev = &icmp6_dst_gc_list;
while ((dst = *pprev) != NULL) {
if (!atomic_read(&dst->__refcnt)) {
@@ -980,30 +996,33 @@ int ndisc_dst_gc(int *more)
}
}
- spin_unlock_bh(&ndisc_lock);
+ spin_unlock_bh(&icmp6_dst_lock);
return freed;
}
static int ip6_dst_gc(struct dst_ops *ops)
{
- static unsigned expire = 30*HZ;
- static unsigned long last_gc;
unsigned long now = jiffies;
-
- if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
- atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
+ struct net *net = ops->dst_net;
+ int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
+ int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
+ int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
+ unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+
+ if (time_after(rt_last_gc + rt_min_interval, now) &&
+ atomic_read(&ops->entries) <= rt_max_size)
goto out;
- expire++;
- fib6_run_gc(expire);
- last_gc = now;
- if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
- expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
-
+ net->ipv6.ip6_rt_gc_expire++;
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
+ net->ipv6.ip6_rt_last_gc = now;
+ if (atomic_read(&ops->entries) < ops->gc_thresh)
+ net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
- expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
- return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
+ net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+ return (atomic_read(&ops->entries) > rt_max_size);
}
/* Clean host part of a prefix. Not necessary in radix tree,
@@ -1025,15 +1044,17 @@ static int ipv6_get_mtu(struct net_device *dev)
return mtu;
}
-int ipv6_get_hoplimit(struct net_device *dev)
+int ip6_dst_hoplimit(struct dst_entry *dst)
{
- int hoplimit = ipv6_devconf.hop_limit;
- struct inet6_dev *idev;
-
- idev = in6_dev_get(dev);
- if (idev) {
- hoplimit = idev->cnf.hop_limit;
- in6_dev_put(idev);
+ int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
+ if (hoplimit < 0) {
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = in6_dev_get(dev);
+ if (idev) {
+ hoplimit = idev->cnf.hop_limit;
+ in6_dev_put(idev);
+ } else
+ hoplimit = ipv6_devconf.hop_limit;
}
return hoplimit;
}
@@ -1045,6 +1066,7 @@ int ipv6_get_hoplimit(struct net_device *dev)
int ip6_route_add(struct fib6_config *cfg)
{
int err;
+ struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
@@ -1059,7 +1081,7 @@ int ip6_route_add(struct fib6_config *cfg)
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
- dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
+ dev = dev_get_by_index(net, cfg->fc_ifindex);
if (!dev)
goto out;
idev = in6_dev_get(dev);
@@ -1070,13 +1092,13 @@ int ip6_route_add(struct fib6_config *cfg)
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
- table = fib6_new_table(cfg->fc_table);
+ table = fib6_new_table(net, cfg->fc_table);
if (table == NULL) {
err = -ENOBUFS;
goto out;
}
- rt = ip6_dst_alloc();
+ rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
if (rt == NULL) {
err = -ENOMEM;
@@ -1117,12 +1139,12 @@ int ip6_route_add(struct fib6_config *cfg)
if ((cfg->fc_flags & RTF_REJECT) ||
(dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
/* hold loopback dev/idev if we haven't done so. */
- if (dev != init_net.loopback_dev) {
+ if (dev != net->loopback_dev) {
if (dev) {
dev_put(dev);
in6_dev_put(idev);
}
- dev = init_net.loopback_dev;
+ dev = net->loopback_dev;
dev_hold(dev);
idev = in6_dev_get(dev);
if (!idev) {
@@ -1159,7 +1181,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!(gwa_type&IPV6_ADDR_UNICAST))
goto out;
- grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
if (grt == NULL)
@@ -1226,10 +1248,13 @@ install_route:
if (!rt->u.dst.metrics[RTAX_MTU-1])
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
- rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+ rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
rt->u.dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
+
+ cfg->fc_nlinfo.nl_net = dev_net(dev);
+
return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
out:
@@ -1246,8 +1271,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
int err;
struct fib6_table *table;
+ struct net *net = dev_net(rt->rt6i_dev);
- if (rt == &ip6_null_entry)
+ if (rt == net->ipv6.ip6_null_entry)
return -ENOENT;
table = rt->rt6i_table;
@@ -1264,7 +1290,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
int ip6_del_rt(struct rt6_info *rt)
{
struct nl_info info = {
- .nl_net = &init_net,
+ .nl_net = dev_net(rt->rt6i_dev),
};
return __ip6_del_rt(rt, &info);
}
@@ -1276,7 +1302,7 @@ static int ip6_route_del(struct fib6_config *cfg)
struct rt6_info *rt;
int err = -ESRCH;
- table = fib6_get_table(cfg->fc_table);
+ table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
if (table == NULL)
return err;
@@ -1316,7 +1342,8 @@ struct ip6rd_flowi {
struct in6_addr gateway;
};
-static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+ struct fib6_table *table,
struct flowi *fl,
int flags)
{
@@ -1359,8 +1386,8 @@ restart:
}
if (!rt)
- rt = &ip6_null_entry;
- BACKTRACK(&fl->fl6_src);
+ rt = net->ipv6.ip6_null_entry;
+ BACKTRACK(net, &fl->fl6_src);
out:
dst_hold(&rt->u.dst);
@@ -1375,6 +1402,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
struct net_device *dev)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct net *net = dev_net(dev);
struct ip6rd_flowi rdfl = {
.fl = {
.oif = dev->ifindex,
@@ -1391,7 +1419,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
if (rt6_need_strict(dest))
flags |= RT6_LOOKUP_F_IFACE;
- return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
+ return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
+ flags, __ip6_route_redirect);
}
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
@@ -1400,10 +1429,11 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
{
struct rt6_info *rt, *nrt = NULL;
struct netevent_redirect netevent;
+ struct net *net = dev_net(neigh->dev);
rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
- if (rt == &ip6_null_entry) {
+ if (rt == net->ipv6.ip6_null_entry) {
if (net_ratelimit())
printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
"for redirect target\n");
@@ -1448,7 +1478,8 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
nrt->rt6i_nexthop = neigh_clone(neigh);
/* Reset pmtu, it may be better */
nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
- nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
+ nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
+ dst_mtu(&nrt->u.dst));
if (ip6_ins_rt(nrt))
goto out;
@@ -1476,9 +1507,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
struct net_device *dev, u32 pmtu)
{
struct rt6_info *rt, *nrt;
+ struct net *net = dev_net(dev);
int allfrag = 0;
- rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
+ rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
if (rt == NULL)
return;
@@ -1511,7 +1543,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
- dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
+ dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
goto out;
}
@@ -1537,7 +1569,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
* which is 10 mins. After 10 mins the decreased pmtu is expired
* and detecting PMTU increase will be automatically happened.
*/
- dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
+ dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
ip6_ins_rt(nrt);
@@ -1552,7 +1584,8 @@ out:
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
- struct rt6_info *rt = ip6_dst_alloc();
+ struct net *net = dev_net(ort->rt6i_dev);
+ struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
if (rt) {
rt->u.dst.input = ort->u.dst.input;
@@ -1583,14 +1616,15 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
}
#ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
+static struct rt6_info *rt6_get_route_info(struct net *net,
+ struct in6_addr *prefix, int prefixlen,
struct in6_addr *gwaddr, int ifindex)
{
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
- table = fib6_get_table(RT6_TABLE_INFO);
+ table = fib6_get_table(net, RT6_TABLE_INFO);
if (table == NULL)
return NULL;
@@ -1614,7 +1648,8 @@ out:
return rt;
}
-static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
+static struct rt6_info *rt6_add_route_info(struct net *net,
+ struct in6_addr *prefix, int prefixlen,
struct in6_addr *gwaddr, int ifindex,
unsigned pref)
{
@@ -1625,6 +1660,9 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
+ .fc_nlinfo.pid = 0,
+ .fc_nlinfo.nlh = NULL,
+ .fc_nlinfo.nl_net = net,
};
ipv6_addr_copy(&cfg.fc_dst, prefix);
@@ -1636,7 +1674,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
ip6_route_add(&cfg);
- return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
+ return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
@@ -1645,7 +1683,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
struct rt6_info *rt;
struct fib6_table *table;
- table = fib6_get_table(RT6_TABLE_DFLT);
+ table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
if (table == NULL)
return NULL;
@@ -1662,8 +1700,6 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
return rt;
}
-EXPORT_SYMBOL(rt6_get_dflt_router);
-
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref)
@@ -1674,6 +1710,9 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+ .fc_nlinfo.pid = 0,
+ .fc_nlinfo.nlh = NULL,
+ .fc_nlinfo.nl_net = dev_net(dev),
};
ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
@@ -1683,13 +1722,13 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
return rt6_get_dflt_router(gwaddr, dev);
}
-void rt6_purge_dflt_routers(void)
+void rt6_purge_dflt_routers(struct net *net)
{
struct rt6_info *rt;
struct fib6_table *table;
/* NOTE: Keep consistent with rt6_get_dflt_router */
- table = fib6_get_table(RT6_TABLE_DFLT);
+ table = fib6_get_table(net, RT6_TABLE_DFLT);
if (table == NULL)
return;
@@ -1706,7 +1745,8 @@ restart:
read_unlock_bh(&table->tb6_lock);
}
-static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
+static void rtmsg_to_fib6_config(struct net *net,
+ struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
{
memset(cfg, 0, sizeof(*cfg));
@@ -1719,14 +1759,14 @@ static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
cfg->fc_src_len = rtmsg->rtmsg_src_len;
cfg->fc_flags = rtmsg->rtmsg_flags;
- cfg->fc_nlinfo.nl_net = &init_net;
+ cfg->fc_nlinfo.nl_net = net;
ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
-int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
+int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct fib6_config cfg;
struct in6_rtmsg rtmsg;
@@ -1742,7 +1782,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
if (err)
return -EFAULT;
- rtmsg_to_fib6_config(&rtmsg, &cfg);
+ rtmsg_to_fib6_config(net, &rtmsg, &cfg);
rtnl_lock();
switch (cmd) {
@@ -1821,21 +1861,22 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
int anycast)
{
- struct rt6_info *rt = ip6_dst_alloc();
+ struct net *net = dev_net(idev->dev);
+ struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
if (rt == NULL)
return ERR_PTR(-ENOMEM);
- dev_hold(init_net.loopback_dev);
+ dev_hold(net->loopback_dev);
in6_dev_hold(idev);
rt->u.dst.flags = DST_HOST;
rt->u.dst.input = ip6_input;
rt->u.dst.output = ip6_output;
- rt->rt6i_dev = init_net.loopback_dev;
+ rt->rt6i_dev = net->loopback_dev;
rt->rt6i_idev = idev;
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+ rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
rt->u.dst.obsolete = -1;
@@ -1852,26 +1893,39 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
- rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
+ rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
atomic_set(&rt->u.dst.__refcnt, 1);
return rt;
}
+struct arg_dev_net {
+ struct net_device *dev;
+ struct net *net;
+};
+
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
- if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
- rt != &ip6_null_entry) {
+ struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
+ struct net *net = ((struct arg_dev_net *)arg)->net;
+
+ if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
+ rt != net->ipv6.ip6_null_entry) {
RT6_TRACE("deleted by ifdown %p\n", rt);
return -1;
}
return 0;
}
-void rt6_ifdown(struct net_device *dev)
+void rt6_ifdown(struct net *net, struct net_device *dev)
{
- fib6_clean_all(fib6_ifdown, 0, dev);
+ struct arg_dev_net adn = {
+ .dev = dev,
+ .net = net,
+ };
+
+ fib6_clean_all(net, fib6_ifdown, 0, &adn);
}
struct rt6_mtu_change_arg
@@ -1884,6 +1938,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
+ struct net *net = dev_net(arg->dev);
/* In IPv6 pmtu discovery is not optional,
so that RTAX_MTU lock cannot disable it.
@@ -1915,7 +1970,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
(dst_mtu(&rt->u.dst) < arg->mtu &&
dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
- rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
+ rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
}
return 0;
}
@@ -1927,7 +1982,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu)
.mtu = mtu,
};
- fib6_clean_all(rt6_mtu_change_route, 0, &arg);
+ fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
@@ -1964,7 +2019,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
cfg->fc_nlinfo.nlh = nlh;
- cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
+ cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
if (tb[RTA_GATEWAY]) {
nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
@@ -2010,13 +2065,9 @@ errout:
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
struct fib6_config cfg;
int err;
- if (net != &init_net)
- return -EINVAL;
-
err = rtm_to_fib6_config(skb, nlh, &cfg);
if (err < 0)
return err;
@@ -2026,13 +2077,9 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
struct fib6_config cfg;
int err;
- if (net != &init_net)
- return -EINVAL;
-
err = rtm_to_fib6_config(skb, nlh, &cfg);
if (err < 0)
return err;
@@ -2058,7 +2105,7 @@ static inline size_t rt6_nlmsg_size(void)
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 pid, u32 seq,
- int prefix, unsigned int flags)
+ int prefix, int nowait, unsigned int flags)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
@@ -2118,11 +2165,27 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
} else if (rtm->rtm_src_len)
NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
- if (iif)
- NLA_PUT_U32(skb, RTA_IIF, iif);
- else if (dst) {
+ if (iif) {
+#ifdef CONFIG_IPV6_MROUTE
+ if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
+ int err = ip6mr_get_route(skb, rtm, nowait);
+ if (err <= 0) {
+ if (!nowait) {
+ if (err == 0)
+ return 0;
+ goto nla_put_failure;
+ } else {
+ if (err == -EMSGSIZE)
+ goto nla_put_failure;
+ }
+ }
+ } else
+#endif
+ NLA_PUT_U32(skb, RTA_IIF, iif);
+ } else if (dst) {
struct in6_addr saddr_buf;
- if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
+ if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
+ dst, 0, &saddr_buf) == 0)
NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
}
@@ -2162,12 +2225,12 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
- prefix, NLM_F_MULTI);
+ prefix, 0, NLM_F_MULTI);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = in_skb->sk->sk_net;
+ struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
struct rt6_info *rt;
struct sk_buff *skb;
@@ -2175,9 +2238,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
struct flowi fl;
int err, iif = 0;
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
if (err < 0)
goto errout;
@@ -2207,7 +2267,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
if (iif) {
struct net_device *dev;
- dev = __dev_get_by_index(&init_net, iif);
+ dev = __dev_get_by_index(net, iif);
if (!dev) {
err = -ENODEV;
goto errout;
@@ -2226,18 +2286,18 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
skb_reset_mac_header(skb);
skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
- rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
+ rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
skb->dst = &rt->u.dst;
err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
- nlh->nlmsg_seq, 0, 0);
+ nlh->nlmsg_seq, 0, 0, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
}
- err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
return err;
}
@@ -2245,6 +2305,7 @@ errout:
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
struct sk_buff *skb;
+ struct net *net = info->nl_net;
u32 seq;
int err;
@@ -2256,18 +2317,38 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
goto errout;
err = rt6_fill_node(skb, rt, NULL, NULL, 0,
- event, info->pid, seq, 0, 0);
+ event, info->pid, seq, 0, 0, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, &init_net, info->pid,
- RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
+ err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+ info->nlh, gfp_any());
errout:
if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
+static int ip6_route_dev_notify(struct notifier_block *this,
+ unsigned long event, void *data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
+ net->ipv6.ip6_null_entry->u.dst.dev = dev;
+ net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
+ net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
+ net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
+ net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
+#endif
+ }
+
+ return NOTIFY_OK;
}
/*
@@ -2316,13 +2397,33 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
static int ipv6_route_show(struct seq_file *m, void *v)
{
- fib6_clean_all(rt6_info_route, 0, m);
+ struct net *net = (struct net *)m->private;
+ fib6_clean_all(net, rt6_info_route, 0, m);
return 0;
}
static int ipv6_route_open(struct inode *inode, struct file *file)
{
- return single_open(file, ipv6_route_show, NULL);
+ int err;
+ struct net *net = get_proc_net(inode);
+ if (!net)
+ return -ENXIO;
+
+ err = single_open(file, ipv6_route_show, net);
+ if (err < 0) {
+ put_net(net);
+ return err;
+ }
+
+ return 0;
+}
+
+static int ipv6_route_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = seq->private;
+ put_net(net);
+ return single_release(inode, file);
}
static const struct file_operations ipv6_route_proc_fops = {
@@ -2330,24 +2431,46 @@ static const struct file_operations ipv6_route_proc_fops = {
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = ipv6_route_release,
};
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
+ struct net *net = (struct net *)seq->private;
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
- rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
- rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
- rt6_stats.fib_rt_cache,
- atomic_read(&ip6_dst_ops.entries),
- rt6_stats.fib_discarded_routes);
+ net->ipv6.rt6_stats->fib_nodes,
+ net->ipv6.rt6_stats->fib_route_nodes,
+ net->ipv6.rt6_stats->fib_rt_alloc,
+ net->ipv6.rt6_stats->fib_rt_entries,
+ net->ipv6.rt6_stats->fib_rt_cache,
+ atomic_read(&net->ipv6.ip6_dst_ops->entries),
+ net->ipv6.rt6_stats->fib_discarded_routes);
return 0;
}
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, rt6_stats_seq_show, NULL);
+ int err;
+ struct net *net = get_proc_net(inode);
+ if (!net)
+ return -ENXIO;
+
+ err = single_open(file, rt6_stats_seq_show, net);
+ if (err < 0) {
+ put_net(net);
+ return err;
+ }
+
+ return 0;
+}
+
+static int rt6_stats_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = (struct net *)seq->private;
+ put_net(net);
+ return single_release(inode, file);
}
static const struct file_operations rt6_stats_seq_fops = {
@@ -2355,42 +2478,8 @@ static const struct file_operations rt6_stats_seq_fops = {
.open = rt6_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = rt6_stats_seq_release,
};
-
-static int ipv6_route_proc_init(struct net *net)
-{
- int ret = -ENOMEM;
- if (!proc_net_fops_create(net, "ipv6_route",
- 0, &ipv6_route_proc_fops))
- goto out;
-
- if (!proc_net_fops_create(net, "rt6_stats",
- S_IRUGO, &rt6_stats_seq_fops))
- goto out_ipv6_route;
-
- ret = 0;
-out:
- return ret;
-out_ipv6_route:
- proc_net_remove(net, "ipv6_route");
- goto out;
-}
-
-static void ipv6_route_proc_fini(struct net *net)
-{
- proc_net_remove(net, "ipv6_route");
- proc_net_remove(net, "rt6_stats");
-}
-#else
-static inline int ipv6_route_proc_init(struct net *net)
-{
- return 0;
-}
-static inline void ipv6_route_proc_fini(struct net *net)
-{
- return ;
-}
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SYSCTL
@@ -2399,10 +2488,11 @@ static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int delay = init_net.ipv6.sysctl.flush_delay;
+ struct net *net = current->nsproxy->net_ns;
+ int delay = net->ipv6.sysctl.flush_delay;
if (write) {
proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
+ fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
return 0;
} else
return -EINVAL;
@@ -2419,7 +2509,7 @@ ctl_table ipv6_route_table_template[] = {
{
.ctl_name = NET_IPV6_ROUTE_GC_THRESH,
.procname = "gc_thresh",
- .data = &ip6_dst_ops.gc_thresh,
+ .data = &ip6_dst_ops_template.gc_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -2505,33 +2595,143 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net)
table = kmemdup(ipv6_route_table_template,
sizeof(ipv6_route_table_template),
GFP_KERNEL);
+
+ if (table) {
+ table[0].data = &net->ipv6.sysctl.flush_delay;
+ table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
+ table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+ table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
+ table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
+ table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
+ table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
+ table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
+ }
+
return table;
}
#endif
+static int ip6_route_net_init(struct net *net)
+{
+ int ret = 0;
+
+ ret = -ENOMEM;
+ net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
+ sizeof(*net->ipv6.ip6_dst_ops),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_dst_ops)
+ goto out;
+ net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
+
+ net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
+ sizeof(*net->ipv6.ip6_null_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_null_entry)
+ goto out_ip6_dst_ops;
+ net->ipv6.ip6_null_entry->u.dst.path =
+ (struct dst_entry *)net->ipv6.ip6_null_entry;
+ net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
+ sizeof(*net->ipv6.ip6_prohibit_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_prohibit_entry) {
+ kfree(net->ipv6.ip6_null_entry);
+ goto out;
+ }
+ net->ipv6.ip6_prohibit_entry->u.dst.path =
+ (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
+ net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
+
+ net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
+ sizeof(*net->ipv6.ip6_blk_hole_entry),
+ GFP_KERNEL);
+ if (!net->ipv6.ip6_blk_hole_entry) {
+ kfree(net->ipv6.ip6_null_entry);
+ kfree(net->ipv6.ip6_prohibit_entry);
+ goto out;
+ }
+ net->ipv6.ip6_blk_hole_entry->u.dst.path =
+ (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
+ net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
+#endif
+
+#ifdef CONFIG_PROC_FS
+ proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
+ proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+#endif
+ net->ipv6.ip6_rt_gc_expire = 30*HZ;
+
+ ret = 0;
+out:
+ return ret;
+
+out_ip6_dst_ops:
+ release_net(net->ipv6.ip6_dst_ops->dst_net);
+ kfree(net->ipv6.ip6_dst_ops);
+ goto out;
+}
+
+static void ip6_route_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "ipv6_route");
+ proc_net_remove(net, "rt6_stats");
+#endif
+ kfree(net->ipv6.ip6_null_entry);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ kfree(net->ipv6.ip6_prohibit_entry);
+ kfree(net->ipv6.ip6_blk_hole_entry);
+#endif
+ release_net(net->ipv6.ip6_dst_ops->dst_net);
+ kfree(net->ipv6.ip6_dst_ops);
+}
+
+static struct pernet_operations ip6_route_net_ops = {
+ .init = ip6_route_net_init,
+ .exit = ip6_route_net_exit,
+};
+
+static struct notifier_block ip6_route_dev_notifier = {
+ .notifier_call = ip6_route_dev_notify,
+ .priority = 0,
+};
+
int __init ip6_route_init(void)
{
int ret;
- ip6_dst_ops.kmem_cachep =
+ ret = -ENOMEM;
+ ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
SLAB_HWCACHE_ALIGN, NULL);
- if (!ip6_dst_ops.kmem_cachep)
- return -ENOMEM;
-
- ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
+ if (!ip6_dst_ops_template.kmem_cachep)
+ goto out;;
- ret = fib6_init();
+ ret = register_pernet_subsys(&ip6_route_net_ops);
if (ret)
goto out_kmem_cache;
- ret = ipv6_route_proc_init(&init_net);
+ /* Registering of the loopback is done before this portion of code,
+ * the loopback reference in rt6_info will not be taken, do it
+ * manually for init_net */
+ init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #endif
+ ret = fib6_init();
if (ret)
- goto out_fib6_init;
+ goto out_register_subsys;
ret = xfrm6_init();
if (ret)
- goto out_proc_init;
+ goto out_fib6_init;
ret = fib6_rules_init();
if (ret)
@@ -2543,7 +2743,10 @@ int __init ip6_route_init(void)
__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
goto fib6_rules_init;
- ret = 0;
+ ret = register_netdevice_notifier(&ip6_route_dev_notifier);
+ if (ret)
+ goto fib6_rules_init;
+
out:
return ret;
@@ -2551,22 +2754,21 @@ fib6_rules_init:
fib6_rules_cleanup();
xfrm6_init:
xfrm6_fini();
-out_proc_init:
- ipv6_route_proc_fini(&init_net);
out_fib6_init:
- rt6_ifdown(NULL);
fib6_gc_cleanup();
+out_register_subsys:
+ unregister_pernet_subsys(&ip6_route_net_ops);
out_kmem_cache:
- kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+ kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
goto out;
}
void ip6_route_cleanup(void)
{
+ unregister_netdevice_notifier(&ip6_route_dev_notifier);
fib6_rules_cleanup();
- ipv6_route_proc_fini(&init_net);
xfrm6_fini();
- rt6_ifdown(NULL);
fib6_gc_cleanup();
- kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+ unregister_pernet_subsys(&ip6_route_net_ops);
+ kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1656c003b98..4b2f1033994 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -16,7 +16,7 @@
* Changes:
* Roger Venning <r.venning@telstra.com>: 6to4 support
* Nate Thompson <nate@thebog.net>: 6to4 support
- * Fred L. Templin <fltemplin@acm.org>: isatap support
+ * Fred Templin <fred.l.templin@boeing.com>: isatap support
*/
#include <linux/module.h>
@@ -52,6 +52,8 @@
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
/*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -66,41 +68,47 @@ static int ipip6_fb_tunnel_init(struct net_device *dev);
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);
-static struct net_device *ipip6_fb_tunnel_dev;
+static int sit_net_id;
+struct sit_net {
+ struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+ struct ip_tunnel *tunnels_r[HASH_SIZE];
+ struct ip_tunnel *tunnels_l[HASH_SIZE];
+ struct ip_tunnel *tunnels_wc[1];
+ struct ip_tunnel **tunnels[4];
-static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_r[HASH_SIZE];
-static struct ip_tunnel *tunnels_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_wc[1];
-static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+ struct net_device *fb_tunnel_dev;
+};
static DEFINE_RWLOCK(ipip6_lock);
-static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
+static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
+ __be32 remote, __be32 local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
struct ip_tunnel *t;
+ struct sit_net *sitn = net_generic(net, sit_net_id);
- for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
}
- for (t = tunnels_r[h0]; t; t = t->next) {
+ for (t = sitn->tunnels_r[h0]; t; t = t->next) {
if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
}
- for (t = tunnels_l[h1]; t; t = t->next) {
+ for (t = sitn->tunnels_l[h1]; t; t = t->next) {
if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
return t;
}
- if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+ if ((t = sitn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
return t;
return NULL;
}
-static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
+static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
+ struct ip_tunnel_parm *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -115,19 +123,20 @@ static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
prio |= 1;
h ^= HASH(local);
}
- return &tunnels[prio][h];
+ return &sitn->tunnels[prio][h];
}
-static inline struct ip_tunnel **ipip6_bucket(struct ip_tunnel *t)
+static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
+ struct ip_tunnel *t)
{
- return __ipip6_bucket(&t->parms);
+ return __ipip6_bucket(sitn, &t->parms);
}
-static void ipip6_tunnel_unlink(struct ip_tunnel *t)
+static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
{
struct ip_tunnel **tp;
- for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) {
+ for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) {
if (t == *tp) {
write_lock_bh(&ipip6_lock);
*tp = t->next;
@@ -137,9 +146,9 @@ static void ipip6_tunnel_unlink(struct ip_tunnel *t)
}
}
-static void ipip6_tunnel_link(struct ip_tunnel *t)
+static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
{
- struct ip_tunnel **tp = ipip6_bucket(t);
+ struct ip_tunnel **tp = ipip6_bucket(sitn, t);
t->next = *tp;
write_lock_bh(&ipip6_lock);
@@ -147,15 +156,17 @@ static void ipip6_tunnel_link(struct ip_tunnel *t)
write_unlock_bh(&ipip6_lock);
}
-static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
+ struct ip_tunnel_parm *parms, int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
char name[IFNAMSIZ];
+ struct sit_net *sitn = net_generic(net, sit_net_id);
- for (tp = __ipip6_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -171,6 +182,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
if (dev == NULL)
return NULL;
+ dev_net_set(dev, net);
+
if (strchr(name, '%')) {
if (dev_alloc_name(dev, name) < 0)
goto failed_free;
@@ -188,7 +201,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
dev_hold(dev);
- ipip6_tunnel_link(nt);
+ ipip6_tunnel_link(sitn, nt);
return nt;
failed_free:
@@ -197,15 +210,192 @@ failed:
return NULL;
}
+static struct ip_tunnel_prl_entry *
+__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
+{
+ struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL;
+
+ for (p = t->prl; p; p = p->next)
+ if (p->addr == addr)
+ break;
+ return p;
+
+}
+
+static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
+{
+ struct ip_tunnel_prl *kp;
+ struct ip_tunnel_prl_entry *prl;
+ unsigned int cmax, c = 0, ca, len;
+ int ret = 0;
+
+ cmax = a->datalen / sizeof(*a);
+ if (cmax > 1 && a->addr != htonl(INADDR_ANY))
+ cmax = 1;
+
+ /* For simple GET or for root users,
+ * we try harder to allocate.
+ */
+ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+ NULL;
+
+ read_lock(&ipip6_lock);
+
+ ca = t->prl_count < cmax ? t->prl_count : cmax;
+
+ if (!kp) {
+ /* We don't try hard to allocate much memory for
+ * non-root users.
+ * For root users, retry allocating enough memory for
+ * the answer.
+ */
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ if (!kp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ c = 0;
+ for (prl = t->prl; prl; prl = prl->next) {
+ if (c > cmax)
+ break;
+ if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr)
+ continue;
+ kp[c].addr = prl->addr;
+ kp[c].flags = prl->flags;
+ c++;
+ if (a->addr != htonl(INADDR_ANY))
+ break;
+ }
+out:
+ read_unlock(&ipip6_lock);
+
+ len = sizeof(*kp) * c;
+ ret = len ? copy_to_user(a->data, kp, len) : 0;
+
+ kfree(kp);
+ if (ret)
+ return -EFAULT;
+
+ a->datalen = len;
+ return 0;
+}
+
+static int
+ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
+{
+ struct ip_tunnel_prl_entry *p;
+ int err = 0;
+
+ if (a->addr == htonl(INADDR_ANY))
+ return -EINVAL;
+
+ write_lock(&ipip6_lock);
+
+ for (p = t->prl; p; p = p->next) {
+ if (p->addr == a->addr) {
+ if (chg)
+ goto update;
+ err = -EEXIST;
+ goto out;
+ }
+ }
+
+ if (chg) {
+ err = -ENXIO;
+ goto out;
+ }
+
+ p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
+ if (!p) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ p->next = t->prl;
+ t->prl = p;
+ t->prl_count++;
+update:
+ p->addr = a->addr;
+ p->flags = a->flags;
+out:
+ write_unlock(&ipip6_lock);
+ return err;
+}
+
+static int
+ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
+{
+ struct ip_tunnel_prl_entry *x, **p;
+ int err = 0;
+
+ write_lock(&ipip6_lock);
+
+ if (a && a->addr != htonl(INADDR_ANY)) {
+ for (p = &t->prl; *p; p = &(*p)->next) {
+ if ((*p)->addr == a->addr) {
+ x = *p;
+ *p = x->next;
+ kfree(x);
+ t->prl_count--;
+ goto out;
+ }
+ }
+ err = -ENXIO;
+ } else {
+ while (t->prl) {
+ x = t->prl;
+ t->prl = t->prl->next;
+ kfree(x);
+ t->prl_count--;
+ }
+ }
+out:
+ write_unlock(&ipip6_lock);
+ return 0;
+}
+
+static int
+isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
+{
+ struct ip_tunnel_prl_entry *p;
+ int ok = 1;
+
+ read_lock(&ipip6_lock);
+ p = __ipip6_tunnel_locate_prl(t, iph->saddr);
+ if (p) {
+ if (p->flags & PRL_DEFAULT)
+ skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
+ else
+ skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
+ } else {
+ struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
+ if (ipv6_addr_is_isatap(addr6) &&
+ (addr6->s6_addr32[3] == iph->saddr) &&
+ ipv6_chk_prefix(addr6, t->dev))
+ skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
+ else
+ ok = 0;
+ }
+ read_unlock(&ipip6_lock);
+ return ok;
+}
+
static void ipip6_tunnel_uninit(struct net_device *dev)
{
- if (dev == ipip6_fb_tunnel_dev) {
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ if (dev == sitn->fb_tunnel_dev) {
write_lock_bh(&ipip6_lock);
- tunnels_wc[0] = NULL;
+ sitn->tunnels_wc[0] = NULL;
write_unlock_bh(&ipip6_lock);
dev_put(dev);
} else {
- ipip6_tunnel_unlink(netdev_priv(dev));
+ ipip6_tunnel_unlink(sitn, netdev_priv(dev));
+ ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
dev_put(dev);
}
}
@@ -256,7 +446,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
read_lock(&ipip6_lock);
- t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
+ t = ipip6_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
if (t == NULL || t->parms.iph.daddr == 0)
goto out;
@@ -339,11 +529,12 @@ out:
skb_reset_network_header(skb2);
/* Try to guess incoming interface */
- rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
+ rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0);
if (rt6i && rt6i->rt6i_dev) {
skb2->dev = rt6i->rt6i_dev;
- rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
+ rt6i = rt6_lookup(dev_net(skb->dev),
+ &iph6->daddr, &iph6->saddr, NULL, 0);
if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
@@ -365,48 +556,6 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
IP6_ECN_set_ce(ipv6_hdr(skb));
}
-/* ISATAP (RFC4214) - check source address */
-static int
-isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev)
-{
- struct neighbour *neigh;
- struct dst_entry *dst;
- struct rt6_info *rt;
- struct flowi fl;
- struct in6_addr *addr6;
- struct in6_addr rtr;
- struct ipv6hdr *iph6;
- int ok = 0;
-
- /* from onlink default router */
- ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0);
- ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr);
- if ((rt = rt6_get_dflt_router(&rtr, dev))) {
- dst_release(&rt->u.dst);
- return 1;
- }
-
- iph6 = ipv6_hdr(skb);
- memset(&fl, 0, sizeof(fl));
- fl.proto = iph6->nexthdr;
- ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr);
- fl.oif = dev->ifindex;
- security_skb_classify_flow(skb, &fl);
-
- dst = ip6_route_output(NULL, &fl);
- if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) {
-
- addr6 = (struct in6_addr*)&neigh->primary_key;
-
- /* from correct previous hop */
- if (ipv6_addr_is_isatap(addr6) &&
- (addr6->s6_addr32[3] == iph->saddr))
- ok = 1;
- }
- dst_release(dst);
- return ok;
-}
-
static int ipip6_rcv(struct sk_buff *skb)
{
struct iphdr *iph;
@@ -418,7 +567,8 @@ static int ipip6_rcv(struct sk_buff *skb)
iph = ip_hdr(skb);
read_lock(&ipip6_lock);
- if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+ if ((tunnel = ipip6_tunnel_lookup(dev_net(skb->dev),
+ iph->saddr, iph->daddr)) != NULL) {
secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
@@ -427,7 +577,7 @@ static int ipip6_rcv(struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
- !isatap_srcok(skb, iph, tunnel->dev)) {
+ !isatap_chksrc(skb, iph, tunnel)) {
tunnel->stat.rx_errors++;
read_unlock(&ipip6_lock);
kfree_skb(skb);
@@ -554,7 +704,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.tos = RT_TOS(tos) } },
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
- if (ip_route_output_key(&init_net, &rt, &fl)) {
+ if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
tunnel->stat.tx_carrier_errors++;
goto tx_error_icmp;
}
@@ -683,7 +833,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
struct rtable *rt;
- if (!ip_route_output_key(&init_net, &rt, &fl)) {
+ if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
@@ -691,7 +841,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
if (tdev) {
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -707,17 +857,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip_tunnel_parm p;
+ struct ip_tunnel_prl prl;
struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
t = NULL;
- if (dev == ipip6_fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
- t = ipip6_tunnel_locate(&p, 0);
+ t = ipip6_tunnel_locate(net, &p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
@@ -743,9 +896,9 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (p.iph.ttl)
p.iph.frag_off |= htons(IP_DF);
- t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+ t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
- if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -758,12 +911,12 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
t = netdev_priv(dev);
- ipip6_tunnel_unlink(t);
+ ipip6_tunnel_unlink(sitn, t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip6_tunnel_link(t);
+ ipip6_tunnel_link(sitn, t);
netdev_state_change(dev);
}
}
@@ -790,15 +943,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
goto done;
- if (dev == ipip6_fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
goto done;
err = -ENOENT;
- if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
+ if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL)
goto done;
err = -EPERM;
- if (t == netdev_priv(ipip6_fb_tunnel_dev))
+ if (t == netdev_priv(sitn->fb_tunnel_dev))
goto done;
dev = t->dev;
}
@@ -806,6 +959,42 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = 0;
break;
+ case SIOCGETPRL:
+ case SIOCADDPRL:
+ case SIOCDELPRL:
+ case SIOCCHGPRL:
+ err = -EPERM;
+ if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN))
+ goto done;
+ err = -EINVAL;
+ if (dev == sitn->fb_tunnel_dev)
+ goto done;
+ err = -EFAULT;
+ if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+ goto done;
+ err = -ENOENT;
+ if (!(t = netdev_priv(dev)))
+ goto done;
+
+ switch (cmd) {
+ case SIOCGETPRL:
+ err = ipip6_tunnel_get_prl(t, &prl);
+ if (!err && copy_to_user(ifr->ifr_ifru.ifru_data,
+ &prl, sizeof(prl)))
+ err = -EFAULT;
+ break;
+ case SIOCDELPRL:
+ err = ipip6_tunnel_del_prl(t, &prl);
+ break;
+ case SIOCADDPRL:
+ case SIOCCHGPRL:
+ err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
+ break;
+ }
+ if (cmd != SIOCGETPRL)
+ netdev_state_change(dev);
+ break;
+
default:
err = -EINVAL;
}
@@ -842,6 +1031,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -861,10 +1051,12 @@ static int ipip6_tunnel_init(struct net_device *dev)
return 0;
}
-static int __init ipip6_fb_tunnel_init(struct net_device *dev)
+static int ipip6_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@@ -875,7 +1067,7 @@ static int __init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ttl = 64;
dev_hold(dev);
- tunnels_wc[0] = tunnel;
+ sitn->tunnels_wc[0] = tunnel;
return 0;
}
@@ -885,7 +1077,7 @@ static struct xfrm_tunnel sit_handler = {
.priority = 1,
};
-static void __exit sit_destroy_tunnels(void)
+static void sit_destroy_tunnels(struct sit_net *sitn)
{
int prio;
@@ -893,20 +1085,78 @@ static void __exit sit_destroy_tunnels(void)
int h;
for (h = 0; h < HASH_SIZE; h++) {
struct ip_tunnel *t;
- while ((t = tunnels[prio][h]) != NULL)
+ while ((t = sitn->tunnels[prio][h]) != NULL)
unregister_netdevice(t->dev);
}
}
}
-static void __exit sit_cleanup(void)
+static int sit_init_net(struct net *net)
{
- xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+ int err;
+ struct sit_net *sitn;
+
+ err = -ENOMEM;
+ sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL);
+ if (sitn == NULL)
+ goto err_alloc;
+
+ err = net_assign_generic(net, sit_net_id, sitn);
+ if (err < 0)
+ goto err_assign;
+
+ sitn->tunnels[0] = sitn->tunnels_wc;
+ sitn->tunnels[1] = sitn->tunnels_l;
+ sitn->tunnels[2] = sitn->tunnels_r;
+ sitn->tunnels[3] = sitn->tunnels_r_l;
+
+ sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
+ ipip6_tunnel_setup);
+ if (!sitn->fb_tunnel_dev) {
+ err = -ENOMEM;
+ goto err_alloc_dev;
+ }
+
+ sitn->fb_tunnel_dev->init = ipip6_fb_tunnel_init;
+ dev_net_set(sitn->fb_tunnel_dev, net);
+
+ if ((err = register_netdev(sitn->fb_tunnel_dev)))
+ goto err_reg_dev;
+ return 0;
+
+err_reg_dev:
+ free_netdev(sitn->fb_tunnel_dev);
+err_alloc_dev:
+ /* nothing */
+err_assign:
+ kfree(sitn);
+err_alloc:
+ return err;
+}
+
+static void sit_exit_net(struct net *net)
+{
+ struct sit_net *sitn;
+
+ sitn = net_generic(net, sit_net_id);
rtnl_lock();
- sit_destroy_tunnels();
- unregister_netdevice(ipip6_fb_tunnel_dev);
+ sit_destroy_tunnels(sitn);
+ unregister_netdevice(sitn->fb_tunnel_dev);
rtnl_unlock();
+ kfree(sitn);
+}
+
+static struct pernet_operations sit_net_ops = {
+ .init = sit_init_net,
+ .exit = sit_exit_net,
+};
+
+static void __exit sit_cleanup(void)
+{
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+
+ unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
}
static int __init sit_init(void)
@@ -920,25 +1170,11 @@ static int __init sit_init(void)
return -EAGAIN;
}
- ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
- ipip6_tunnel_setup);
- if (!ipip6_fb_tunnel_dev) {
- err = -ENOMEM;
- goto err1;
- }
-
- ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
+ err = register_pernet_gen_device(&sit_net_id, &sit_net_ops);
+ if (err < 0)
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
- if ((err = register_netdev(ipip6_fb_tunnel_dev)))
- goto err2;
-
- out:
return err;
- err2:
- free_netdev(ipip6_fb_tunnel_dev);
- err1:
- xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
- goto out;
}
module_init(sit_init);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
new file mode 100644
index 00000000000..938ce4ecde5
--- /dev/null
+++ b/net/ipv6/syncookies.c
@@ -0,0 +1,279 @@
+/*
+ * IPv6 Syncookies implementation for the Linux kernel
+ *
+ * Authors:
+ * Glenn Griffin <ggriffin.kernel@gmail.com>
+ *
+ * Based on IPv4 implementation by Andi Kleen
+ * linux/net/ipv4/syncookies.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/tcp.h>
+#include <linux/random.h>
+#include <linux/cryptohash.h>
+#include <linux/kernel.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
+
+#define COOKIEBITS 24 /* Upper bits store count */
+#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+
+/*
+ * This table has to be sorted and terminated with (__u16)-1.
+ * XXX generate a better table.
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
+ *
+ * Taken directly from ipv4 implementation.
+ * Should this list be modified for ipv6 use or is it close enough?
+ * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart
+ */
+static __u16 const msstab[] = {
+ 64 - 1,
+ 256 - 1,
+ 512 - 1,
+ 536 - 1,
+ 1024 - 1,
+ 1440 - 1,
+ 1460 - 1,
+ 4312 - 1,
+ (__u16)-1
+};
+/* The number doesn't include the -1 terminator */
+#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
+
+/*
+ * This (misnamed) value is the age of syncookie which is permitted.
+ * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula (exponential
+ * backoff) to compute at runtime so it's currently hardcoded here.
+ */
+#define COUNTER_TRIES 4
+
+static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sock *child;
+
+ child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+ if (child)
+ inet_csk_reqsk_queue_add(sk, req, child);
+ else
+ reqsk_free(req);
+
+ return child;
+}
+
+static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
+
+static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
+ __be16 sport, __be16 dport, u32 count, int c)
+{
+ __u32 *tmp = __get_cpu_var(cookie_scratch);
+
+ /*
+ * we have 320 bits of information to hash, copy in the remaining
+ * 192 bits required for sha_transform, from the syncookie_secret
+ * and overwrite the digest with the secret
+ */
+ memcpy(tmp + 10, syncookie_secret[c], 44);
+ memcpy(tmp, saddr, 16);
+ memcpy(tmp + 4, daddr, 16);
+ tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
+ tmp[9] = count;
+ sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+
+ return tmp[17];
+}
+
+static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *daddr,
+ __be16 sport, __be16 dport, __u32 sseq,
+ __u32 count, __u32 data)
+{
+ return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
+ sseq + (count << COOKIEBITS) +
+ ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
+ & COOKIEMASK));
+}
+
+static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
+ struct in6_addr *daddr, __be16 sport,
+ __be16 dport, __u32 sseq, __u32 count,
+ __u32 maxdiff)
+{
+ __u32 diff;
+
+ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
+
+ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
+ if (diff >= maxdiff)
+ return (__u32)-1;
+
+ return (cookie -
+ cookie_hash(saddr, daddr, sport, dport, count - diff, 1))
+ & COOKIEMASK;
+}
+
+__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int mssind;
+ const __u16 mss = *mssp;
+
+ tcp_sk(sk)->last_synq_overflow = jiffies;
+
+ for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
+ ;
+ *mssp = msstab[mssind] + 1;
+
+ NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+
+ return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
+ th->dest, ntohl(th->seq),
+ jiffies / (HZ * 60), mssind);
+}
+
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 seq = ntohl(th->seq) - 1;
+ __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq,
+ jiffies / (HZ * 60), COUNTER_TRIES);
+
+ return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq;
+ struct inet6_request_sock *ireq6;
+ struct tcp_request_sock *treq;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 cookie = ntohl(th->ack_seq) - 1;
+ struct sock *ret = sk;
+ struct request_sock *req;
+ int mss;
+ struct dst_entry *dst;
+ __u8 rcv_wscale;
+ struct tcp_options_received tcp_opt;
+
+ if (!sysctl_tcp_syncookies || !th->ack)
+ goto out;
+
+ if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+ (mss = cookie_check(skb, cookie)) == 0) {
+ NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+ goto out;
+ }
+
+ NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+
+ /* check for timestamp cookie support */
+ memset(&tcp_opt, 0, sizeof(tcp_opt));
+ tcp_parse_options(skb, &tcp_opt, 0);
+
+ if (tcp_opt.saw_tstamp)
+ cookie_check_timestamp(&tcp_opt);
+
+ ret = NULL;
+ req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ if (!req)
+ goto out;
+
+ ireq = inet_rsk(req);
+ ireq6 = inet6_rsk(req);
+ treq = tcp_rsk(req);
+ ireq6->pktopts = NULL;
+
+ if (security_inet_conn_request(sk, skb, req)) {
+ reqsk_free(req);
+ goto out;
+ }
+
+ req->mss = mss;
+ ireq->rmt_port = th->source;
+ ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+ if (ipv6_opt_accepted(sk, skb) ||
+ np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ atomic_inc(&skb->users);
+ ireq6->pktopts = skb;
+ }
+
+ ireq6->iif = sk->sk_bound_dev_if;
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq6->iif = inet6_iif(skb);
+
+ req->expires = 0UL;
+ req->retrans = 0;
+ ireq->snd_wscale = tcp_opt.snd_wscale;
+ ireq->rcv_wscale = tcp_opt.rcv_wscale;
+ ireq->sack_ok = tcp_opt.sack_ok;
+ ireq->wscale_ok = tcp_opt.wscale_ok;
+ ireq->tstamp_ok = tcp_opt.saw_tstamp;
+ req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+
+ /*
+ * We need to lookup the dst_entry to get the correct window size.
+ * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten
+ * me if there is a preferred way.
+ */
+ {
+ struct in6_addr *final_p = NULL, final;
+ struct flowi fl;
+ memset(&fl, 0, sizeof(fl));
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+ ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+ fl.fl_ip_sport = inet_sk(sk)->sport;
+ security_req_classify_flow(req, &fl);
+ if (ip6_dst_lookup(sk, &dst, &fl)) {
+ reqsk_free(req);
+ goto out;
+ }
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+ if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ goto out;
+ }
+
+ req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ &req->rcv_wnd, &req->window_clamp,
+ ireq->wscale_ok, &rcv_wscale);
+
+ ireq->rcv_wscale = rcv_wscale;
+
+ ret = get_cookie_sock(sk, skb, req, dst);
+
+out: return ret;
+}
+
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d6d3e68086f..3804dcbbfab 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -71,24 +71,11 @@ static int ipv6_sysctl_net_init(struct net *net)
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
goto out_ipv6_table;
+ ipv6_table[0].child = ipv6_route_table;
ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
if (!ipv6_icmp_table)
goto out_ipv6_route_table;
-
- ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay;
- /* ipv6_route_table[1].data will be handled when we have
- routes per namespace */
- ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
- ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
- ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
- ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
- ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
- ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
- ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
- ipv6_table[0].child = ipv6_route_table;
-
- ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time;
ipv6_table[1].child = ipv6_icmp_table;
ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 12750f2b05a..715965f0fac 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -60,6 +60,7 @@
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
+#include <net/inet_common.h>
#include <asm/uaccess.h>
@@ -69,9 +70,6 @@
#include <linux/crypto.h>
#include <linux/scatterlist.h>
-/* Socket used for sending RSTs and ACKs */
-static struct socket *tcp6_socket;
-
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void tcp_v6_send_check(struct sock *sk, int len,
@@ -324,7 +322,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq;
- sk = inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, &hdr->daddr,
+ sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr,
th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
if (sk == NULL) {
@@ -455,8 +453,7 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
- struct dst_entry *dst)
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -464,6 +461,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
struct ipv6_txoptions *opt = NULL;
struct in6_addr * final_p = NULL, final;
struct flowi fl;
+ struct dst_entry *dst;
int err = -1;
memset(&fl, 0, sizeof(fl));
@@ -476,24 +474,22 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
fl.fl_ip_sport = inet_sk(sk)->sport;
security_req_classify_flow(req, &fl);
- if (dst == NULL) {
- opt = np->opt;
- if (opt && opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
- ipv6_addr_copy(&final, &fl.fl6_dst);
- ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
- final_p = &final;
- }
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
- goto done;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
- goto done;
+ opt = np->opt;
+ if (opt && opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
}
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err)
+ goto done;
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+ if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ goto done;
+
skb = tcp_make_synack(sk, dst, req);
if (skb) {
struct tcphdr *th = tcp_hdr(skb);
@@ -514,6 +510,20 @@ done:
return err;
}
+static inline void syn_flood_warning(struct sk_buff *skb)
+{
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies)
+ printk(KERN_INFO
+ "TCPv6: Possible SYN flooding on port %d. "
+ "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
+ else
+#endif
+ printk(KERN_INFO
+ "TCPv6: Possible SYN flooding on port %d. "
+ "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
+}
+
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
if (inet6_rsk(req)->pktopts)
@@ -533,7 +543,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
return NULL;
for (i = 0; i < tp->md5sig_info->entries6; i++) {
- if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, addr) == 0)
+ if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
return &tp->md5sig_info->keys6[i].base;
}
return NULL;
@@ -622,7 +632,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
int i;
for (i = 0; i < tp->md5sig_info->entries6; i++) {
- if (ipv6_addr_cmp(&tp->md5sig_info->keys6[i].addr, peer) == 0) {
+ if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
/* Free the key */
kfree(tp->md5sig_info->keys6[i].base.key);
tp->md5sig_info->entries6--;
@@ -741,7 +751,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
hp = tcp_get_md5sig_pool();
if (!hp) {
- printk(KERN_WARNING "%s(): hash pool not found...\n", __FUNCTION__);
+ printk(KERN_WARNING "%s(): hash pool not found...\n", __func__);
goto clear_hash_noput;
}
bp = &hp->md5_blk.ip6;
@@ -781,17 +791,17 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
/* Now store the hash into the packet */
err = crypto_hash_init(desc);
if (err) {
- printk(KERN_WARNING "%s(): hash_init failed\n", __FUNCTION__);
+ printk(KERN_WARNING "%s(): hash_init failed\n", __func__);
goto clear_hash;
}
err = crypto_hash_update(desc, sg, nbytes);
if (err) {
- printk(KERN_WARNING "%s(): hash_update failed\n", __FUNCTION__);
+ printk(KERN_WARNING "%s(): hash_update failed\n", __func__);
goto clear_hash;
}
err = crypto_hash_final(desc, md5_hash);
if (err) {
- printk(KERN_WARNING "%s(): hash_final failed\n", __FUNCTION__);
+ printk(KERN_WARNING "%s(): hash_final failed\n", __func__);
goto clear_hash;
}
@@ -917,7 +927,7 @@ done_opts:
}
#endif
-static struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
.rtx_syn_ack = tcp_v6_send_synack,
@@ -979,6 +989,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
struct flowi fl;
+ struct net *net = dev_net(skb->dst->dev);
+ struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(*th);
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
@@ -1059,11 +1071,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
fl.fl_ip_sport = t1->source;
security_skb_classify_flow(skb, &fl);
- /* sk = NULL, but it is safe for now. RST socket required. */
- if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+ /* Pass a socket to ip6_dst_lookup either it is for RST
+ * Underlying function will use this to retrieve the network
+ * namespace
+ */
+ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
- ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
+ ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
return;
@@ -1079,6 +1094,8 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
struct flowi fl;
+ struct net *net = dev_net(skb->dev);
+ struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
__be32 *topt;
#ifdef CONFIG_TCP_MD5SIG
@@ -1160,9 +1177,9 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
fl.fl_ip_sport = t1->source;
security_skb_classify_flow(skb, &fl);
- if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
- ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
+ ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
return;
}
@@ -1202,7 +1219,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = __inet6_lookup_established(sk->sk_net, &tcp_hashinfo,
+ nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
@@ -1215,9 +1232,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
return NULL;
}
-#if 0 /*def CONFIG_SYN_COOKIES*/
+#ifdef CONFIG_SYN_COOKIES
if (!th->rst && !th->syn && th->ack)
- sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
+ sk = cookie_v6_check(sk, skb);
#endif
return sk;
}
@@ -1233,6 +1250,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req = NULL;
__u32 isn = TCP_SKB_CB(skb)->when;
+#ifdef CONFIG_SYN_COOKIES
+ int want_cookie = 0;
+#else
+#define want_cookie 0
+#endif
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -1240,12 +1262,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
- /*
- * There are no SYN attacks on IPv6, yet...
- */
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
if (net_ratelimit())
- printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
+ syn_flood_warning(skb);
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies)
+ want_cookie = 1;
+ else
+#endif
goto drop;
}
@@ -1266,39 +1290,50 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_parse_options(skb, &tmp_opt, 0);
+ if (want_cookie && !tmp_opt.saw_tstamp)
+ tcp_clear_options(&tmp_opt);
+
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
treq = inet6_rsk(req);
ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
- TCP_ECN_create_request(req, tcp_hdr(skb));
treq->pktopts = NULL;
- if (ipv6_opt_accepted(sk, skb) ||
- np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
- atomic_inc(&skb->users);
- treq->pktopts = skb;
- }
- treq->iif = sk->sk_bound_dev_if;
+ if (!want_cookie)
+ TCP_ECN_create_request(req, tcp_hdr(skb));
+
+ if (want_cookie) {
+ isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+ req->cookie_ts = tmp_opt.tstamp_ok;
+ } else if (!isn) {
+ if (ipv6_opt_accepted(sk, skb) ||
+ np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ atomic_inc(&skb->users);
+ treq->pktopts = skb;
+ }
+ treq->iif = sk->sk_bound_dev_if;
- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- treq->iif = inet6_iif(skb);
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ treq->iif = inet6_iif(skb);
- if (isn == 0)
isn = tcp_v6_init_sequence(skb);
+ }
tcp_rsk(req)->snt_isn = isn;
security_inet_conn_request(sk, skb, req);
- if (tcp_v6_send_synack(sk, req, NULL))
+ if (tcp_v6_send_synack(sk, req))
goto drop;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- return 0;
+ if (!want_cookie) {
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ return 0;
+ }
drop:
if (req)
@@ -1499,7 +1534,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#endif
__inet6_hash(newsk);
- inet_inherit_port(sk, newsk);
+ __inet_inherit_port(sk, newsk);
return newsk;
@@ -1704,7 +1739,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet6_lookup(skb->dev->nd_net, &tcp_hashinfo,
+ sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr, ntohs(th->dest),
inet6_iif(skb));
@@ -1787,7 +1822,7 @@ do_time_wait:
{
struct sock *sk2;
- sk2 = inet6_lookup_listener(skb->dev->nd_net, &tcp_hashinfo,
+ sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (sk2 != NULL) {
@@ -2085,28 +2120,28 @@ out:
return 0;
}
-static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "tcp6",
.family = AF_INET6,
- .seq_show = tcp6_seq_show,
- .seq_fops = &tcp6_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = tcp6_seq_show,
+ },
};
-int __init tcp6_proc_init(void)
+int tcp6_proc_init(struct net *net)
{
- return tcp_proc_register(&tcp6_seq_afinfo);
+ return tcp_proc_register(net, &tcp6_seq_afinfo);
}
-void tcp6_proc_exit(void)
+void tcp6_proc_exit(struct net *net)
{
- tcp_proc_unregister(&tcp6_seq_afinfo);
+ tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif
-DEFINE_PROTO_INUSE(tcpv6)
-
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
@@ -2137,12 +2172,11 @@ struct proto tcpv6_prot = {
.obj_size = sizeof(struct tcp6_sock),
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
- .hashinfo = &tcp_hashinfo,
+ .h.hashinfo = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
- REF_PROTO_INUSE(tcpv6)
};
static struct inet6_protocol tcpv6_protocol = {
@@ -2164,6 +2198,22 @@ static struct inet_protosw tcpv6_protosw = {
INET_PROTOSW_ICSK,
};
+static int tcpv6_net_init(struct net *net)
+{
+ return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_TCP, net);
+}
+
+static void tcpv6_net_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+}
+
+static struct pernet_operations tcpv6_net_ops = {
+ .init = tcpv6_net_init,
+ .exit = tcpv6_net_exit,
+};
+
int __init tcpv6_init(void)
{
int ret;
@@ -2177,8 +2227,7 @@ int __init tcpv6_init(void)
if (ret)
goto out_tcpv6_protocol;
- ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6,
- SOCK_RAW, IPPROTO_TCP);
+ ret = register_pernet_subsys(&tcpv6_net_ops);
if (ret)
goto out_tcpv6_protosw;
out:
@@ -2193,7 +2242,7 @@ out_tcpv6_protosw:
void tcpv6_exit(void)
{
- sock_release(tcp6_socket);
+ unregister_pernet_subsys(&tcpv6_net_ops);
inet6_unregister_protosw(&tcpv6_protosw);
inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 53739de829d..1fd784f3e2e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -51,9 +51,9 @@
#include <linux/seq_file.h>
#include "udp_impl.h"
-static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
+int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
- return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
+ return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
}
static struct sock *__udp6_lib_lookup(struct net *net,
@@ -70,7 +70,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_net == net && sk->sk_hash == hnum &&
+ if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
sk->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
int score = 0;
@@ -235,7 +235,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct sock *sk;
int err;
- sk = __udp6_lib_lookup(skb->dev->nd_net, daddr, uh->dest,
+ sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
saddr, uh->source, inet6_iif(skb), udptable);
if (sk == NULL)
return;
@@ -323,6 +323,9 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
+ if (sock_net(s) != sock_net(sk))
+ continue;
+
if (s->sk_hash == num && s->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(s);
if (inet->dport) {
@@ -480,7 +483,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
* check socket cache ... must talk to Alan about his plans
* for sock caches... i'll skip this for now.
*/
- sk = __udp6_lib_lookup(skb->dev->nd_net, saddr, uh->source,
+ sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source,
daddr, uh->dest, inet6_iif(skb), udptable);
if (sk == NULL) {
@@ -749,7 +752,10 @@ do_udp_sendmsg:
opt = ipv6_fixup_options(&opt_space, opt);
fl.proto = sk->sk_protocol;
- ipv6_addr_copy(&fl.fl6_dst, daddr);
+ if (!ipv6_addr_any(daddr))
+ ipv6_addr_copy(&fl.fl6_dst, daddr);
+ else
+ fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
fl.fl_ip_sport = inet->sport;
@@ -789,9 +795,7 @@ do_udp_sendmsg:
else
hlimit = np->hop_limit;
if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hlimit < 0)
- hlimit = ipv6_get_hoplimit(dst->dev);
+ hlimit = ip6_dst_hoplimit(dst);
}
if (tclass < 0) {
@@ -976,30 +980,30 @@ int udp6_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct file_operations udp6_seq_fops;
static struct udp_seq_afinfo udp6_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "udp6",
.family = AF_INET6,
.hashtable = udp_hash,
- .seq_show = udp6_seq_show,
- .seq_fops = &udp6_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = udp6_seq_show,
+ },
};
-int __init udp6_proc_init(void)
+int udp6_proc_init(struct net *net)
{
- return udp_proc_register(&udp6_seq_afinfo);
+ return udp_proc_register(net, &udp6_seq_afinfo);
}
-void udp6_proc_exit(void) {
- udp_proc_unregister(&udp6_seq_afinfo);
+void udp6_proc_exit(struct net *net) {
+ udp_proc_unregister(net, &udp6_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
/* ------------------------------------------------------------------------ */
-DEFINE_PROTO_INUSE(udpv6)
-
struct proto udpv6_prot = {
.name = "UDPv6",
.owner = THIS_MODULE,
@@ -1021,11 +1025,11 @@ struct proto udpv6_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp6_sock),
+ .h.udp_hash = udp_hash,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- REF_PROTO_INUSE(udpv6)
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 21be3a83e7b..321b81a4d41 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -11,6 +11,8 @@ extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
int , int , int , __be32 , struct hlist_head []);
+extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
+
extern int udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
extern int udpv6_setsockopt(struct sock *sk, int level, int optname,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 87d4202522e..491efd00a86 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -35,13 +35,6 @@ static struct inet6_protocol udplitev6_protocol = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
-static int udplite_v6_get_port(struct sock *sk, unsigned short snum)
-{
- return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal);
-}
-
-DEFINE_PROTO_INUSE(udplitev6)
-
struct proto udplitev6_prot = {
.name = "UDPLITEv6",
.owner = THIS_MODULE,
@@ -58,13 +51,13 @@ struct proto udplitev6_prot = {
.backlog_rcv = udpv6_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
- .get_port = udplite_v6_get_port,
+ .get_port = udp_v6_get_port,
.obj_size = sizeof(struct udp6_sock),
+ .h.udp_hash = udplite_hash,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- REF_PROTO_INUSE(udplitev6)
};
static struct inet_protosw udplite6_protosw = {
@@ -103,23 +96,40 @@ void udplitev6_exit(void)
}
#ifdef CONFIG_PROC_FS
-static struct file_operations udplite6_seq_fops;
static struct udp_seq_afinfo udplite6_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "udplite6",
.family = AF_INET6,
.hashtable = udplite_hash,
- .seq_show = udp6_seq_show,
- .seq_fops = &udplite6_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = udp6_seq_show,
+ },
+};
+
+static int udplite6_proc_init_net(struct net *net)
+{
+ return udp_proc_register(net, &udplite6_seq_afinfo);
+}
+
+static void udplite6_proc_exit_net(struct net *net)
+{
+ udp_proc_unregister(net, &udplite6_seq_afinfo);
+}
+
+static struct pernet_operations udplite6_net_ops = {
+ .init = udplite6_proc_init_net,
+ .exit = udplite6_proc_exit_net,
};
int __init udplite6_proc_init(void)
{
- return udp_proc_register(&udplite6_seq_afinfo);
+ return register_pernet_subsys(&udplite6_net_ops);
}
void udplite6_proc_exit(void)
{
- udp_proc_unregister(&udplite6_seq_afinfo);
+ unregister_pernet_subsys(&udplite6_net_ops);
}
#endif
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index a4714d76ae6..a71c7ddcb41 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -59,9 +59,6 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto)
{
struct xfrm_state *x = NULL;
- int wildcard = 0;
- xfrm_address_t *xany;
- int nh = 0;
int i = 0;
/* Allocate new secpath or COW existing one. */
@@ -83,10 +80,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
goto drop;
}
- xany = (xfrm_address_t *)&in6addr_any;
-
for (i = 0; i < 3; i++) {
xfrm_address_t *dst, *src;
+
switch (i) {
case 0:
dst = daddr;
@@ -94,16 +90,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
break;
case 1:
/* lookup state with wild-card source address */
- wildcard = 1;
dst = daddr;
- src = xany;
+ src = (xfrm_address_t *)&in6addr_any;
break;
- case 2:
default:
/* lookup state with wild-card addresses */
- wildcard = 1; /* XXX */
- dst = xany;
- src = xany;
+ dst = (xfrm_address_t *)&in6addr_any;
+ src = (xfrm_address_t *)&in6addr_any;
break;
}
@@ -113,39 +106,19 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
spin_lock(&x->lock);
- if (wildcard) {
- if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) {
- spin_unlock(&x->lock);
- xfrm_state_put(x);
- x = NULL;
- continue;
- }
- }
-
- if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+ if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
+ likely(x->km.state == XFRM_STATE_VALID) &&
+ !xfrm_state_check_expire(x)) {
spin_unlock(&x->lock);
- xfrm_state_put(x);
- x = NULL;
- continue;
- }
- if (xfrm_state_check_expire(x)) {
+ if (x->type->input(x, skb) > 0) {
+ /* found a valid state */
+ break;
+ }
+ } else
spin_unlock(&x->lock);
- xfrm_state_put(x);
- x = NULL;
- continue;
- }
-
- spin_unlock(&x->lock);
-
- nh = x->type->input(x, skb);
- if (nh <= 0) {
- xfrm_state_put(x);
- x = NULL;
- continue;
- }
- /* Found a state */
- break;
+ xfrm_state_put(x);
+ x = NULL;
}
if (!x) {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 7d20199ee1f..8f1e0543b3c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -38,7 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
if (saddr)
memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
- dst = ip6_route_output(NULL, &fl);
+ dst = ip6_route_output(&init_net, NULL, &fl);
err = dst->error;
if (dst->error) {
@@ -57,8 +57,9 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
if (IS_ERR(dst))
return -EHOSTUNREACH;
- ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6,
- (struct in6_addr *)&saddr->a6);
+ ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev,
+ (struct in6_addr *)&daddr->a6, 0,
+ (struct in6_addr *)&saddr->a6);
dst_release(dst);
return 0;
}
@@ -246,7 +247,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xdst = (struct xfrm_dst *)dst;
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
- in6_dev_get(dev->nd_net->loopback_dev);
+ in6_dev_get(dev_net(dev)->loopback_dev);
BUG_ON(!loopback_idev);
do {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index ff1e1db8e23..89884a4f23a 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -49,125 +49,102 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->props.family = AF_INET6;
}
+/* distribution counting sort function for xfrm_state and xfrm_tmpl */
static int
-__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
+__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
{
int i;
- int j = 0;
+ int class[XFRM_MAX_DEPTH];
+ int count[maxclass];
- /* Rule 1: select IPsec transport except AH */
- for (i = 0; i < n; i++) {
- if (src[i]->props.mode == XFRM_MODE_TRANSPORT &&
- src[i]->id.proto != IPPROTO_AH) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
- }
- if (j == n)
- goto end;
+ memset(count, 0, sizeof(count));
- /* Rule 2: select MIPv6 RO or inbound trigger */
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
for (i = 0; i < n; i++) {
- if (src[i] &&
- (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
- src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
+ int c;
+ class[i] = c = cmp(src[i]);
+ count[c]++;
}
- if (j == n)
- goto end;
-#endif
- /* Rule 3: select IPsec transport AH */
- for (i = 0; i < n; i++) {
- if (src[i] &&
- src[i]->props.mode == XFRM_MODE_TRANSPORT &&
- src[i]->id.proto == IPPROTO_AH) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
- }
- if (j == n)
- goto end;
+ for (i = 2; i < maxclass; i++)
+ count[i] += count[i - 1];
- /* Rule 4: select IPsec tunnel */
for (i = 0; i < n; i++) {
- if (src[i] &&
- (src[i]->props.mode == XFRM_MODE_TUNNEL ||
- src[i]->props.mode == XFRM_MODE_BEET)) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
+ dst[count[class[i] - 1]++] = src[i];
+ src[i] = 0;
}
- if (likely(j == n))
- goto end;
- /* Final rule */
- for (i = 0; i < n; i++) {
- if (src[i]) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
- }
-
- end:
return 0;
}
-static int
-__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
+/*
+ * Rule for xfrm_state:
+ *
+ * rule 1: select IPsec transport except AH
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec transport AH
+ * rule 4: select IPsec tunnel
+ * rule 5: others
+ */
+static int __xfrm6_state_sort_cmp(void *p)
{
- int i;
- int j = 0;
-
- /* Rule 1: select IPsec transport */
- for (i = 0; i < n; i++) {
- if (src[i]->mode == XFRM_MODE_TRANSPORT) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
- }
- if (j == n)
- goto end;
-
- /* Rule 2: select MIPv6 RO or inbound trigger */
+ struct xfrm_state *v = p;
+
+ switch (v->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ if (v->id.proto != IPPROTO_AH)
+ return 1;
+ else
+ return 3;
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- for (i = 0; i < n; i++) {
- if (src[i] &&
- (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
- src[i]->mode == XFRM_MODE_IN_TRIGGER)) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
- }
- if (j == n)
- goto end;
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
#endif
-
- /* Rule 3: select IPsec tunnel */
- for (i = 0; i < n; i++) {
- if (src[i] &&
- (src[i]->mode == XFRM_MODE_TUNNEL ||
- src[i]->mode == XFRM_MODE_BEET)) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ return 4;
}
- if (likely(j == n))
- goto end;
+ return 5;
+}
- /* Final rule */
- for (i = 0; i < n; i++) {
- if (src[i]) {
- dst[j++] = src[i];
- src[i] = NULL;
- }
+static int
+__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
+{
+ return __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_state_sort_cmp, 6);
+}
+
+/*
+ * Rule for xfrm_tmpl:
+ *
+ * rule 1: select IPsec transport
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec tunnel
+ * rule 4: others
+ */
+static int __xfrm6_tmpl_sort_cmp(void *p)
+{
+ struct xfrm_tmpl *v = p;
+ switch (v->mode) {
+ case XFRM_MODE_TRANSPORT:
+ return 1;
+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ return 3;
}
+ return 4;
+}
- end:
- return 0;
+static int
+__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
+{
+ return __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_tmpl_sort_cmp, 5);
}
int xfrm6_extract_header(struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 639fe8a6ff1..c2b27813860 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -140,12 +140,26 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
+static int __xfrm6_tunnel_spi_check(u32 spi)
+{
+ struct xfrm6_tunnel_spi *x6spi;
+ int index = xfrm6_tunnel_spi_hash_byspi(spi);
+ struct hlist_node *pos;
+
+ hlist_for_each_entry(x6spi, pos,
+ &xfrm6_tunnel_spi_byspi[index],
+ list_byspi) {
+ if (x6spi->spi == spi)
+ return -1;
+ }
+ return index;
+}
+
static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
{
u32 spi;
struct xfrm6_tunnel_spi *x6spi;
- struct hlist_node *pos;
- unsigned index;
+ int index;
if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN ||
xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX)
@@ -154,32 +168,19 @@ static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
xfrm6_tunnel_spi++;
for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
- index = xfrm6_tunnel_spi_hash_byspi(spi);
- hlist_for_each_entry(x6spi, pos,
- &xfrm6_tunnel_spi_byspi[index],
- list_byspi) {
- if (x6spi->spi == spi)
- goto try_next_1;
- }
- xfrm6_tunnel_spi = spi;
- goto alloc_spi;
-try_next_1:;
+ index = __xfrm6_tunnel_spi_check(spi);
+ if (index >= 0)
+ goto alloc_spi;
}
for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) {
- index = xfrm6_tunnel_spi_hash_byspi(spi);
- hlist_for_each_entry(x6spi, pos,
- &xfrm6_tunnel_spi_byspi[index],
- list_byspi) {
- if (x6spi->spi == spi)
- goto try_next_2;
- }
- xfrm6_tunnel_spi = spi;
- goto alloc_spi;
-try_next_2:;
+ index = __xfrm6_tunnel_spi_check(spi);
+ if (index >= 0)
+ goto alloc_spi;
}
spi = 0;
goto out;
alloc_spi:
+ xfrm6_tunnel_spi = spi;
x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC);
if (!x6spi)
goto out;