diff options
Diffstat (limited to 'net/ipv6')
49 files changed, 2598 insertions, 835 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 11b13ea69db..d92e5586783 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -21,24 +21,6 @@ menuconfig IPV6 if IPV6 -config IPV6_PRIVACY - bool "IPv6: Privacy Extensions (RFC 3041) support" - ---help--- - Privacy Extensions for Stateless Address Autoconfiguration in IPv6 - support. With this option, additional periodically-altered - pseudo-random global-scope unicast address(es) will be assigned to - your interface(s). - - We use our standard pseudo-random algorithm to generate the - randomized interface identifier, instead of one described in RFC 3041. - - By default the kernel does not generate temporary addresses. - To use temporary addresses, do - - echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr - - See <file:Documentation/networking/ip-sysctl.txt> for details. - config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" ---help--- @@ -153,6 +135,17 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION ---help--- Support for MIPv6 route optimization mode. +config IPV6_VTI +tristate "Virtual (secure) IPv6: tunneling" + select IPV6_TUNNEL + depends on INET6_XFRM_MODE_TUNNEL + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This can be used with xfrm mode tunnel to give + the notion of a secure tunnel for IPSEC and then use routing protocol + on top. + config IPV6_SIT tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" select INET_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 470a9c008e9..17bb830872d 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6ff12617f3..12c97d8aa6b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -83,11 +83,7 @@ #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> #include <linux/netconf.h> - -#ifdef CONFIG_IPV6_PRIVACY #include <linux/random.h> -#endif - #include <linux/uaccess.h> #include <asm/unaligned.h> @@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -#ifdef CONFIG_IPV6_PRIVACY static void __ipv6_regen_rndid(struct inet6_dev *idev); static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); -#endif static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -183,13 +177,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, -#endif .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_pinfo = 1, @@ -221,13 +213,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, -#endif .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_pinfo = 1, @@ -281,10 +271,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { + int i; + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip; + + for_each_possible_cpu(i) { + struct ipstats_mib *addrconf_stats; + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + u64_stats_init(&addrconf_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + u64_stats_init(&addrconf_stats->syncp); +#endif + } + + idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) @@ -371,7 +375,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) } #endif -#ifdef CONFIG_IPV6_PRIVACY INIT_LIST_HEAD(&ndev->tempaddr_list); setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || @@ -384,7 +387,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) in6_dev_hold(ndev); ipv6_regen_rndid((unsigned long) ndev); } -#endif + ndev->token = in6addr_any; if (netif_running(dev) && addrconf_qdisc_ok(dev)) @@ -865,12 +868,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); -#ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } -#endif in6_ifa_hold(ifa); write_unlock(&idev->lock); @@ -913,7 +914,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); -#ifdef CONFIG_IPV6_PRIVACY + if (ifp->flags&IFA_F_TEMPORARY) { list_del(&ifp->tmp_list); if (ifp->ifpub) { @@ -922,7 +923,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } __in6_ifa_put(ifp); } -#endif list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { if (ifa == ifp) { @@ -1013,7 +1013,6 @@ out: in6_ifa_put(ifp); } -#ifdef CONFIG_IPV6_PRIVACY static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) { struct inet6_dev *idev = ifp->idev; @@ -1116,7 +1115,6 @@ retry: out: return ret; } -#endif /* * Choose an appropriate source address (RFC3484) @@ -1131,9 +1129,7 @@ enum { #endif IPV6_SADDR_RULE_OIF, IPV6_SADDR_RULE_LABEL, -#ifdef CONFIG_IPV6_PRIVACY IPV6_SADDR_RULE_PRIVACY, -#endif IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, IPV6_SADDR_RULE_MAX @@ -1247,7 +1243,6 @@ static int ipv6_get_saddr_eval(struct net *net, &score->ifa->addr, score->addr_type, score->ifa->idev->dev->ifindex) == dst->label; break; -#ifdef CONFIG_IPV6_PRIVACY case IPV6_SADDR_RULE_PRIVACY: { /* Rule 7: Prefer public address @@ -1259,7 +1254,6 @@ static int ipv6_get_saddr_eval(struct net *net, ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } -#endif case IPV6_SADDR_RULE_ORCHID: /* Rule 8-: Prefer ORCHID vs ORCHID or * non-ORCHID vs non-ORCHID @@ -1499,6 +1493,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return false; } +/* Compares an address/prefix_len with addresses on device @dev. + * If one is found it returns true. + */ +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool ret = false; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); + if (ret) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ipv6_chk_custom_prefix); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { struct inet6_dev *idev; @@ -1561,7 +1582,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); -#ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); @@ -1575,7 +1595,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); -#endif } else ipv6_del_addr(ifp); } @@ -1824,7 +1843,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) return err; } -#ifdef CONFIG_IPV6_PRIVACY /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ static void __ipv6_regen_rndid(struct inet6_dev *idev) { @@ -1892,7 +1910,6 @@ static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) __ipv6_regen_rndid(idev); } -#endif /* * Add prefix route. @@ -1979,23 +1996,6 @@ static void addrconf_add_mroute(struct net_device *dev) ip6_route_add(&cfg); } -#if IS_ENABLED(CONFIG_IPV6_SIT) -static void sit_route_add(struct net_device *dev) -{ - struct fib6_config cfg = { - .fc_table = RT6_TABLE_MAIN, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_dst_len = 96, - .fc_flags = RTF_UP | RTF_NONEXTHOP, - .fc_nlinfo.nl_net = dev_net(dev), - }; - - /* prefix length - 96 bits "::d.d.d.d" */ - ip6_route_add(&cfg); -} -#endif - static struct inet6_dev *addrconf_add_dev(struct net_device *dev) { struct inet6_dev *idev; @@ -2180,9 +2180,7 @@ ok: if (ifp) { int flags; unsigned long now; -#ifdef CONFIG_IPV6_PRIVACY struct inet6_ifaddr *ift; -#endif u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ @@ -2193,43 +2191,21 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - if (valid_lft > MIN_VALID_LIFETIME || - valid_lft > stored_lft) - update_lft = 1; - else if (stored_lft <= MIN_VALID_LIFETIME) { - /* valid_lft <= stored_lft is always true */ - /* - * RFC 4862 Section 5.5.3e: - * "Note that the preferred lifetime of - * the corresponding address is always - * reset to the Preferred Lifetime in - * the received Prefix Information - * option, regardless of whether the - * valid lifetime is also reset or - * ignored." - * - * So if the preferred lifetime in - * this advertisement is different - * than what we have stored, but the - * valid lifetime is invalid, just - * reset prefered_lft. - * - * We must set the valid lifetime - * to the stored lifetime since we'll - * be updating the timestamp below, - * else we'll set it back to the - * minimum. - */ - if (prefered_lft != ifp->prefered_lft) { - valid_lft = stored_lft; - update_lft = 1; - } - } else { - valid_lft = MIN_VALID_LIFETIME; - if (valid_lft < prefered_lft) - prefered_lft = valid_lft; - update_lft = 1; - } + const u32 minimum_lft = min( + stored_lft, (u32)MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; } if (update_lft) { @@ -2245,7 +2221,6 @@ ok: } else spin_unlock(&ifp->lock); -#ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ list_for_each_entry(ift, &in6_dev->tempaddr_list, @@ -2310,7 +2285,7 @@ ok: } else { read_unlock_bh(&in6_dev->lock); } -#endif + in6_ifa_put(ifp); addrconf_verify(0); } @@ -2550,7 +2525,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope; + int scope, plen; + u32 pflags = 0; ASSERT_RTNL(); @@ -2560,12 +2536,16 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (idev->dev->flags&IFF_POINTOPOINT) { addr.s6_addr32[0] = htonl(0xfe800000); scope = IFA_LINK; + plen = 64; } else { scope = IPV6_ADDR_COMPATv4; + plen = 96; + pflags |= RTF_NONEXTHOP; } if (addr.s6_addr32[3]) { - add_addr(idev, &addr, 128, scope); + add_addr(idev, &addr, plen, scope); + addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags); return; } @@ -2577,7 +2557,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) int flag = scope; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { - int plen; addr.s6_addr32[3] = ifa->ifa_local; @@ -2588,12 +2567,10 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) continue; flag |= IFA_HOST; } - if (idev->dev->flags&IFF_POINTOPOINT) - plen = 64; - else - plen = 96; add_addr(idev, &addr, plen, flag); + addrconf_prefix_route(&addr, plen, idev->dev, 0, + pflags); } } } @@ -2719,7 +2696,6 @@ static void addrconf_sit_config(struct net_device *dev) struct in6_addr addr; ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - addrconf_prefix_route(&addr, 64, dev, 0, 0); if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) addrconf_add_linklocal(idev, &addr); return; @@ -2729,8 +2705,6 @@ static void addrconf_sit_config(struct net_device *dev) if (dev->flags&IFF_POINTOPOINT) addrconf_add_mroute(dev); - else - sit_route_add(dev); } #endif @@ -2748,8 +2722,6 @@ static void addrconf_gre_config(struct net_device *dev) } ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - addrconf_prefix_route(&addr, 64, dev, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) addrconf_add_linklocal(idev, &addr); } @@ -2990,7 +2962,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); -#ifdef CONFIG_IPV6_PRIVACY if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); @@ -3010,7 +2981,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) in6_ifa_put(ifa); write_lock_bh(&idev->lock); } -#endif while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, @@ -3523,7 +3493,6 @@ restart: in6_ifa_put(ifp); goto restart; } -#ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE)) { unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * @@ -3551,7 +3520,6 @@ restart: } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; spin_unlock(&ifp->lock); -#endif } else { /* ifp->prefered_lft <= ifp->valid_lft */ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) @@ -4123,13 +4091,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval); array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval); -#ifdef CONFIG_IPV6_PRIVACY array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; -#endif array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; @@ -4823,7 +4789,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, -#ifdef CONFIG_IPV6_PRIVACY { .procname = "use_tempaddr", .data = &ipv6_devconf.use_tempaddr, @@ -4859,7 +4824,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "max_addresses", .data = &ipv6_devconf.max_addresses, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100b021..4fbdb7046d2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -110,11 +110,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); - /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; @@ -364,7 +359,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; @@ -461,14 +456,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, peer == 1) return -ENOTCONN; sin->sin6_port = inet->inet_dport; - sin->sin6_addr = np->daddr; + sin->sin6_addr = sk->sk_v6_daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else - sin->sin6_addr = np->rcv_saddr; + sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; } @@ -655,7 +650,7 @@ int inet6_sk_rebuild_header(struct sock *sk) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; @@ -719,6 +714,8 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof(struct udp_mib), __alignof__(struct udp_mib)) < 0) @@ -731,6 +728,18 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet6_stats; + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + u64_stats_init(&af_inet6_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); + u64_stats_init(&af_inet6_stats->syncp); +#endif + } + + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) @@ -870,8 +879,6 @@ static int __init inet6_init(void) if (err) goto out_sock_register_fail; - tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; - /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -965,10 +972,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - ipv6_packet_cleanup(); + pingv6_exit(); #endif pingv6_fail: - pingv6_exit(); + ipv6_packet_cleanup(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -1028,52 +1035,4 @@ out_unregister_tcp_proto: } module_init(inet6_init); -static void __exit inet6_exit(void) -{ - if (disable_ipv6_mod) - return; - - /* First of all disallow new sockets creation. */ - sock_unregister(PF_INET6); - /* Disallow any further netlink messages */ - rtnl_unregister_all(PF_INET6); - - udpv6_exit(); - udplitev6_exit(); - tcpv6_exit(); - - /* Cleanup code parts. */ - ipv6_packet_cleanup(); - ipv6_frag_exit(); - ipv6_exthdrs_exit(); - addrconf_cleanup(); - ip6_flowlabel_cleanup(); - ndisc_late_cleanup(); - ip6_route_cleanup(); -#ifdef CONFIG_PROC_FS - - /* Cleanup code parts. */ - if6_proc_exit(); - ipv6_misc_proc_exit(); - udplite6_proc_exit(); - raw6_proc_exit(); -#endif - ipv6_netfilter_fini(); - ipv6_stub = NULL; - igmp6_cleanup(); - ndisc_cleanup(); - ip6_mr_cleanup(); - icmpv6_cleanup(); - rawv6_exit(); - - unregister_pernet_subsys(&inet6_net_ops); - proto_unregister(&rawv6_prot); - proto_unregister(&udplitev6_prot); - proto_unregister(&udpv6_prot); - proto_unregister(&tcpv6_prot); - - rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} -module_exit(inet6_exit); - MODULE_ALIAS_NETPROTO(PF_INET6); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 73784c3d464..82e1da3a40b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -618,8 +618,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 48b6bd2a9a1..a454b0ff57c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -107,16 +107,16 @@ ipv4_connected: if (err) goto out; - ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr); if (ipv6_addr_any(&np->saddr) || ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr) || - ipv6_mapped_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } @@ -145,7 +145,7 @@ ipv4_connected: } } - np->daddr = *daddr; + sk->sk_v6_daddr = *daddr; np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -156,7 +156,7 @@ ipv4_connected: */ fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -183,16 +183,16 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) np->saddr = fl6.saddr; - if (ipv6_addr_any(&np->rcv_saddr)) { - np->rcv_saddr = fl6.saddr; + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : @@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket) { - struct ipv6_pinfo *np = inet6_sk(sp); const struct in6_addr *dest, *src; - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; seq_printf(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d3618a78fca..b8719df0366 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -164,10 +164,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) u8 *iv; u8 *tail; __be32 *seqhi; - struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - aead = esp->aead; + aead = x->data; alen = crypto_aead_authsize(aead); tfclen = 0; @@ -181,8 +180,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); - if (esp->padlen) - clen = ALIGN(clen, esp->padlen); plen = clen - skb->len - tfclen; err = skb_cow_data(skb, tfclen + plen + alen, &trailer); @@ -271,8 +268,7 @@ error: static int esp_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int elen = skb->len - hlen; @@ -325,8 +321,7 @@ static void esp_input_done(struct crypto_async_request *base, int err) static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; - struct esp_data *esp = x->data; - struct crypto_aead *aead = esp->aead; + struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); @@ -414,9 +409,8 @@ out: static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) { - struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); - u32 align = max_t(u32, blksize, esp->padlen); + struct crypto_aead *aead = x->data; + u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4); unsigned int net_adj; if (x->props.mode != XFRM_MODE_TUNNEL) @@ -424,8 +418,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) else net_adj = 0; - return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + net_adj - 2; + return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - + net_adj) & ~(blksize - 1)) + net_adj - 2; } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -436,8 +430,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; @@ -455,18 +448,16 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static void esp6_destroy(struct xfrm_state *x) { - struct esp_data *esp = x->data; + struct crypto_aead *aead = x->data; - if (!esp) + if (!aead) return; - crypto_free_aead(esp->aead); - kfree(esp); + crypto_free_aead(aead); } static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; int err; @@ -475,7 +466,7 @@ static int esp_init_aead(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; err = crypto_aead_setkey(aead, x->aead->alg_key, (x->aead->alg_key_len + 7) / 8); @@ -492,7 +483,6 @@ error: static int esp_init_authenc(struct xfrm_state *x) { - struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; @@ -527,7 +517,7 @@ static int esp_init_authenc(struct xfrm_state *x) if (IS_ERR(aead)) goto error; - esp->aead = aead; + x->data = aead; keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); @@ -582,7 +572,6 @@ error: static int esp6_init_state(struct xfrm_state *x) { - struct esp_data *esp; struct crypto_aead *aead; u32 align; int err; @@ -590,11 +579,7 @@ static int esp6_init_state(struct xfrm_state *x) if (x->encap) return -EINVAL; - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + x->data = NULL; if (x->aead) err = esp_init_aead(x); @@ -604,9 +589,7 @@ static int esp6_init_state(struct xfrm_state *x) if (err) goto error; - aead = esp->aead; - - esp->padlen = 0; + aead = x->data; x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -626,9 +609,7 @@ static int esp6_init_state(struct xfrm_state *x) } align = ALIGN(crypto_aead_blocksize(aead), 4); - if (esp->padlen) - align = max_t(u32, align, esp->padlen); - x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); + x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); error: return err; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e4311cbc8b4..77bb8afb141 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -70,20 +70,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, const struct request_sock *req) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_TCP; - fl6->daddr = treq->rmt_addr; + fl6->daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(fl6, np->opt, &final); - fl6->saddr = treq->loc_addr; - fl6->flowi6_oif = treq->iif; + fl6->saddr = ireq->ir_v6_loc_addr; + fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = sk->sk_mark; - fl6->fl6_dport = inet_rsk(req)->rmt_port; - fl6->fl6_sport = inet_rsk(req)->loc_port; + fl6->fl6_dport = ireq->ir_rmt_port; + fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); @@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { - const struct inet6_request_sock *treq = inet6_rsk(req); + const struct inet_request_sock *ireq = inet_rsk(req); - if (inet_rsk(req)->rmt_port == rport && + if (ireq->ir_rmt_port == rport && req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&treq->rmt_addr, raddr) && - ipv6_addr_equal(&treq->loc_addr, laddr) && - (!treq->iif || treq->iif == iif)) { + ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && + ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && + (!ireq->ir_iif || ireq->ir_iif == iif)) { WARN_ON(req->sk != NULL); *prevp = prev; return req; @@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, - inet_rsk(req)->rmt_port, + const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, + inet_rsk(req)->ir_rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); @@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { - struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; sin6->sin6_family = AF_INET6; - sin6->sin6_addr = np->daddr; + sin6->sin6_addr = sk->sk_v6_daddr; sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; @@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = sk->sk_protocol; - fl6->daddr = np->daddr; + fl6->daddr = sk->sk_v6_daddr; fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); @@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); rcu_read_unlock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 32b4a1675d8..262e13c02ec 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,6 +23,39 @@ #include <net/secure_seq.h> #include <net/ip.h> +static unsigned int inet6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + static u32 inet6_ehash_secret __read_mostly; + static u32 ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); + net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + + return __inet6_ehashfn(lhash, lport, fhash, fport, + inet6_ehash_secret + net_hash_mix(net)); +} + +static int inet6_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *faddr = &sk->sk_v6_daddr; + const __u16 lport = inet->inet_num; + const __be16 fport = inet->inet_dport; + struct net *net = sock_net(sk); + + return inet6_ehashfn(net, laddr, lport, faddr, fport); +} + int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; @@ -89,43 +122,22 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) - goto begintw; - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_put(sk); - goto begin; - } - goto out; - } - } - if (get_nulls_value(node) != slot) - goto begin; - -begintw: - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_nulls_for_each_rcu(sk, node, &head->twchain) { - if (sk->sk_hash != hash) + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) continue; - if (likely(INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { - sk = NULL; - goto out; - } - if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_put(sk); - goto begintw; - } + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) goto out; + + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { + sock_gen_put(sk); + goto begin; } + goto found; } if (get_nulls_value(node) != slot) - goto begintw; - sk = NULL; + goto begin; out: + sk = NULL; +found: rcu_read_unlock(); return sk; } @@ -140,11 +152,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } @@ -236,9 +247,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); @@ -248,38 +258,28 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; - struct inet_timewait_sock *tw; + struct inet_timewait_sock *tw = NULL; int twrefcnt = 0; spin_lock(lock); - /* Check TIME-WAIT sockets first. */ - sk_nulls_for_each(sk2, node, &head->twchain) { - if (sk2->sk_hash != hash) - continue; - - if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, - ports, dif))) { - tw = inet_twsk(sk2); - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) + + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { + if (sk2->sk_state == TCP_TIME_WAIT) { + tw = inet_twsk(sk2); + if (twsk_unique(sk, sk2, twp)) + break; + } goto not_unique; + } } -unique: /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ + * in hash table socket with a funny identity. + */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; @@ -312,9 +312,9 @@ not_unique: static inline u32 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, + + return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5bec666aba6..5550a8113a6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1529,25 +1529,6 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, fib6_walk(&c.w); } -void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - read_lock_bh(&table->tb6_lock); - fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); - read_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { @@ -1782,3 +1763,189 @@ void fib6_gc_cleanup(void) unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } + +#ifdef CONFIG_PROC_FS + +struct ipv6_route_iter { + struct seq_net_private p; + struct fib6_walker_t w; + loff_t skip; + struct fib6_table *tbl; + __u32 sernum; +}; + +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ + struct rt6_info *rt = v; + struct ipv6_route_iter *iter = seq->private; + + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); +#else + seq_puts(seq, "00000000000000000000000000000000 00 "); +#endif + if (rt->rt6i_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->rt6i_gateway); + else + seq_puts(seq, "00000000000000000000000000000000"); + + seq_printf(seq, " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, + rt->dst.dev ? rt->dst.dev->name : ""); + iter->w.leaf = NULL; + return 0; +} + +static int ipv6_route_yield(struct fib6_walker_t *w) +{ + struct ipv6_route_iter *iter = w->args; + + if (!iter->skip) + return 1; + + do { + iter->w.leaf = iter->w.leaf->dst.rt6_next; + iter->skip--; + if (!iter->skip && iter->w.leaf) + return 1; + } while (iter->w.leaf); + + return 0; +} + +static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) +{ + memset(&iter->w, 0, sizeof(iter->w)); + iter->w.func = ipv6_route_yield; + iter->w.root = &iter->tbl->tb6_root; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + iter->w.args = iter; + iter->sernum = iter->w.root->fn_sernum; + INIT_LIST_HEAD(&iter->w.lh); + fib6_walker_link(&iter->w); +} + +static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, + struct net *net) +{ + unsigned int h; + struct hlist_node *node; + + if (tbl) { + h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; + node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist)); + } else { + h = 0; + node = NULL; + } + + while (!node && h < FIB6_TABLE_HASHSZ) { + node = rcu_dereference_bh( + hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); + } + return hlist_entry_safe(node, struct fib6_table, tb6_hlist); +} + +static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) +{ + if (iter->sernum != iter->w.root->fn_sernum) { + iter->sernum = iter->w.root->fn_sernum; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + WARN_ON(iter->w.skip); + iter->w.skip = iter->w.count; + } +} + +static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int r; + struct rt6_info *n; + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + if (!v) + goto iter_table; + + n = ((struct rt6_info *)v)->dst.rt6_next; + if (n) { + ++*pos; + return n; + } + +iter_table: + ipv6_route_check_sernum(iter); + read_lock(&iter->tbl->tb6_lock); + r = fib6_walk_continue(&iter->w); + read_unlock(&iter->tbl->tb6_lock); + if (r > 0) { + if (v) + ++*pos; + return iter->w.leaf; + } else if (r < 0) { + fib6_walker_unlink(&iter->w); + return NULL; + } + fib6_walker_unlink(&iter->w); + + iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); + if (!iter->tbl) + return NULL; + + ipv6_route_seq_setup_walk(iter); + goto iter_table; +} + +static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU_BH) +{ + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + rcu_read_lock_bh(); + iter->tbl = ipv6_route_seq_next_table(NULL, net); + iter->skip = *pos; + + if (iter->tbl) { + ipv6_route_seq_setup_walk(iter); + return ipv6_route_seq_next(seq, NULL, pos); + } else { + return NULL; + } +} + +static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) +{ + struct fib6_walker_t *w = &iter->w; + return w->node && !(w->state == FWS_U && w->node == w->root); +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) + __releases(RCU_BH) +{ + struct ipv6_route_iter *iter = seq->private; + + if (ipv6_route_iter_active(iter)) + fib6_walker_unlink(&iter->w); + + rcu_read_unlock_bh(); +} + +static const struct seq_operations ipv6_route_seq_ops = { + .start = ipv6_route_seq_start, + .next = ipv6_route_seq_next, + .stop = ipv6_route_seq_stop, + .show = ipv6_route_seq_show +}; + +int ipv6_route_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter)); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 46e88433ec7..e7fb7106550 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -41,7 +41,7 @@ #define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified in old IPv6 RFC. Well, it was reasonable value. */ -#define FL_MAX_LINGER 60 /* Maximal linger timeout */ +#define FL_MAX_LINGER 150 /* Maximal linger timeout */ /* FL hash table */ @@ -345,6 +345,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo expires = check_linger(expires); if (!expires) return -EPERM; + + spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (time_before(fl->linger, linger)) fl->linger = linger; @@ -352,6 +354,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo expires = fl->linger; if (time_before(fl->expires, fl->lastuse + expires)) fl->expires = fl->lastuse + expires; + spin_unlock_bh(&ip6_fl_lock); + return 0; } @@ -453,8 +457,10 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; + rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) count++; + rcu_read_unlock_bh(); if (room <= 0 || ((count >= FL_MAX_PER_SOCK || @@ -465,34 +471,6 @@ static int mem_check(struct sock *sk) return 0; } -static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2) -{ - if (h1 == h2) - return false; - if (h1 == NULL || h2 == NULL) - return true; - if (h1->hdrlen != h2->hdrlen) - return true; - return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1)); -} - -static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) -{ - if (o1 == o2) - return false; - if (o1 == NULL || o2 == NULL) - return true; - if (o1->opt_nflen != o2->opt_nflen) - return true; - if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt)) - return true; - if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt)) - return true; - if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt)) - return true; - return false; -} - static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { @@ -503,6 +481,32 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, spin_unlock_bh(&ip6_sk_fl_lock); } +int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_fl_socklist *sfl; + + rcu_read_lock_bh(); + + for_each_sk_fl_rcu(np, sfl) { + if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { + spin_lock_bh(&ip6_fl_lock); + freq->flr_label = sfl->fl->label; + freq->flr_dst = sfl->fl->dst; + freq->flr_share = sfl->fl->share; + freq->flr_expires = (sfl->fl->expires - jiffies) / HZ; + freq->flr_linger = sfl->fl->linger / HZ; + + spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock_bh(); + return 0; + } + } + rcu_read_unlock_bh(); + + return -ENOENT; +} + int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { int uninitialized_var(err); @@ -603,11 +607,6 @@ recheck: uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; - err = -EINVAL; - if (!ipv6_addr_equal(&fl1->dst, &fl->dst) || - ipv6_opt_cmp(fl1->opt, fl->opt)) - goto release; - err = -ENOMEM; if (sfl1 == NULL) goto release; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6b26e9feafb..8acb28621f9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct ip6_tnl *tunnel = netdev_priv(dev); struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ + unsigned int max_headroom = 0; /* The extra header space needed */ int gre_hlen; struct ipv6_tel_txoption opt; int mtu; @@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -976,6 +976,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) if (t->parms.o_flags&GRE_SEQ) addend += 4; } + t->hlen = addend; if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -1002,8 +1003,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) } ip6_rt_put(rt); } - - t->hlen = addend; } static int ip6gre_tnl_change(struct ip6_tnl *t, @@ -1173,9 +1172,8 @@ done: static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip6_tnl *tunnel = netdev_priv(dev); if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + new_mtu > 0xFFF8 - dev->hard_header_len) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -1254,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1271,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tunnel_stats; + ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tunnel_stats->syncp); + } + + return 0; } @@ -1451,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1464,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tap_stats; + ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tap_stats->syncp); + } + return 0; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d82de722810..4b851692b1f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -66,7 +66,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb) __skb_pull(skb, sizeof(*ipv6h)); err = -EPROTONOSUPPORT; - rcu_read_lock(); ops = rcu_dereference(inet6_offloads[ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); @@ -74,7 +73,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb) skb_reset_transport_header(skb); err = ops->callbacks.gso_send_check(skb); } - rcu_read_unlock(); out: return err; @@ -92,46 +90,58 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; bool tunnel; + int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; + skb_reset_network_header(skb); + nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; - tunnel = skb->encapsulation; + tunnel = SKB_GSO_CB(skb)->encap_level > 0; + if (tunnel) + features = skb->dev->hw_enc_features & netif_skb_features(skb); + SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); + ipv6h = ipv6_hdr(skb); __skb_pull(skb, sizeof(*ipv6h)); segs = ERR_PTR(-EPROTONOSUPPORT); proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); } - rcu_read_unlock(); if (IS_ERR(segs)) goto out; for (skb = segs; skb; skb = skb->next) { - ipv6h = ipv6_hdr(skb); - ipv6h->payload_len = htons(skb->len - skb->mac_len - - sizeof(*ipv6h)); + ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); + ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); + if (tunnel) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb->network_header = (u8 *)ipv6h - skb->head; + if (!tunnel && proto == IPPROTO_UDP) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - fptr = (struct frag_hdr *)(skb_network_header(skb) + - unfrag_ip6hlen); + fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); if (skb->next != NULL) fptr->frag_off |= htons(IP6_MF); @@ -267,6 +277,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; +static const struct net_offload sit_offload = { + .callbacks = { + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + }, +}; + static int __init ipv6_offload_init(void) { @@ -278,6 +295,9 @@ static int __init ipv6_offload_init(void) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); dev_add_offload(&ipv6_packet_offload); + + inet_add_offload(&sit_offload, IPPROTO_IPV6); + return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3a692d52916..59df872e2f4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop((struct rt6_info *)dst); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -125,7 +125,8 @@ static int ip6_finish_output2(struct sk_buff *skb) static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) + dst_allfrag(skb_dst(skb)) || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(skb, ip6_finish_output2); else return ip6_finish_output2(skb); @@ -874,7 +875,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -909,7 +910,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; @@ -1008,6 +1009,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, { struct sk_buff *skb; + struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1034,33 +1036,26 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->transport_header = skb->network_header + fragheaderlen; skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - } - - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; + } else if (skb_is_gso(skb)) { + goto append; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + skb->ip_summed = CHECKSUM_PARTIAL; + /* Specify the length of each IPv6 datagram fragment. + * It has to be a multiple of 8. + */ + skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - + sizeof(struct frag_hdr)) & ~7; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + ipv6_select_ident(&fhdr, rt); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append: + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1227,27 +1222,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - cork->length += length; - if (length > mtu) { - int proto = sk->sk_protocol; - if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - - if (proto == IPPROTO_UDP && - (rt->dst.dev->features & NETIF_F_UFO)) { + if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, - hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); - if (err) - goto error; - return 0; - } + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); + if (err) + goto error; + return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2d8f4829575..d6062325db0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU) { - return -EINVAL; + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPIP) { + if (new_mtu < 68) + return -EINVAL; + } else { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; dev->mtu = new_mtu; return 0; } @@ -1486,12 +1494,19 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6_tnl_stats; + ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6_tnl_stats->syncp); + } return 0; } @@ -1627,6 +1642,15 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], return ip6_tnl_update(t, &p); } +static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + if (dev != ip6n->fb_tnl_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6_tnl_get_size(const struct net_device *dev) { return @@ -1691,6 +1715,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .validate = ip6_tnl_validate, .newlink = ip6_tnl_newlink, .changelink = ip6_tnl_changelink, + .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, }; @@ -1707,9 +1732,9 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; -static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) { - struct net *net = dev_net(ip6n->fb_tnl_dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct net_device *dev, *aux; int h; struct ip6_tnl *t; @@ -1731,8 +1756,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) } } - t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); unregister_netdevice_many(&list); } @@ -1752,6 +1775,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ @@ -1778,10 +1802,8 @@ err_alloc_dev: static void __net_exit ip6_tnl_exit_net(struct net *net) { - struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - rtnl_lock(); - ip6_tnl_destroy_tunnels(ip6n); + ip6_tnl_destroy_tunnels(net); rtnl_unlock(); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c new file mode 100644 index 00000000000..ed94ba61dda --- /dev/null +++ b/net/ipv6/ip6_vti.c @@ -0,0 +1,1056 @@ +/* + * IPv6 virtual tunneling interface + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv6/ip6_tunnel.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/capability.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/sockios.h> +#include <linux/icmp.h> +#include <linux/if.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/if_tunnel.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/icmpv6.h> +#include <linux/init.h> +#include <linux/route.h> +#include <linux/rtnetlink.h> +#include <linux/netfilter_ipv6.h> +#include <linux/slab.h> +#include <linux/hash.h> + +#include <linux/uaccess.h> +#include <linux/atomic.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/ip_tunnels.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/ip6_tunnel.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + + return hash_32(hash, HASH_SIZE_SHIFT); +} + +static int vti6_dev_init(struct net_device *dev); +static void vti6_dev_setup(struct net_device *dev); +static struct rtnl_link_ops vti6_link_ops __read_mostly; + +static int vti6_net_id __read_mostly; +struct vti6_net { + /* the vti6 tunnel fallback device */ + struct net_device *fb_tnl_dev; + /* lists for storing tunnels in use */ + struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_wc[1]; + struct ip6_tnl __rcu **tnls[2]; +}; + +static struct net_device_stats *vti6_get_stats(struct net_device *dev) +{ + struct pcpu_tstats sum = { 0 }; + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + + sum.rx_packets += tstats->rx_packets; + sum.rx_bytes += tstats->rx_bytes; + sum.tx_packets += tstats->tx_packets; + sum.tx_bytes += tstats->tx_bytes; + } + dev->stats.rx_packets = sum.rx_packets; + dev->stats.rx_bytes = sum.rx_bytes; + dev->stats.tx_packets = sum.tx_packets; + dev->stats.tx_bytes = sum.tx_bytes; + return &dev->stats; +} + +#define for_each_vti6_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/** + * vti6_tnl_lookup - fetch tunnel matching the end-point addresses + * @net: network namespace + * @remote: the address of the tunnel exit-point + * @local: the address of the tunnel entry-point + * + * Return: + * tunnel matching given end-points if found, + * else fallback tunnel if its device is up, + * else %NULL + **/ +static struct ip6_tnl * +vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, + const struct in6_addr *local) +{ + unsigned int hash = HASH(remote, local); + struct ip6_tnl *t; + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); + if (t && (t->dev->flags & IFF_UP)) + return t; + + return NULL; +} + +/** + * vti6_tnl_bucket - get head of list matching given tunnel parameters + * @p: parameters containing tunnel end-points + * + * Description: + * vti6_tnl_bucket() returns the head of the list matching the + * &struct in6_addr entries laddr and raddr in @p. + * + * Return: head of IPv6 tunnel list + **/ +static struct ip6_tnl __rcu ** +vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = 0; + int prio = 0; + + if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { + prio = 1; + h = HASH(remote, local); + } + return &ip6n->tnls[prio][h]; +} + +static void +vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms); + + rcu_assign_pointer(t->next , rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void +vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = vti6_tnl_bucket(ip6n, &t->parms); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static void vti6_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + +static int vti6_tnl_create2(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + int err; + + err = vti6_dev_init(dev); + if (err < 0) + goto out; + + err = register_netdevice(dev); + if (err < 0) + goto out; + + strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &vti6_link_ops; + + dev_hold(dev); + vti6_tnl_link(ip6n, t); + + return 0; + +out: + return err; +} + +static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) +{ + struct net_device *dev; + struct ip6_tnl *t; + char name[IFNAMSIZ]; + int err; + + if (p->name[0]) + strlcpy(name, p->name, IFNAMSIZ); + else + sprintf(name, "ip6_vti%%d"); + + dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup); + if (dev == NULL) + goto failed; + + dev_net_set(dev, net); + + t = netdev_priv(dev); + t->parms = *p; + t->net = dev_net(dev); + + err = vti6_tnl_create2(dev); + if (err < 0) + goto failed_free; + + return t; + +failed_free: + vti6_dev_free(dev); +failed: + return NULL; +} + +/** + * vti6_locate - find or create tunnel matching given parameters + * @net: network namespace + * @p: tunnel parameters + * @create: != 0 if allowed to create new tunnel if no match found + * + * Description: + * vti6_locate() first tries to locate an existing tunnel + * based on @parms. If this is unsuccessful, but @create is set a new + * tunnel device is created and registered for use. + * + * Return: + * matching tunnel or NULL + **/ +static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, + int create) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + struct ip6_tnl __rcu **tp; + struct ip6_tnl *t; + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + for (tp = vti6_tnl_bucket(ip6n, p); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr)) + return t; + } + if (!create) + return NULL; + return vti6_tnl_create(net, p); +} + +/** + * vti6_dev_uninit - tunnel device uninitializer + * @dev: the device to be destroyed + * + * Description: + * vti6_dev_uninit() removes tunnel from its list + **/ +static void vti6_dev_uninit(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + if (dev == ip6n->fb_tnl_dev) + RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); + else + vti6_tnl_unlink(ip6n, t); + ip6_tnl_dst_reset(t); + dev_put(dev); +} + +static int vti6_rcv(struct sk_buff *skb) +{ + struct ip6_tnl *t; + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + rcu_read_lock(); + + if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, + &ipv6h->daddr)) != NULL) { + struct pcpu_tstats *tstats; + + if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { + rcu_read_unlock(); + goto discard; + } + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + rcu_read_unlock(); + return 0; + } + + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { + t->dev->stats.rx_dropped++; + rcu_read_unlock(); + goto discard; + } + + tstats = this_cpu_ptr(t->dev->tstats); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + + skb->mark = 0; + secpath_reset(skb); + skb->dev = t->dev; + + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + return 1; + +discard: + kfree_skb(skb); + return 0; +} + +/** + * vti6_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ +static inline bool +vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +/** + * vti6_xmit - send a packet + * @skb: the outgoing socket buffer + * @dev: the outgoing tunnel device + **/ +static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + struct dst_entry *dst = NULL, *ndst = NULL; + struct flowi6 fl6; + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct net_device *tdev; + int err = -1; + + if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || + !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + return err; + + dst = ip6_tnl_dst_check(t); + if (!dst) { + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + + ndst = ip6_route_output(net, NULL, &fl6); + + if (ndst->error) + goto tx_err_link_failure; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; + goto tx_err_link_failure; + } + dst = ndst; + } + + if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL) + goto tx_err_link_failure; + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + t->parms.name); + goto tx_err_dst_release; + } + + + skb_dst_drop(skb); + skb_dst_set_noref(skb, dst); + + ip6tunnel_xmit(skb, dev); + if (ndst) { + dev->mtu = dst_mtu(ndst); + ip6_tnl_dst_store(t, ndst); + } + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(ndst); + return err; +} + +static netdev_tx_t +vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + switch (skb->protocol) { + case htons(ETH_P_IPV6): + ret = vti6_xmit(skb, dev); + break; + default: + goto tx_err; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void vti6_link_config(struct ip6_tnl *t) +{ + struct dst_entry *dst; + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowi6_mark = be32_to_cpu(p->i_key); + fl6->flowi6_proto = p->proto; + fl6->flowlabel = 0; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | + IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + dev->iflink = p->link; + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + + dst = ip6_route_output(dev_net(dev), NULL, fl6); + if (dst->error) + return; + + dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6), + NULL, 0); + if (IS_ERR(dst)) + return; + + if (dst->dev) { + dev->hard_header_len = dst->dev->hard_header_len; + + dev->mtu = dst_mtu(dst); + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + dst_release(dst); + } +} + +/** + * vti6_tnl_change - update the tunnel parameters + * @t: tunnel to be changed + * @p: tunnel configuration parameters + * + * Description: + * vti6_tnl_change() updates the tunnel parameters + **/ +static int +vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.link = p->link; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + t->parms.proto = p->proto; + ip6_tnl_dst_reset(t); + vti6_link_config(t); + return 0; +} + +static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + struct net *net = dev_net(t->dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + int err; + + vti6_tnl_unlink(ip6n, t); + synchronize_net(); + err = vti6_tnl_change(t, p); + vti6_tnl_link(ip6n, t); + netdev_state_change(t->dev); + return err; +} + +static void +vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->proto = u->proto; + + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->proto = p->proto; + + memcpy(u->name, p->name, sizeof(u->name)); +} + +/** + * vti6_tnl_ioctl - configure vti6 tunnels from userspace + * @dev: virtual device associated with tunnel + * @ifr: parameters passed from userspace + * @cmd: command to be performed + * + * Description: + * vti6_ioctl() is used for managing vti6 tunnels + * from userspace. + * + * The possible commands are the following: + * %SIOCGETTUNNEL: get tunnel parameters for device + * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters + * %SIOCCHGTUNNEL: change tunnel parameters to those given + * %SIOCDELTUNNEL: delete tunnel + * + * The fallback device "ip6_vti0", created during module + * initialization, can be used for creating other tunnel devices. + * + * Return: + * 0 on success, + * %-EFAULT if unable to copy data to or from userspace, + * %-EPERM if current process hasn't %CAP_NET_ADMIN set + * %-EINVAL if passed tunnel parameters are invalid, + * %-EEXIST if changing a tunnel's parameters would cause a conflict + * %-ENODEV if attempting to change or delete a nonexisting device + **/ +static int +vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t = NULL; + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + switch (cmd) { + case SIOCGETTUNNEL: + if (dev == ip6n->fb_tnl_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + vti6_parm_from_user(&p1, &p); + t = vti6_locate(net, &p1, 0); + } else { + memset(&p, 0, sizeof(p)); + } + if (t == NULL) + t = netdev_priv(dev); + vti6_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + break; + err = -EINVAL; + if (p.proto != IPPROTO_IPV6 && p.proto != 0) + break; + vti6_parm_from_user(&p1, &p); + t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); + if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else + t = netdev_priv(dev); + + err = vti6_update(t, &p1); + } + if (t) { + err = 0; + vti6_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + + if (dev == ip6n->fb_tnl_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + break; + err = -ENOENT; + vti6_parm_from_user(&p1, &p); + t = vti6_locate(net, &p1, 0); + if (t == NULL) + break; + err = -EPERM; + if (t->dev == ip6n->fb_tnl_dev) + break; + dev = t->dev; + } + err = 0; + unregister_netdevice(dev); + break; + default: + err = -EINVAL; + } + return err; +} + +/** + * vti6_tnl_change_mtu - change mtu manually for tunnel device + * @dev: virtual device associated with tunnel + * @new_mtu: the new mtu + * + * Return: + * 0 on success, + * %-EINVAL if mtu too small + **/ +static int vti6_change_mtu(struct net_device *dev, int new_mtu) +{ + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; + + dev->mtu = new_mtu; + return 0; +} + +static const struct net_device_ops vti6_netdev_ops = { + .ndo_uninit = vti6_dev_uninit, + .ndo_start_xmit = vti6_tnl_xmit, + .ndo_do_ioctl = vti6_ioctl, + .ndo_change_mtu = vti6_change_mtu, + .ndo_get_stats = vti6_get_stats, +}; + +/** + * vti6_dev_setup - setup virtual tunnel device + * @dev: virtual device associated with tunnel + * + * Description: + * Initialize function pointers and device parameters + **/ +static void vti6_dev_setup(struct net_device *dev) +{ + struct ip6_tnl *t; + + dev->netdev_ops = &vti6_netdev_ops; + dev->destructor = vti6_dev_free; + + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); + dev->mtu = ETH_DATA_LEN; + t = netdev_priv(dev); + dev->flags |= IFF_NOARP; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +/** + * vti6_dev_init_gen - general initializer for all tunnel devices + * @dev: virtual device associated with tunnel + **/ +static inline int vti6_dev_init_gen(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + t->dev = dev; + t->net = dev_net(dev); + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + return 0; +} + +/** + * vti6_dev_init - initializer for all non fallback tunnel devices + * @dev: virtual device associated with tunnel + **/ +static int vti6_dev_init(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int err = vti6_dev_init_gen(dev); + + if (err) + return err; + vti6_link_config(t); + return 0; +} + +/** + * vti6_fb_tnl_dev_init - initializer for fallback tunnel device + * @dev: fallback device + * + * Return: 0 + **/ +static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + int err = vti6_dev_init_gen(dev); + + if (err) + return err; + + t->parms.proto = IPPROTO_IPV6; + dev_hold(dev); + + vti6_link_config(t); + + rcu_assign_pointer(ip6n->tnls_wc[0], t); + return 0; +} + +static int vti6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + return 0; +} + +static void vti6_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_VTI_LINK]) + parms->link = nla_get_u32(data[IFLA_VTI_LINK]); + + if (data[IFLA_VTI_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL], + sizeof(struct in6_addr)); + + if (data[IFLA_VTI_REMOTE]) + nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE], + sizeof(struct in6_addr)); + + if (data[IFLA_VTI_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); + + if (data[IFLA_VTI_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); +} + +static int vti6_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *nt; + + nt = netdev_priv(dev); + vti6_netlink_parms(data, &nt->parms); + + nt->parms.proto = IPPROTO_IPV6; + + if (vti6_locate(net, &nt->parms, 0)) + return -EEXIST; + + return vti6_tnl_create2(dev); +} + +static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t; + struct __ip6_tnl_parm p; + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + if (dev == ip6n->fb_tnl_dev) + return -EINVAL; + + vti6_netlink_parms(data, &p); + + t = vti6_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else + t = netdev_priv(dev); + + return vti6_update(t, &p); +} + +static size_t vti6_get_size(const struct net_device *dev) +{ + return + /* IFLA_VTI_LINK */ + nla_total_size(4) + + /* IFLA_VTI_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_VTI_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_VTI_IKEY */ + nla_total_size(4) + + /* IFLA_VTI_OKEY */ + nla_total_size(4) + + 0; +} + +static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || + nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), + &parm->laddr) || + nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), + &parm->raddr) || + nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || + nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { + [IFLA_VTI_LINK] = { .type = NLA_U32 }, + [IFLA_VTI_LOCAL] = { .len = sizeof(struct in6_addr) }, + [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) }, + [IFLA_VTI_IKEY] = { .type = NLA_U32 }, + [IFLA_VTI_OKEY] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops vti6_link_ops __read_mostly = { + .kind = "vti6", + .maxtype = IFLA_VTI_MAX, + .policy = vti6_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = vti6_dev_setup, + .validate = vti6_validate, + .newlink = vti6_newlink, + .changelink = vti6_changelink, + .get_size = vti6_get_size, + .fill_info = vti6_fill_info, +}; + +static struct xfrm_tunnel_notifier vti6_handler __read_mostly = { + .handler = vti6_rcv, + .priority = 1, +}; + +static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) +{ + int h; + struct ip6_tnl *t; + LIST_HEAD(list); + + for (h = 0; h < HASH_SIZE; h++) { + t = rtnl_dereference(ip6n->tnls_r_l[h]); + while (t != NULL) { + unregister_netdevice_queue(t->dev, &list); + t = rtnl_dereference(t->next); + } + } + + t = rtnl_dereference(ip6n->tnls_wc[0]); + unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_many(&list); +} + +static int __net_init vti6_init_net(struct net *net) +{ + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct ip6_tnl *t = NULL; + int err; + + ip6n->tnls[0] = ip6n->tnls_wc; + ip6n->tnls[1] = ip6n->tnls_r_l; + + err = -ENOMEM; + ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", + vti6_dev_setup); + + if (!ip6n->fb_tnl_dev) + goto err_alloc_dev; + dev_net_set(ip6n->fb_tnl_dev, net); + + err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; + + err = register_netdev(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; + + t = netdev_priv(ip6n->fb_tnl_dev); + + strcpy(t->parms.name, ip6n->fb_tnl_dev->name); + return 0; + +err_register: + vti6_dev_free(ip6n->fb_tnl_dev); +err_alloc_dev: + return err; +} + +static void __net_exit vti6_exit_net(struct net *net) +{ + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + rtnl_lock(); + vti6_destroy_tunnels(ip6n); + rtnl_unlock(); +} + +static struct pernet_operations vti6_net_ops = { + .init = vti6_init_net, + .exit = vti6_exit_net, + .id = &vti6_net_id, + .size = sizeof(struct vti6_net), +}; + +/** + * vti6_tunnel_init - register protocol and reserve needed resources + * + * Return: 0 on success + **/ +static int __init vti6_tunnel_init(void) +{ + int err; + + err = register_pernet_device(&vti6_net_ops); + if (err < 0) + goto out_pernet; + + err = xfrm6_mode_tunnel_input_register(&vti6_handler); + if (err < 0) { + pr_err("%s: can't register vti6\n", __func__); + goto out; + } + err = rtnl_link_register(&vti6_link_ops); + if (err < 0) + goto rtnl_link_failed; + + return 0; + +rtnl_link_failed: + xfrm6_mode_tunnel_input_deregister(&vti6_handler); +out: + unregister_pernet_device(&vti6_net_ops); +out_pernet: + return err; +} + +/** + * vti6_tunnel_cleanup - free resources and unregister protocol + **/ +static void __exit vti6_tunnel_cleanup(void) +{ + rtnl_link_unregister(&vti6_link_ops); + if (xfrm6_mode_tunnel_input_deregister(&vti6_handler)) + pr_info("%s: can't deregister vti6\n", __func__); + + unregister_pernet_device(&vti6_net_ops); +} + +module_init(vti6_tunnel_init); +module_exit(vti6_tunnel_cleanup); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("vti6"); +MODULE_ALIAS_NETDEV("ip6_vti0"); +MODULE_AUTHOR("Steffen Klassert"); +MODULE_DESCRIPTION("IPv6 virtual tunnel interface"); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5636a912074..ce507d9e1c9 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,8 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1e2e8ef29c..1c6ce3119ff 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } if (ipv6_only_sock(sk) || - !ipv6_addr_v4mapped(&np->daddr)) { + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } @@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : + np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { @@ -1211,6 +1212,34 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->sndflow; break; + case IPV6_FLOWLABEL_MGR: + { + struct in6_flowlabel_req freq; + + if (len < sizeof(freq)) + return -EINVAL; + + if (copy_from_user(&freq, optval, sizeof(freq))) + return -EFAULT; + + if (freq.flr_action != IPV6_FL_A_GET) + return -EINVAL; + + len = sizeof(freq); + memset(&freq, 0, sizeof(freq)); + + val = ipv6_flowlabel_opt_get(sk, &freq); + if (val < 0) + return val; + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &freq, len)) + return -EFAULT; + + return 0; + } + case IPV6_ADDR_PREFERENCES: val = 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 096cd67b737..d18f9f903db 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data) if (idev->mc_dad_count) mld_dad_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, @@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data) idev->mc_gq_running = 0; mld_send_report(idev, NULL); - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_timer_expire(unsigned long data) @@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) mld_ifc_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_event(struct inet6_dev *idev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f8a55ff1971..3512177deb4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1726,8 +1726,8 @@ int __init ndisc_init(void) &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; -#endif out: +#endif return err; #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a7f842b29b6..7702f9e90a0 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,19 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_TABLES_IPV6 + depends on NF_TABLES + tristate "IPv6 nf_tables support" + +config NFT_CHAIN_ROUTE_IPV6 + depends on NF_TABLES_IPV6 + tristate "IPv6 nf_tables route chain support" + +config NFT_CHAIN_NAT_IPV6 + depends on NF_TABLES_IPV6 + depends on NF_NAT_IPV6 && NFT_NAT + tristate "IPv6 nf_tables nat chain support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b53738f798..d1b4928f34f 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,11 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# nf_tables +obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 44400c216dc..710238f58aa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 56eef30ee5f..da00a2ecde5 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb) +static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct tcphdr otcph, *tcph; @@ -88,8 +88,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) } /* Check checksum. */ - if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, - skb_checksum(oldskb, tcphoff, otcplen, 0))) { + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); return; } @@ -227,7 +226,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb); + send_reset(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 19cfea8dbca..bf9f612c1bc 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -311,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(unsigned int hooknum, +static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 29b44b14c5e..ca7f6c12808 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,13 +32,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, +ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c705907ae6a..307bbb782d1 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, +ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, hook, in, out, + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 9b076d2d3a7..84c7f33d0cf 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv6_fn(unsigned int hooknum, +nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - hooknum, hdrlen)) + ops->hooknum, + hdrlen)) return NF_DROP; else return NF_ACCEPT; @@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -152,7 +153,7 @@ oif_changed: } static unsigned int -nf_nat_ipv6_in(unsigned int hooknum, +nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_out(unsigned int hooknum, +nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_local_fn(unsigned int hooknum, +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9a626d86720..5274740acec 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,13 +19,14 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, +ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ce88d1d7e52..ab3b0219ecf 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,14 +36,15 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(unsigned int hook, struct sk_buff *skb, +ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index d6e4dd8b58d..4cbc6b290dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(unsigned int hooknum, +static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(unsigned int hooknum, +static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -169,66 +169,16 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int __ipv6_conntrack_in(struct net *net, - unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb->nfct_reasm; - const struct nf_conn_help *help; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - /* This packet is fragmented and has reassembled packet. */ - if (reasm) { - /* Reassembled packet isn't parsed yet ? */ - if (!reasm->nfct) { - unsigned int ret; - - ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); - if (ret != NF_ACCEPT) - return ret; - } - - /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. In case of unconfirmed connections NAT - * might reassign a helper, so the entire packet is also - * required. - */ - ct = nf_ct_get(reasm, &ctinfo); - if (ct != NULL && !nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { - nf_conntrack_get_reasm(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, - (struct net_device *)in, - (struct net_device *)out, - okfn, NF_IP6_PRI_CONNTRACK + 1); - return NF_DROP_ERR(-ECANCELED); - } - } - - nf_conntrack_get(reasm->nfct); - skb->nfct = reasm->nfct; - skb->nfctinfo = reasm->nfctinfo; - return NF_ACCEPT; - } - - return nf_conntrack_in(net, PF_INET6, hooknum, skb); -} - -static unsigned int ipv6_conntrack_in(unsigned int hooknum, +static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb); } -static unsigned int ipv6_conntrack_local(unsigned int hooknum, +static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -239,7 +189,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { @@ -297,9 +247,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; struct nf_conn *ct; - tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; - tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index dffdc1a389c..767ab8da821 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -144,12 +144,24 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } +static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) +{ + u32 c; + + net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, nf_frags.rnd); + return c & (INETFRAGS_HASHSZ - 1); +} + + static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct frag_queue *nq; nq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); + return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } static void nf_skb_free(struct sk_buff *skb) @@ -185,7 +197,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.ecn = ecn; read_lock_bh(&nf_frags.lock); - hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); + hash = nf_hash_frag(id, src, dst); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); @@ -621,31 +633,16 @@ ret_orig: return skb; } -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) +void nf_ct_frag6_consume_orig(struct sk_buff *skb) { struct sk_buff *s, *s2; - unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - nf_conntrack_put_reasm(s->nfct_reasm); - nf_conntrack_get_reasm(skb); - s->nfct_reasm = skb; - s2 = s->next; s->next = NULL; - - if (ret != -ECANCELED) - ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, - in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - else - kfree_skb(s); - + consume_skb(s); s = s2; } - nf_conntrack_put_reasm(skb); } static int nf_ct_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c..7b9a748c6ba 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, } -static unsigned int ipv6_defrag(unsigned int hooknum, +static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, - (struct net_device *)out, okfn); + nf_ct_frag6_consume_orig(reasm); + + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, + (struct net_device *) in, (struct net_device *) out, + okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c new file mode 100644 index 00000000000..d77db8a1350 --- /dev/null +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ipv6.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> + +static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { + if (net_ratelimit()) + pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nft_af_info nft_af_ipv6 __read_mostly = { + .family = NFPROTO_IPV6, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + }, +}; + +static int nf_tables_ipv6_init_net(struct net *net) +{ + net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv6 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); + + if (nft_register_afinfo(net, net->nft.ipv6) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv6); + return -ENOMEM; +} + +static void nf_tables_ipv6_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv6); + kfree(net->nft.ipv6); +} + +static struct pernet_operations nf_tables_ipv6_net_ops = { + .init = nf_tables_ipv6_init_net, + .exit = nf_tables_ipv6_exit_net, +}; + +static unsigned int +nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nf_chain_type filter_ipv6 = { + .family = NFPROTO_IPV6, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, + }, +}; + +static int __init nf_tables_ipv6_init(void) +{ + nft_register_chain_type(&filter_ipv6); + return register_pernet_subsys(&nf_tables_ipv6_net_ops); +} + +static void __exit nf_tables_ipv6_exit(void) +{ + unregister_pernet_subsys(&nf_tables_ipv6_net_ops); + nft_unregister_chain_type(&filter_ipv6); +} + +module_init(nf_tables_ipv6_init); +module_exit(nf_tables_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_INET6); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c new file mode 100644 index 00000000000..e86dcd70dc7 --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/ipv6.h> + +/* + * IPv6 NAT chains + */ + +static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + struct nft_pktinfo pkt; + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + ops->hooknum, + hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_chain_type nft_chain_nat_ipv6 = { + .family = NFPROTO_IPV6, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, + [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_nat_ipv6_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv6); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_ipv6_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv6); +} + +module_init(nft_chain_nat_ipv6_init); +module_exit(nft_chain_nat_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c new file mode 100644 index 00000000000..3fe40f0456a --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/route.h> + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct nft_pktinfo pkt; + struct in6_addr saddr, daddr; + u_int8_t hop_limit; + u32 mark, flowlabel; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either */ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_DROP && ret != NF_QUEUE && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + + return ret; +} + +static struct nf_chain_type nft_chain_route_ipv6 = { + .family = NFPROTO_IPV6, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv6); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv6); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 18f19df4189..8815e31a87f 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } if (!iif) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 58916bbb172..e24ff1df040 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { - struct ipv6_pinfo *np = inet6_sk(sk); if (!net_eq(sock_net(sk), net)) continue; - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) goto found; if (is_multicast && inet6_mc_check(sk, loc_addr, rmt_addr)) @@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; @@ -335,8 +334,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + return; + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) @@ -464,9 +465,6 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_OOB) return -EOPNOTSUPP; - if (addr_len) - *addr_len=sizeof(*sin6); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); @@ -505,6 +503,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, IP6CB(skb)->iif); + *addr_len = sizeof(*sin6); } sock_recv_ts_and_drops(msg, sk, skb); @@ -802,8 +801,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -814,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EDESTADDRREQ; proto = inet->inet_num; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1aeb473b2cc..cc85a9ba501 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -82,24 +82,24 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr, u32 rnd) +static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { u32 c; + net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, rnd); + (__force u32)id, ip6_frags.rnd); return c & (INETFRAGS_HASHSZ - 1); } -EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); } bool ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -193,7 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.ecn = ecn; read_lock(&ip6_frags.lock); - hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); + hash = inet6_hash_frag(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd96d82..7faa9d5e150 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -476,6 +476,24 @@ out: } #ifdef CONFIG_IPV6_ROUTER_PREF +struct __rt6_probe_work { + struct work_struct work; + struct in6_addr target; + struct net_device *dev; +}; + +static void rt6_probe_deferred(struct work_struct *w) +{ + struct in6_addr mcaddr; + struct __rt6_probe_work *work = + container_of(w, struct __rt6_probe_work, work); + + addrconf_addr_solict_mult(&work->target, &mcaddr); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); + dev_put(work->dev); + kfree(w); +} + static void rt6_probe(struct rt6_info *rt) { struct neighbour *neigh; @@ -499,17 +517,23 @@ static void rt6_probe(struct rt6_info *rt) if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { - struct in6_addr mcaddr; - struct in6_addr *target; + struct __rt6_probe_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (neigh) { + if (neigh && work) neigh->updated = jiffies; + + if (neigh) write_unlock(&neigh->lock); - } - target = (struct in6_addr *)&rt->rt6i_gateway; - addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); + if (work) { + INIT_WORK(&work->work, rt6_probe_deferred); + work->target = rt->rt6i_gateway; + dev_hold(rt->dst.dev); + work->dev = rt->dst.dev; + schedule_work(&work->work); + } } else { out: write_unlock(&neigh->lock); @@ -595,7 +619,7 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, goto out; m = rt6_score_route(rt, oif, strict); - if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + if (m == RT6_NUD_FAIL_SOFT) { match_do_rr = true; m = 0; /* lowest valid score */ } else if (m < 0) { @@ -707,8 +731,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + if (rinfo->prefix_len == 0) + rt = rt6_get_dflt_router(gwaddr, dev); + else + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, + gwaddr, dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -847,12 +874,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - if (!(rt->rt6i_flags & RTF_GATEWAY)) { - if (ort->rt6i_dst.plen != 128 && - ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) - rt->rt6i_flags |= RTF_ANYCAST; - rt->rt6i_gateway = *daddr; - } + if (ort->rt6i_dst.plen != 128 && + ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + rt->rt6i_flags |= RTF_ANYCAST; rt->rt6i_flags |= RTF_CACHE; @@ -1064,10 +1088,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1137,7 +1164,6 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1236,7 +1262,6 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1258,7 +1283,6 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; @@ -1338,6 +1362,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; atomic_set(&rt->dst.__refcnt, 1); + rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; @@ -1873,7 +1898,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; + if (ort->rt6i_flags & RTF_GATEWAY) + rt->rt6i_gateway = ort->rt6i_gateway; + else + rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) @@ -2160,6 +2188,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, else rt->rt6i_flags |= RTF_LOCAL; + rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); @@ -2800,56 +2829,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -struct rt6_proc_arg -{ - char *buffer; - int offset; - int length; - int skip; - int len; -}; - -static int rt6_info_route(struct rt6_info *rt, void *p_arg) -{ - struct seq_file *m = p_arg; - - seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); - -#ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); -#else - seq_puts(m, "00000000000000000000000000000000 00 "); -#endif - if (rt->rt6i_flags & RTF_GATEWAY) { - seq_printf(m, "%pi6", &rt->rt6i_gateway); - } else { - seq_puts(m, "00000000000000000000000000000000"); - } - seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); - return 0; -} - -static int ipv6_route_show(struct seq_file *m, void *v) -{ - struct net *net = (struct net *)m->private; - fib6_clean_all_ro(net, rt6_info_route, 0, m); - return 0; -} - -static int ipv6_route_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, ipv6_route_show); -} - static const struct file_operations ipv6_route_proc_fops = { .owner = THIS_MODULE, .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release_net, + .release = seq_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7ee5cb96db3..1b4a4a95367 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, return false; } +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + int prefix_len; + +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} + +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); - if (tunnel->dev->priv_flags & IFF_ISATAP) { - if (!isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; - } - } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { - if (is_spoofed_6rd(tunnel, iph->saddr, - &ipv6_hdr(skb)->saddr) || - is_spoofed_6rd(tunnel, iph->daddr, - &ipv6_hdr(skb)->daddr)) { - tunnel->dev->stats.rx_errors++; - goto out; - } + if (packet_is_spoofed(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; } __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); @@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -879,10 +933,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + if (IS_ERR(skb)) + goto out; err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); @@ -892,8 +945,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev->stats.tx_errors++; dev_kfree_skb(skb); +out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -902,13 +956,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto out; ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; +out: + dev->stats.tx_errors++; + return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, @@ -1238,6 +1294,12 @@ static void ipip6_dev_free(struct net_device *dev) free_netdev(dev); } +#define SIT_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) + static void ipip6_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip6_netdev_ops; @@ -1251,11 +1313,14 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; + dev->features |= SIT_FEATURES; + dev->hw_features |= SIT_FEATURES; } static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1268,6 +1333,12 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_tunnel_stats; + ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_tunnel_stats->syncp); + } + return 0; } @@ -1277,6 +1348,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1290,6 +1362,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_fb_stats; + ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_fb_stats->syncp); + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; @@ -1540,6 +1619,15 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { #endif }; +static void ipip6_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct sit_net *sitn = net_generic(net, sit_net_id); + + if (dev != sitn->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, @@ -1551,6 +1639,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .changelink = ipip6_changelink, .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, + .dellink = ipip6_dellink, }; static struct xfrm_tunnel sit_handler __read_mostly = { @@ -1565,9 +1654,10 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { .priority = 2, }; -static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) +static void __net_exit sit_destroy_tunnels(struct net *net, + struct list_head *head) { - struct net *net = dev_net(sitn->fb_tunnel_dev); + struct sit_net *sitn = net_generic(net, sit_net_id); struct net_device *dev, *aux; int prio; @@ -1612,6 +1702,7 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ @@ -1641,12 +1732,10 @@ err_alloc_dev: static void __net_exit sit_exit_net(struct net *net) { - struct sit_net *sitn = net_generic(net, sit_net_id); LIST_HEAD(list); rtnl_lock(); - sit_destroy_tunnels(sitn, &list); - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); + sit_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bf63ac8a49b..535a3ad262f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,26 +24,23 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* Table must be sorted. */ +static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS]; + +/* RFC 2460, Section 8.3: + * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] + * + * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows + * using higher values than ipv4 tcp syncookies. + * The other values are chosen based on ethernet (1500 and 9k MTU), plus + * one that accounts for common encap (PPPoe) overhead. Table must be sorted. + */ static __u16 const msstab[] = { - 64, - 512, - 536, - 1280 - 60, + 1280 - 60, /* IPV6_MIN_MTU - 60 */ 1480 - 60, 1500 - 60, - 4460 - 60, 9000 - 60, }; -/* - * This (misnamed) value is the age of syncookie which is permitted. - * Its ideal value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula (exponential - * backoff) to compute at runtime so it's currently hardcoded here. - */ -#define COUNTER_TRIES 4 - static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) @@ -66,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) { - __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch); + __u32 *tmp; + + net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); + + tmp = __get_cpu_var(ipv6_cookie_scratch); /* * we have 320 bits of information to hash, copy in the remaining - * 192 bits required for sha_transform, from the syncookie_secret + * 192 bits required for sha_transform, from the syncookie6_secret * and overwrite the digest with the secret */ - memcpy(tmp + 10, syncookie_secret[c], 44); + memcpy(tmp + 10, syncookie6_secret[c], 44); memcpy(tmp, saddr, 16); memcpy(tmp + 4, daddr, 16); tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; @@ -86,8 +87,9 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, __u32 sseq, - __u32 count, __u32 data) + __u32 data) { + u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) @@ -96,15 +98,14 @@ static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, - __be16 dport, __u32 sseq, __u32 count, - __u32 maxdiff) + __be16 dport, __u32 sseq) { - __u32 diff; + __u32 diff, count = tcp_cookie_time(); cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); - if (diff >= maxdiff) + if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - @@ -125,8 +126,7 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, *mssp = msstab[mssind]; return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, - th->dest, ntohl(th->seq), - jiffies / (HZ * 60), mssind); + th->dest, ntohl(th->seq), mssind); } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); @@ -146,8 +146,7 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, { __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, - th->source, th->dest, seq, - jiffies / (HZ * 60), COUNTER_TRIES); + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -157,7 +156,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; struct inet_request_sock *ireq; - struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -194,7 +192,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ireq = inet_rsk(req); - ireq6 = inet6_rsk(req); treq = tcp_rsk(req); treq->listener = NULL; @@ -202,22 +199,22 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; req->mss = mss; - ireq->rmt_port = th->source; - ireq->loc_port = th->dest; - ireq6->rmt_addr = ipv6_hdr(skb)->saddr; - ireq6->loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_port = th->source; + ireq->ir_num = ntohs(th->dest); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - ireq6->pktopts = skb; + ireq->pktopts = skb; } - ireq6->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq6->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); req->expires = 0UL; req->num_retrans = 0; @@ -241,12 +238,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = ireq6->rmt_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq6->loc_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c71501fc91..0740f93a114 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (tp->rx_opt.ts_recent_stamp && - !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { + !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } else { ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); } return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; @@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); @@ -465,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, u16 queue_mapping) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; int err = -ENOMEM; @@ -477,9 +477,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { - __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); + __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr); - fl6->daddr = treq->rmt_addr; + fl6->daddr = ireq->ir_v6_rmt_addr; skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); @@ -502,7 +503,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) static void tcp_v6_reqsk_destructor(struct request_sock *req) { - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG @@ -515,13 +516,13 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr); + return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, struct request_sock *req) { - return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); + return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, @@ -621,10 +622,10 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, if (sk) { saddr = &inet6_sk(sk)->saddr; - daddr = &inet6_sk(sk)->daddr; + daddr = &sk->sk_v6_daddr; } else if (req) { - saddr = &inet6_rsk(req)->loc_addr; - daddr = &inet6_rsk(req)->rmt_addr; + saddr = &inet_rsk(req)->ir_v6_loc_addr; + daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -949,7 +950,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; struct request_sock *req; - struct inet6_request_sock *treq; + struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; @@ -994,25 +995,25 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = inet6_rsk(req); - treq->rmt_addr = ipv6_hdr(skb)->saddr; - treq->loc_addr = ipv6_hdr(skb)->daddr; + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, skb, sock_net(sk)); - treq->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - treq->pktopts = skb; + ireq->pktopts = skb; } if (want_cookie) { @@ -1051,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) * to the moment of synflood. */ LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); + &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } @@ -1086,7 +1087,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *treq; + struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1116,11 +1117,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1151,7 +1152,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } - treq = inet6_rsk(req); + ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) goto out_overflow; @@ -1185,10 +1186,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = treq->rmt_addr; - newnp->saddr = treq->loc_addr; - newnp->rcv_saddr = treq->loc_addr; - newsk->sk_bound_dev_if = treq->iif; + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; + newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -1203,11 +1204,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, + if (ireq->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq->pktopts, sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(treq->pktopts); - treq->pktopts = NULL; + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } @@ -1244,13 +1245,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) { + if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key * across. Shucks. */ - tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -1722,8 +1723,8 @@ static void get_openreq6(struct seq_file *seq, const struct sock *sk, struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; - const struct in6_addr *src = &inet6_rsk(req)->loc_addr; - const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; + const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; + const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; if (ttd < 0) ttd = 0; @@ -1734,10 +1735,10 @@ static void get_openreq6(struct seq_file *seq, i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], - ntohs(inet_rsk(req)->loc_port), + inet_rsk(req)->ir_num, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], - ntohs(inet_rsk(req)->rmt_port), + ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, 0,0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ @@ -1758,10 +1759,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - const struct ipv6_pinfo *np = inet6_sk(sp); - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); @@ -1810,11 +1810,10 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); - dest = &tw6->tw_v6_daddr; - src = &tw6->tw_v6_rcv_saddr; + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -1834,6 +1833,7 @@ static void get_timewait6_sock(struct seq_file *seq, static int tcp6_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; + struct sock *sk = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1849,14 +1849,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_ESTABLISHED: - get_tcp6_sock(seq, v, st->num); + if (sk->sk_state == TCP_TIME_WAIT) + get_timewait6_sock(seq, v, st->num); + else + get_tcp6_sock(seq, v, st->num); break; case TCP_SEQ_STATE_OPENREQ: get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); break; - case TCP_SEQ_STATE_TIME_WAIT: - get_timewait6_sock(seq, v, st->num); - break; } out: return 0; @@ -1929,6 +1929,7 @@ struct proto tcpv6_prot = { .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, + .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 2ec6bf6a0aa..c1097c79890 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -83,7 +83,7 @@ static int tcp6_gro_complete(struct sk_buff *skb) static const struct net_offload tcpv6_offload = { .callbacks = { .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, + .gso_segment = tcp_gso_segment, .gro_receive = tcp6_gro_receive, .gro_complete = tcp6_gro_complete, }, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f4058150262..81eb8cf8389 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,22 +53,42 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static unsigned int udp6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + static u32 udp6_ehash_secret __read_mostly; + static u32 udp_ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&udp6_ehash_secret, + sizeof(udp6_ehash_secret)); + net_get_random_once(&udp_ipv6_hash_secret, + sizeof(udp_ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); + + return __inet6_ehashfn(lhash, lport, fhash, fport, + udp_ipv6_hash_secret + net_hash_mix(net)); +} + int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); - __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk1_rcv_saddr || !sk2_rcv_saddr || - sk1_rcv_saddr == sk2_rcv_saddr)); + (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || + sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -79,7 +99,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) return 1; return 0; @@ -107,7 +127,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) unsigned int hash2_nulladdr = udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -117,7 +137,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { u16 new_hash = udp6_portaddr_hash(sock_net(sk), - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -133,7 +153,6 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); score = 0; @@ -142,13 +161,13 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -171,10 +190,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; if (inet->inet_dport) { @@ -182,8 +200,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -219,8 +237,8 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } else if (score == SCORE2_MAX) goto exact_match; @@ -300,8 +318,8 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - hash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -374,9 +392,6 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int is_udp4; bool slow; - if (addr_len) - *addr_len = sizeof(struct sockaddr_in6); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); @@ -462,7 +477,7 @@ try_again: ipv6_iface_scope_id(&sin6->sin6_addr, IP6CB(skb)->iif); } - + *addr_len = sizeof(*sin6); } if (is_udp4) { if (inet->cmsg_flags) @@ -525,8 +540,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + goto out; + } np = inet6_sk(sk); @@ -549,8 +566,10 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); + } rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -688,20 +707,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (udp_sk(s)->udp_port_hash == num && s->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(s); if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; } - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } if (!inet6_mc_check(s, loc_addr, rmt_addr)) @@ -844,7 +862,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_napi_id(sk, skb); ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -1062,7 +1079,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, } else if (!up->pending) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } else daddr = NULL; @@ -1132,8 +1149,8 @@ do_udp_sendmsg: * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -1144,7 +1161,7 @@ do_udp_sendmsg: return -EDESTADDRREQ; fl6.fl6_dport = inet->inet_dport; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; connected = 1; } @@ -1223,9 +1240,6 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; - if (dontfrag < 0) - dontfrag = np->dontfrag; - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1244,6 +1258,8 @@ back_from_confirm: up->pending = AF_INET6; do_append_data: + if (dontfrag < 0) + dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, @@ -1260,8 +1276,8 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 4691ed50a92..c779c3c90b9 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,33 +7,32 @@ #include <net/inet_common.h> #include <net/transp_v6.h> -extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); -extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - u8 , u8 , int , __be32 , struct udp_table *); +int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); +void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, + __be32, struct udp_table *); -extern int udp_v6_get_port(struct sock *sk, unsigned short snum); +int udp_v6_get_port(struct sock *sk, unsigned short snum); -extern int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -extern int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); #ifdef CONFIG_COMPAT -extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); +int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); #endif -extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len); -extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len); -extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern void udpv6_destroy_sock(struct sock *sk); +int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len); +int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +void udpv6_destroy_sock(struct sock *sk); -extern void udp_v6_clear_sk(struct sock *sk, int size); +void udp_v6_clear_sk(struct sock *sk, int size); #ifdef CONFIG_PROC_FS -extern int udp6_seq_show(struct seq_file *seq, void *v); +int udp6_seq_show(struct seq_file *seq, void *v); #endif #endif /* _UDP6_IMPL_H */ diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 60559511bd9..e7359f9eaa8 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -64,6 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; @@ -88,7 +90,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Check if there is enough headroom to insert fragment header. */ tnl_hlen = skb_tnl_header_len(skb); - if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { + if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) goto out; } diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4770d515c2c..cb04f7a16b5 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -18,6 +18,65 @@ #include <net/ipv6.h> #include <net/xfrm.h> +/* Informational hook. The decap is still done here. */ +static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; +static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex); + +int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) +{ + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm6_mode_tunnel_input_mutex); + + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t->priority > priority) + break; + if (t->priority == priority) + goto err; + + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm6_mode_tunnel_input_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register); + +int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) +{ + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; + int ret = -ENOENT; + + mutex_lock(&xfrm6_mode_tunnel_input_mutex); + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + mutex_unlock(&xfrm6_mode_tunnel_input_mutex); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister); + static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); @@ -63,8 +122,15 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#define for_each_input_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) + + static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) @@ -72,6 +138,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; + for_each_input_rcu(rcv_notify_handlers, handler) + handler->handler(skb); + err = skb_unclone(skb, GFP_ATOMIC); if (err) goto out; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 23ed03d786c..5f8e128c512 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -135,9 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; @@ -284,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { |