diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-06 09:38:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-06 09:38:14 -0700 |
commit | ae045e2455429c418a418a3376301a9e5753a0a8 (patch) | |
tree | b445bdeecd3f38aa0d0a29c9585cee49e4ccb0f1 /net/ipv6 | |
parent | f4f142ed4ef835709c7e6d12eaca10d190bcebed (diff) | |
parent | d247b6ab3ce6dd43665780865ec5fa145d9ab6bd (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Steady transitioning of the BPF infrastructure to a generic spot so
all kernel subsystems can make use of it, from Alexei Starovoitov.
2) SFC driver supports busy polling, from Alexandre Rames.
3) Take advantage of hash table in UDP multicast delivery, from David
Held.
4) Lighten locking, in particular by getting rid of the LRU lists, in
inet frag handling. From Florian Westphal.
5) Add support for various RFC6458 control messages in SCTP, from
Geir Ola Vaagland.
6) Allow filtering of bridge forwarding database dumps by device, from
Jamal Hadi Salim.
7) virtio-net also now supports busy polling, from Jason Wang.
8) Some low level optimization tweaks in pktgen from Jesper Dangaard
Brouer.
9) Add support for ipv6 address generation modes, so that userland
can have some input into the process. From Jiri Pirko.
10) Consolidate common TCP connection request code in ipv4 and ipv6,
from Octavian Purdila.
11) New ARP packet logger in netfilter, from Pablo Neira Ayuso.
12) Generic resizable RCU hash table, with initial users in netlink and
nftables. From Thomas Graf.
13) Maintain a name assignment type so that userspace can see where a
network device name came from (enumerated by kernel, assigned
explicitly by userspace, etc.) From Tom Gundersen.
14) Automatic flow label generation on transmit in ipv6, from Tom
Herbert.
15) New packet timestamping facilities from Willem de Bruijn, meant to
assist in measuring latencies going into/out-of the packet
scheduler, latency from TCP data transmission to ACK, etc"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1536 commits)
cxgb4 : Disable recursive mailbox commands when enabling vi
net: reduce USB network driver config options.
tg3: Modify tg3_tso_bug() to handle multiple TX rings
amd-xgbe: Perform phy connect/disconnect at dev open/stop
amd-xgbe: Use dma_set_mask_and_coherent to set DMA mask
net: sun4i-emac: fix memory leak on bad packet
sctp: fix possible seqlock seadlock in sctp_packet_transmit()
Revert "net: phy: Set the driver when registering an MDIO bus device"
cxgb4vf: Turn off SGE RX/TX Callback Timers and interrupts in PCI shutdown routine
team: Simplify return path of team_newlink
bridge: Update outdated comment on promiscuous mode
net-timestamp: ACK timestamp for bytestreams
net-timestamp: TCP timestamping
net-timestamp: SCHED timestamp on entering packet scheduler
net-timestamp: add key to disambiguate concurrent datagrams
net-timestamp: move timestamp flags out of sk_flags
net-timestamp: extend SCM_TIMESTAMPING ancillary data struct
cxgb4i : Move stray CPL definitions to cxgb4 driver
tcp: reduce spurious retransmits due to transient SACK reneging
qlcnic: Initialize dcbnl_ops before register_netdev
...
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 148 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 7 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 1 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 13 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 13 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 32 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 8 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 55 | ||||
-rw-r--r-- | net/ipv6/ip6mr.c | 2 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 13 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 58 | ||||
-rw-r--r-- | net/ipv6/netfilter/Kconfig | 5 | ||||
-rw-r--r-- | net/ipv6/netfilter/Makefile | 3 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6t_ipv6header.c | 1 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_reasm.c | 49 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_log_ipv6.c | 417 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 | ||||
-rw-r--r-- | net/ipv6/proc.c | 4 | ||||
-rw-r--r-- | net/ipv6/raw.c | 14 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 90 | ||||
-rw-r--r-- | net/ipv6/sit.c | 4 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 2 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 9 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 209 | ||||
-rw-r--r-- | net/ipv6/udp.c | 131 |
25 files changed, 871 insertions, 421 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5667b3003af..0b239fc1816 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -108,11 +108,12 @@ static inline u32 cstamp_delta(unsigned long cstamp) } #ifdef CONFIG_SYSCTL -static void addrconf_sysctl_register(struct inet6_dev *idev); +static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); #else -static inline void addrconf_sysctl_register(struct inet6_dev *idev) +static inline int addrconf_sysctl_register(struct inet6_dev *idev) { + return 0; } static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -186,6 +187,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -222,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -308,16 +311,16 @@ err_ip: static struct inet6_dev *ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; + int err = -ENOMEM; ASSERT_RTNL(); if (dev->mtu < IPV6_MIN_MTU) - return NULL; + return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - if (ndev == NULL) - return NULL; + return ERR_PTR(err); rwlock_init(&ndev->lock); ndev->dev = dev; @@ -330,7 +333,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (ndev->nd_parms == NULL) { kfree(ndev); - return NULL; + return ERR_PTR(err); } if (ndev->cnf.forwarding) dev_disable_lro(dev); @@ -344,17 +347,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) neigh_parms_release(&nd_tbl, 
ndev->nd_parms); dev_put(dev); kfree(ndev); - return NULL; + return ERR_PTR(err); } if (snmp6_register_dev(ndev) < 0) { ADBG(KERN_WARNING "%s: cannot create /proc/net/dev_snmp6/%s\n", __func__, dev->name); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); - return NULL; + goto err_release; } /* One reference from device. We must do this before @@ -392,7 +392,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; - addrconf_sysctl_register(ndev); + err = addrconf_sysctl_register(ndev); + if (err) { + ipv6_mc_destroy_dev(ndev); + del_timer(&ndev->regen_timer); + goto err_release; + } /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); @@ -407,6 +412,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); return ndev; + +err_release: + neigh_parms_release(&nd_tbl, ndev->nd_parms); + ndev->dead = 1; + in6_dev_finish_destroy(ndev); + return ERR_PTR(err); } static struct inet6_dev *ipv6_find_idev(struct net_device *dev) @@ -418,7 +429,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) idev = __in6_dev_get(dev); if (!idev) { idev = ipv6_add_dev(dev); - if (!idev) + if (IS_ERR(idev)) return NULL; } @@ -2728,9 +2739,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) +{ + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + /* addrconf_add_linklocal also adds a prefix_route and we + * only need to care about prefix routes if ipv6_generate_eui64 + * couldn't generate one. 
+ */ + if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) + addrconf_add_linklocal(idev, &addr); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } +} + static void addrconf_dev_config(struct net_device *dev) { - struct in6_addr addr; struct inet6_dev *idev; ASSERT_RTNL(); @@ -2751,11 +2778,7 @@ static void addrconf_dev_config(struct net_device *dev) if (IS_ERR(idev)) return; - memset(&addr, 0, sizeof(struct in6_addr)); - addr.s6_addr32[0] = htonl(0xFE800000); - - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_addr_gen(idev, false); } #if IS_ENABLED(CONFIG_IPV6_SIT) @@ -2777,11 +2800,7 @@ static void addrconf_sit_config(struct net_device *dev) } if (dev->priv_flags & IFF_ISATAP) { - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) - addrconf_add_linklocal(idev, &addr); + addrconf_addr_gen(idev, false); return; } @@ -2796,7 +2815,6 @@ static void addrconf_sit_config(struct net_device *dev) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; - struct in6_addr addr; ASSERT_RTNL(); @@ -2805,11 +2823,7 @@ static void addrconf_gre_config(struct net_device *dev) return; } - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) - addrconf_add_linklocal(idev, &addr); - else - addrconf_prefix_route(&addr, 64, dev, 0, 0); + addrconf_addr_gen(idev, true); } #endif @@ -2825,8 +2839,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); - if (!idev) - return notifier_from_errno(-ENOMEM); + if (IS_ERR(idev)) + return notifier_from_errno(PTR_ERR(idev)); } break; @@ -2846,7 +2860,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = 
ipv6_add_dev(dev); - if (idev) { + if (!IS_ERR_OR_NULL(idev)) { idev->if_flags |= IF_READY; run_pending = 1; } @@ -2889,7 +2903,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } - if (idev) { + if (!IS_ERR_OR_NULL(idev)) { if (run_pending) addrconf_dad_run(idev); @@ -2924,7 +2938,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); - if (idev) + if (!IS_ERR(idev)) break; } @@ -2945,10 +2959,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) { snmp6_unregister_dev(idev); addrconf_sysctl_unregister(idev); - addrconf_sysctl_register(idev); - err = snmp6_register_dev(idev); + err = addrconf_sysctl_register(idev); if (err) return notifier_from_errno(err); + err = snmp6_register_dev(idev); + if (err) { + addrconf_sysctl_unregister(idev); + return notifier_from_errno(err); + } } break; @@ -4321,6 +4339,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; } static inline size_t inet6_ifla6_size(void) @@ -4420,6 +4439,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (nla == NULL) goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) + goto nla_put_failure; + read_lock_bh(&idev->lock); memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); @@ -4524,8 +4547,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) BUG(); - if (tb[IFLA_INET6_TOKEN]) + if (tb[IFLA_INET6_TOKEN]) { err = 
inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + if (err) + return err; + } + + if (tb[IFLA_INET6_ADDR_GEN_MODE]) { + u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); + + if (mode != IN6_ADDR_GEN_MODE_EUI64 && + mode != IN6_ADDR_GEN_MODE_NONE) + return -EINVAL; + idev->addr_gen_mode = mode; + err = 0; + } return err; } @@ -5168,6 +5204,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec }, { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, @@ -5218,12 +5261,23 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) kfree(t); } -static void addrconf_sysctl_register(struct inet6_dev *idev) +static int addrconf_sysctl_register(struct inet6_dev *idev) { - neigh_sysctl_register(idev->dev, idev->nd_parms, - &ndisc_ifinfo_sysctl_change); - __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, - idev, &idev->cnf); + int err; + + if (!sysctl_dev_name_is_allowed(idev->dev->name)) + return -EINVAL; + + err = neigh_sysctl_register(idev->dev, idev->nd_parms, + &ndisc_ifinfo_sysctl_change); + if (err) + return err; + err = __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, + idev, &idev->cnf); + if (err) + neigh_sysctl_unregister(idev->nd_parms); + + return err; } static void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -5308,6 +5362,7 @@ static struct rtnl_af_ops inet6_ops = { int __init addrconf_init(void) { + struct inet6_dev *idev; int i, err; err = ipv6_addr_label_init(); @@ -5346,11 +5401,12 @@ int __init addrconf_init(void) * device and it being up should be removed. 
*/ rtnl_lock(); - if (!ipv6_add_dev(init_net.loopback_dev)) - err = -ENOMEM; + idev = ipv6_add_dev(init_net.loopback_dev); rtnl_unlock(); - if (err) + if (IS_ERR(idev)) { + err = PTR_ERR(idev); goto errlo; + } for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_addr_lst[i]); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7cb4392690d..2daa3a133e4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -197,7 +197,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = net->ipv6.sysctl.bindv6only; + sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. @@ -294,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to v4-mapped address on a v6-only socket * makes no sense */ - if (np->ipv6only) { + if (sk->sk_ipv6only) { err = -EINVAL; goto out; } @@ -371,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (addr_type != IPV6_ADDR_MAPPED) - np->ipv6only = 1; + sk->sk_ipv6only = 1; } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; @@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c3bf2d2e519..2753319524f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -199,6 +199,7 @@ ipv4_connected: NULL); sk->sk_state = TCP_ESTABLISHED; + ip6_set_txhash(sk); out: fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f6c84a6eb23..06ba3e58320 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -626,9 
+626,10 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) int inner_offset; __be16 frag_off; u8 nexthdr; + struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return; + goto out; nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { @@ -636,14 +637,14 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (inner_offset<0) - return; + goto out; } else { inner_offset = sizeof(struct ipv6hdr); } /* Checkin header including 8 bytes of inner protocol header. */ if (!pskb_may_pull(skb, inner_offset+8)) - return; + goto out; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed @@ -652,13 +653,15 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) --ANK (980726) */ - rcu_read_lock(); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); - rcu_read_unlock(); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); + return; + +out: + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); } /* diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3873181ed85..5f19dfbc4c6 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -322,7 +322,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, else strcpy(name, "ip6gre%d"); - dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); if (!dev) return NULL; @@ -723,7 +724,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. 
*/ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1174,7 +1176,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; @@ -1323,7 +1327,8 @@ static int __net_init ip6gre_init_net(struct net *net) int err; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", - ip6gre_tunnel_setup); + NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); if (!ign->fb_tunnel_dev) { err = -ENOMEM; goto err_alloc_dev; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 45702b8cd14..315a55d6607 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -802,8 +803,8 @@ slow_path: /* * Copy a block of the IP datagram. 
*/ - if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) - BUG(); + BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag), + len)); left -= len; fh->frag_off = htons(offset); @@ -1156,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int err; int offset = 0; __u8 tx_flags = 0; + u32 tskey = 0; if (flags&MSG_PROBE) return 0; @@ -1271,9 +1273,12 @@ emsgsize: } } - /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); + if (tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; + } /* * Let's try using as much space as possible. @@ -1381,12 +1386,6 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; - else { - /* Only the initial fragment - * is time stamped. - */ - tx_flags = 0; - } } if (skb == NULL) goto error; @@ -1400,8 +1399,11 @@ alloc_new_skb: skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + dst_exthdrlen); - if (sk->sk_type == SOCK_DGRAM) - skb_shinfo(skb)->tx_flags = tx_flags; + /* Only the initial fragment is time stamped */ + skb_shinfo(skb)->tx_flags = tx_flags; + tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes @@ -1571,7 +1573,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index afa08245836..f9de5a69507 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -315,7 +315,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) else sprintf(name, "ip6tnl%%d"); - dev = 
alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6_tnl_dev_setup); if (dev == NULL) goto failed; @@ -1046,7 +1047,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1772,7 +1774,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", - ip6_tnl_dev_setup); + NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 9aaa6bb229e..7f52fd9fa7b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -204,7 +204,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p else sprintf(name, "ip6_vti%%d"); - dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); if (dev == NULL) goto failed; @@ -1020,7 +1020,7 @@ static int __net_init vti6_init_net(struct net *net) err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", - vti6_dev_setup); + NET_NAME_UNKNOWN, vti6_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; @@ -1089,36 +1089,26 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { **/ static int __init vti6_tunnel_init(void) { - int err; + const char *msg; + int err; + msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) - goto out_pernet; + goto pernet_dev_failed; + msg = "tunnel protocols"; err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); - if (err < 0) { - pr_err("%s: can't 
register vti6 protocol\n", __func__); - - goto out; - } - + if (err < 0) + goto xfrm_proto_esp_failed; err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); - if (err < 0) { - xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - pr_err("%s: can't register vti6 protocol\n", __func__); - - goto out; - } - + if (err < 0) + goto xfrm_proto_ah_failed; err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); - if (err < 0) { - xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); - xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - pr_err("%s: can't register vti6 protocol\n", __func__); - - goto out; - } + if (err < 0) + goto xfrm_proto_comp_failed; + msg = "netlink interface"; err = rtnl_link_register(&vti6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1127,11 +1117,14 @@ static int __init vti6_tunnel_init(void) rtnl_link_failed: xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); +xfrm_proto_comp_failed: xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); +xfrm_proto_ah_failed: xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); -out: +xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); -out_pernet: +pernet_dev_failed: + pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1141,13 +1134,9 @@ out_pernet: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); - if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP)) - pr_info("%s: can't deregister protocol\n", __func__); - if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH)) - pr_info("%s: can't deregister protocol\n", __func__); - if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP)) - pr_info("%s: can't deregister protocol\n", __func__); - + xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); 
unregister_pernet_device(&vti6_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8250474ab7d..f9a3fd320d1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -744,7 +744,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) else sprintf(name, "pim6reg%u", mrt->id); - dev = alloc_netdev(0, name, reg_vif_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (dev == NULL) return NULL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index edb58aff4ae..0c289982796 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; - np->ipv6only = valbool; + sk->sk_ipv6only = valbool; retv = 0; break; @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1058,7 +1062,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_V6ONLY: - val = np->ipv6only; + val = sk->sk_ipv6only; break; case IPV6_RECVPKTINFO: @@ -1158,7 +1162,6 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; - break; } case IPV6_TRANSPARENT: @@ -1273,6 +1276,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca8d4ea48a5..339078f95d1 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1070,6 +1070,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); + ND_PRINTK(2, info, + "RA: %s, dev: %s\n", + __func__, skb->dev->name); if 
(!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); return; @@ -1102,13 +1105,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - if (!ipv6_accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) { + ND_PRINTK(2, info, + "RA: %s, did not accept ra for dev: %s\n", + __func__, skb->dev->name); goto skip_linkparms; + } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ - if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { + ND_PRINTK(2, info, + "RA: %s, nodetype is NODEFAULT, dev: %s\n", + __func__, skb->dev->name); goto skip_linkparms; + } #endif if (in6_dev->if_flags & IF_RS_SENT) { @@ -1130,11 +1141,24 @@ static void ndisc_router_discovery(struct sk_buff *skb) (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); - if (!in6_dev->cnf.accept_ra_defrtr) + if (!in6_dev->cnf.accept_ra_defrtr) { + ND_PRINTK(2, info, + "RA: %s, defrtr is false for dev: %s\n", + __func__, skb->dev->name); goto skip_defrtr; + } - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + /* Do not accept RA with source-addr found on local machine unless + * accept_ra_from_local is set to true. 
+ */ + if (!in6_dev->cnf.accept_ra_from_local && + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0)) { + ND_PRINTK(2, info, + "RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; + } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); @@ -1163,8 +1187,10 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = NULL; } + ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", + rt, lifetime, skb->dev->name); if (rt == NULL && lifetime) { - ND_PRINTK(3, dbg, "RA: adding default router\n"); + ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); if (rt == NULL) { @@ -1260,12 +1286,22 @@ skip_linkparms: NEIGH_UPDATE_F_ISROUTER); } - if (!ipv6_accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) { + ND_PRINTK(2, info, + "RA: %s, accept_ra is false for dev: %s\n", + __func__, skb->dev->name); goto out; + } #ifdef CONFIG_IPV6_ROUTE_INFO - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + if (!in6_dev->cnf.accept_ra_from_local && + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0)) { + ND_PRINTK(2, info, + "RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; + } if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; @@ -1293,8 +1329,12 @@ skip_routeinfo: #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ - if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { + ND_PRINTK(2, info, + "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", + __func__, skb->dev->name); goto out; + } #endif if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { @@ -1728,7 +1768,7 @@ int __init ndisc_init(void) #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, - &ndisc_ifinfo_sysctl_change); + 
ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; out: diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4bff1f297e3..ac93df16f5a 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -55,6 +55,11 @@ config NFT_REJECT_IPV6 default NFT_REJECT tristate +config NF_LOG_IPV6 + tristate "IPv6 packet logging" + depends on NETFILTER_ADVANCED + select NF_LOG_COMMON + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 70d3dd66f2c..c0b263104ed 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,9 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# logging +obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o + # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 54bd9790603..8b147440fbd 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -94,7 +94,6 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) break; default: return false; - break; } nexthdr = hp->nexthdr; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 0d5279fd852..6f187c8d8a1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -50,6 +50,7 @@ #include <linux/module.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +static const char nf_frags_cache_name[] = "nf-frags"; struct nf_ct_frag6_skb_cb { @@ -63,6 +64,8 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table nf_ct_frag6_sysctl_table[] = { { 
.procname = "nf_conntrack_frag6_timeout", @@ -76,14 +79,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.nf_frag.frags.high_thresh }, { .procname = "nf_conntrack_frag6_high_thresh", .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.nf_frag.frags.low_thresh }, { } }; @@ -102,7 +108,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net) table[0].data = &net->nf_frag.frags.timeout; table[1].data = &net->nf_frag.frags.low_thresh; + table[1].extra2 = &net->nf_frag.frags.high_thresh; table[2].data = &net->nf_frag.frags.high_thresh; + table[2].extra1 = &net->nf_frag.frags.low_thresh; + table[2].extra2 = &init_net.nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); @@ -147,16 +156,13 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr) { - u32 c; - net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); - c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, nf_frags.rnd); - return c & (INETFRAGS_HASHSZ - 1); + return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, nf_frags.rnd); } -static unsigned int nf_hashfn(struct inet_frag_queue *q) +static unsigned int nf_hashfn(const struct inet_frag_queue *q) { const struct frag_queue *nq; @@ -196,7 +202,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.dst = dst; arg.ecn = ecn; - read_lock_bh(&nf_frags.lock); + local_bh_disable(); hash = nf_hash_frag(id, src, dst); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); @@ -217,7 +223,7 @@ 
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, int offset, end; u8 ecn; - if (fq->q.last_in & INET_FRAG_COMPLETE) { + if (fq->q.flags & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); goto err; } @@ -248,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) { + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -267,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) { + if (fq->q.flags & INET_FRAG_LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } @@ -349,10 +355,9 @@ found: */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } - inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -597,10 +602,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - local_bh_disable(); - inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); - local_bh_enable(); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) { @@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) goto ret_orig; } - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) @@ -677,13 +678,15 @@ int nf_ct_frag6_init(void) nf_frags.qsize = 
sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; - nf_frags.secret_interval = 10 * 60 * HZ; - inet_frags_init(&nf_frags); - + nf_frags.frags_cache_name = nf_frags_cache_name; + ret = inet_frags_init(&nf_frags); + if (ret) + goto out; ret = register_pernet_subsys(&nf_ct_net_ops); if (ret) inet_frags_fini(&nf_frags); +out: return ret; } diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c new file mode 100644 index 00000000000..7b17a0be93e --- /dev/null +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -0,0 +1,417 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 5, + .logflags = NF_LOG_MASK, + }, + }, +}; + +/* One level of recursion won't kill us */ +static void dump_ipv6_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int ip6hoff, + int recurse) +{ + u_int8_t currenthdr; + int fragment; + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); + if (ih == NULL) 
{ + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ + nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); + + /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ + nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + ntohs(ih->payload_len) + sizeof(struct ipv6hdr), + (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, + ih->hop_limit, + (ntohl(*(__be32 *)ih) & 0x000fffff)); + + fragment = 0; + ptr = ip6hoff + sizeof(struct ipv6hdr); + currenthdr = ih->nexthdr; + while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; + + hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); + if (hp == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 48 "OPT (...) " */ + if (logflags & XT_LOG_IPOPT) + nf_log_buf_add(m, "OPT ( "); + + switch (currenthdr) { + case IPPROTO_FRAGMENT: { + struct frag_hdr _fhdr; + const struct frag_hdr *fh; + + nf_log_buf_add(m, "FRAG:"); + fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), + &_fhdr); + if (fh == NULL) { + nf_log_buf_add(m, "TRUNCATED "); + return; + } + + /* Max length: 6 "65535 " */ + nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); + + /* Max length: 11 "INCOMPLETE " */ + if (fh->frag_off & htons(0x0001)) + nf_log_buf_add(m, "INCOMPLETE "); + + nf_log_buf_add(m, "ID:%08x ", + ntohl(fh->identification)); + + if (ntohs(fh->frag_off) & 0xFFF8) + fragment = 1; + + hdrlen = 8; + + break; + } + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + if (fragment) { + if (logflags & XT_LOG_IPOPT) + nf_log_buf_add(m, ")"); + return; + } + hdrlen = ipv6_optlen(hp); + break; + /* Max Length */ + case IPPROTO_AH: + if (logflags & XT_LOG_IPOPT) { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + /* Max length: 3 "AH " */ + nf_log_buf_add(m, "AH "); + + if (fragment) { + 
nf_log_buf_add(m, ")"); + return; + } + + ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), + &_ahdr); + if (ah == NULL) { + /* + * Max length: 26 "INCOMPLETE [65535 + * bytes] )" + */ + nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 15 "SPI=0xF1234567 */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); + + } + + hdrlen = (hp->hdrlen+2)<<2; + break; + case IPPROTO_ESP: + if (logflags & XT_LOG_IPOPT) { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 4 "ESP " */ + nf_log_buf_add(m, "ESP "); + + if (fragment) { + nf_log_buf_add(m, ")"); + return; + } + + /* + * Max length: 26 "INCOMPLETE [65535 bytes] )" + */ + eh = skb_header_pointer(skb, ptr, sizeof(_esph), + &_esph); + if (eh == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 16 "SPI=0xF1234567 )" */ + nf_log_buf_add(m, "SPI=0x%x )", + ntohl(eh->spi)); + } + return; + default: + /* Max length: 20 "Unknown Ext Hdr 255" */ + nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); + return; + } + if (logflags & XT_LOG_IPOPT) + nf_log_buf_add(m, ") "); + + currenthdr = hp->nexthdr; + ptr += hdrlen; + } + + switch (currenthdr) { + case IPPROTO_TCP: + if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment, + ptr, logflags)) + return; + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr)) + return; + break; + case IPPROTO_ICMPV6: { + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; + + /* Max length: 13 "PROTO=ICMPv6 " */ + nf_log_buf_add(m, "PROTO=ICMPv6 "); + + if (fragment) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); + if (ic == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - ptr); + return; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + nf_log_buf_add(m, "TYPE=%u CODE=%u ", + ic->icmp6_type, ic->icmp6_code); + + switch 
(ic->icmp6_type) { + case ICMPV6_ECHO_REQUEST: + case ICMPV6_ECHO_REPLY: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + nf_log_buf_add(m, "ID=%u SEQ=%u ", + ntohs(ic->icmp6_identifier), + ntohs(ic->icmp6_sequence)); + break; + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + break; + + case ICMPV6_PARAMPROB: + /* Max length: 17 "POINTER=ffffffff " */ + nf_log_buf_add(m, "POINTER=%08x ", + ntohl(ic->icmp6_pointer)); + /* Fall through */ + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + /* Max length: 3+maxlen */ + if (recurse) { + nf_log_buf_add(m, "["); + dump_ipv6_packet(m, info, skb, + ptr + sizeof(_icmp6h), 0); + nf_log_buf_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) { + nf_log_buf_add(m, "MTU=%u ", + ntohl(ic->icmp6_mtu)); + } + } + break; + } + /* Max length: 10 "PROTO=255 " */ + default: + nf_log_buf_add(m, "PROTO=%u ", currenthdr); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && recurse) + nf_log_dump_sk_uid_gid(m, skb->sk); + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (recurse && skb->mark) + nf_log_buf_add(m, "MARK=0x%x ", skb->mark); +} + +static void dump_ipv6_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + nf_log_buf_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if 
(dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p != NULL) { + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < len; i++) + nf_log_buf_add(m, ":%02x", *p++); + } + nf_log_buf_add(m, " "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, + &iph->daddr); + } + } else { + nf_log_buf_add(m, " "); + } +} + +static void nf_log_ip6_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, + loginfo, prefix); + + if (in != NULL) + dump_ipv6_mac_header(m, loginfo, skb); + + dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_ip6_logger __read_mostly = { + .name = "nf_log_ipv6", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_ip6_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_ipv6_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); + return 0; +} + +static void __net_exit nf_log_ipv6_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_ip6_logger); +} + +static struct pernet_operations nf_log_ipv6_net_ops = { + .init = nf_log_ipv6_net_init, + .exit = nf_log_ipv6_net_exit, +}; + +static int __init nf_log_ipv6_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_ipv6_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + return 0; +} + +static void __exit nf_log_ipv6_exit(void) +{ + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + 
nf_log_unregister(&nf_ip6_logger); +} + +module_init(nf_log_ipv6_init); +module_exit(nf_log_ipv6_exit); + +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index abfe75a2e31..fc8e49b2ff3 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -158,6 +158,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, htons(oldlen), htons(datalen), 1); } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { @@ -175,6 +176,7 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], return 0; } +#endif static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { .l3proto = NFPROTO_IPV6, @@ -183,7 +185,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { .manip_pkt = nf_nat_ipv6_manip_pkt, .csum_update = nf_nat_ipv6_csum_update, .csum_recalc = nf_nat_ipv6_csum_recalc, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, +#endif #ifdef CONFIG_XFRM .decode_session = nf_nat_ipv6_decode_session, #endif diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 3317440ea34..2d6f860e5c1 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -33,6 +33,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; + unsigned int frag_mem = ip6_frag_mem(net); seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); @@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); - seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(net), ip6_frag_mem(net)); + seq_printf(seq, "FRAG6: inuse %u memory 
%u\n", !!frag_mem, frag_mem); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b2dc60b0c76..39d44226e40 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -176,7 +176,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) goto out; net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); + sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb)); while (sk) { int filtered; @@ -220,7 +220,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) } } sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - IP6CB(skb)->iif); + inet6_iif(skb)); } out: read_unlock(&raw_v6_hashinfo.lock); @@ -375,7 +375,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, net = dev_net(skb->dev); while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - IP6CB(skb)->iif))) { + inet6_iif(skb)))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); @@ -506,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + inet6_iif(skb)); *addr_len = sizeof(*sin6); } @@ -588,8 +588,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - if (skb_copy_bits(skb, offset, &csum, 2)) - BUG(); + BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); /* in case cksum was not initialized */ if (unlikely(csum)) @@ -601,8 +600,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; - if (skb_store_bits(skb, offset, &csum, 2)) - BUG(); + BUG_ON(skb_store_bits(skb, offset, &csum, 2)); send: err = ip6_push_pending_frames(sk); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index cc85a9ba501..c6557d9f780 100644 --- a/net/ipv6/reassembly.c +++ 
b/net/ipv6/reassembly.c @@ -60,6 +60,8 @@ #include <net/inet_frag.h> #include <net/inet_ecn.h> +static const char ip6_frag_cache_name[] = "ip6-frags"; + struct ip6frag_skb_cb { struct inet6_skb_parm h; @@ -85,27 +87,23 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr) { - u32 c; - net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); - c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, ip6_frags.rnd); - - return c & (INETFRAGS_HASHSZ - 1); + return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, ip6_frags.rnd); } -static unsigned int ip6_hashfn(struct inet_frag_queue *q) +static unsigned int ip6_hashfn(const struct inet_frag_queue *q) { - struct frag_queue *fq; + const struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); } -bool ip6_frag_match(struct inet_frag_queue *q, void *a) +bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) { - struct frag_queue *fq; - struct ip6_create_arg *arg = a; + const struct frag_queue *fq; + const struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); return fq->id == arg->id && @@ -115,10 +113,10 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_match); -void ip6_frag_init(struct inet_frag_queue *q, void *a) +void ip6_frag_init(struct inet_frag_queue *q, const void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); - struct ip6_create_arg *arg = a; + const struct ip6_create_arg *arg = a; fq->id = arg->id; fq->user = arg->user; @@ -135,7 +133,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, spin_lock(&fq->q.lock); - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, frags); @@ -145,17 +143,20 @@ void 
ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + if (fq->q.flags & INET_FRAG_EVICTED) + goto out_rcu_unlock; + + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out_rcu_unlock; - /* - But use as source device on which LAST ARRIVED - segment was received. And do not use fq->dev - pointer directly, device might already disappeared. + /* But use as source device on which LAST ARRIVED + * segment was received. And do not use fq->dev + * pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); @@ -192,7 +193,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.dst = dst; arg.ecn = ecn; - read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); @@ -212,7 +212,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -243,9 +243,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. 
@@ -263,7 +263,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) + if (fq->q.flags & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } @@ -338,10 +338,10 @@ found: */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { int res; unsigned long orefdst = skb->_skb_refdst; @@ -353,14 +353,13 @@ found: } skb_dst_drop(skb); - inet_frag_lru_move(&fq->q); return -1; discard_fq: inet_frag_kill(&fq->q, &ip6_frags); err: - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -523,7 +522,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); - int evicted; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -552,11 +550,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); - if (evicted) - IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq != NULL) { @@ -576,7 +569,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return -1; fail_hdr: - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } @@ -588,20 +582,25 @@ static const struct inet6_protocol frag_protocol = }; #ifdef CONFIG_SYSCTL +static int 
zero; + static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ipv6.frags.low_thresh }, { .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ipv6.frags.high_thresh }, { .procname = "ip6frag_time", @@ -613,10 +612,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip6_frags_secret_interval_unused; static struct ctl_table ip6_frags_ctl_table[] = { { .procname = "ip6frag_secret_interval", - .data = &ip6_frags.secret_interval, + .data = &ip6_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -636,7 +637,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; + table[0].extra1 = &net->ipv6.frags.low_thresh; + table[0].extra2 = &init_net.ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; + table[1].extra2 = &net->ipv6.frags.high_thresh; table[2].data = &net->ipv6.frags.timeout; /* Don't export sysctls to unprivileged users */ @@ -746,8 +750,10 @@ int __init ipv6_frag_init(void) ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; - ip6_frags.secret_interval = 10 * 60 * HZ; - inet_frags_init(&ip6_frags); + ip6_frags.frags_cache_name = ip6_frag_cache_name; + ret = inet_frags_init(&ip6_frags); + if (ret) + goto err_pernet; out: return ret; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4f408176dc6..2e9ba035fb5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -250,7 +250,8 @@ static struct ip_tunnel 
*ipip6_tunnel_locate(struct net *net, else strcpy(name, "sit%d"); - dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ipip6_tunnel_setup); if (dev == NULL) return NULL; @@ -1729,6 +1730,7 @@ static int __net_init sit_init_net(struct net *net) sitn->tunnels[3] = sitn->tunnels_r_l; sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", + NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!sitn->fb_tunnel_dev) { err = -ENOMEM; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a822b880689..83cea1d3946 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (!req) goto out; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e5..0c56c93619e 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -39,6 +39,13 @@ static struct ctl_table ipv6_table_template[] = { .proc_handler = proc_dointvec }, { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, .maxlen = sizeof(int), @@ -74,6 +81,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; + ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 229239ad96b..22055b09842 100644 --- a/net/ipv6/tcp_ipv6.c +++ 
b/net/ipv6/tcp_ipv6.c @@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; + ip6_set_txhash(sk); + /* * TCP over IPv4 */ @@ -470,13 +472,14 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, - struct flowi6 *fl6, + struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; @@ -503,18 +506,6 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) -{ - struct flowi6 fl6; - int res; - - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL); - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} static void tcp_v6_reqsk_destructor(struct request_sock *req) { @@ -718,22 +709,66 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } #endif +static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + ireq->ir_iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); + + if (!TCP_SKB_CB(skb)->when && + (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || + np->rxopt.bits.rxohlim || np->repflow)) { + atomic_inc(&skb->users); + ireq->pktopts = skb; + } +} + +static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool 
*strict) +{ + if (strict) + *strict = true; + return inet6_csk_route_req(sk, &fl->u.ip6, req); +} + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { + .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr), +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, -}; #endif + .init_req = tcp_v6_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v6_init_sequence, +#endif + .route_req = tcp_v6_route_req, + .init_seq = tcp_v6_init_sequence, + .send_synack = tcp_v6_send_synack, + .queue_hash_add = inet6_csk_reqsk_queue_hash_add, +}; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, @@ -973,153 +1008,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -/* FIXME: this is substantially similar to the ipv4 code. - * Can some kind of merge be done? 
-- erics - */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; - struct tcp_fastopen_cookie foc = { .len = -1 }; - bool want_cookie = false, fastopen; - struct flowi6 fl6; - int err; - if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); - if (!want_cookie) - goto drop; - } - - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); - if (req == NULL) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? 
NULL : &foc); + return tcp_conn_request(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - ireq->ir_iif = sk->sk_bound_dev_if; - ireq->ir_mark = inet_request_mark(sk, skb); - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - - if (!isn) { - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || - np->repflow) { - atomic_inc(&skb->users); - ireq->pktopts = skb; - } - - if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - goto have_isn; - } - - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. 
*/ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = tcp_v6_init_sequence(skb); - } -have_isn: - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; - - if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL) - goto drop_and_free; - - tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ @@ -1235,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; + ip6_set_txhash(newsk); + /* Now IPv6 options... First: no IPv4 options. 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7092ff78fd8..4836af8f582 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net, int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; if (sk2_rcv_saddr6 && @@ -473,7 +472,7 @@ try_again: sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + inet6_iif(skb)); } *addr_len = sizeof(*sin6); } @@ -534,11 +533,15 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct udphdr *uh = (struct udphdr*)(skb->data+offset); struct sock *sk; int err; + struct net *net = dev_net(skb->dev); - sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest, + sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); - if (sk == NULL) + if (sk == NULL) { + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; + } if (type == ICMPV6_PKT_TOOBIG) { if (!ip6_sk_accept_pmtu(sk)) @@ -674,7 +677,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto csum_error; } - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; @@ -703,43 +706,26 @@ drop: return -1; } -static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, - __be16 loc_port, const 
struct in6_addr *loc_addr, - __be16 rmt_port, const struct in6_addr *rmt_addr, - int dif) +static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, + __be16 loc_port, const struct in6_addr *loc_addr, + __be16 rmt_port, const struct in6_addr *rmt_addr, + int dif, unsigned short hnum) { - struct hlist_nulls_node *node; - unsigned short num = ntohs(loc_port); - - sk_nulls_for_each_from(sk, node) { - struct inet_sock *inet = inet_sk(sk); - - if (!net_eq(sock_net(sk), net)) - continue; - - if (udp_sk(sk)->udp_port_hash == num && - sk->sk_family == PF_INET6) { - if (inet->inet_dport) { - if (inet->inet_dport != rmt_port) - continue; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr) && - !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) - continue; - - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) - continue; + struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) - continue; - } - if (!inet6_mc_check(sk, loc_addr, rmt_addr)) - continue; - return sk; - } - } - return NULL; + if (!net_eq(sock_net(sk), net)) + return false; + + if (udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6 || + (inet->inet_dport && inet->inet_dport != rmt_port) || + (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + return false; + if (!inet6_mc_check(sk, loc_addr, rmt_addr)) + return false; + return true; } static void flush_stack(struct sock **stack, unsigned int count, @@ -763,6 +749,7 @@ static void flush_stack(struct sock **stack, unsigned int count, if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) skb1 = NULL; + sock_put(sk); } if (unlikely(skb1)) kfree_skb(skb1); @@ -788,43 +775,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, { struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); - struct udp_hslot *hslot = 
udp_hashslot(udptable, net, ntohs(uh->dest)); - int dif; - unsigned int i, count = 0; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(uh->dest); + struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); + int dif = inet6_iif(skb); + unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); + unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + + if (use_hash2) { + hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & + udp_table.mask; + hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask; +start_lookup: + hslot = &udp_table.hash2[hash2]; + offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); + } spin_lock(&hslot->lock); - sk = sk_nulls_head(&hslot->head); - dif = inet6_iif(skb); - sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); - while (sk) { - /* If zero checksum and no_check is not on for - * the socket then skip it. - */ - if (uh->check || udp_sk(sk)->no_check6_rx) + sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { + if (__udp_v6_is_mcast_sock(net, sk, + uh->dest, daddr, + uh->source, saddr, + dif, hnum) && + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + (uh->check || udp_sk(sk)->no_check6_rx)) { + if (unlikely(count == ARRAY_SIZE(stack))) { + flush_stack(stack, count, skb, ~0); + count = 0; + } stack[count++] = sk; - - sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, - uh->source, saddr, dif); - if (unlikely(count == ARRAY_SIZE(stack))) { - if (!sk) - break; - flush_stack(stack, count, skb, ~0); - count = 0; + sock_hold(sk); } } - /* - * before releasing the lock, we must take reference on sockets - */ - for (i = 0; i < count; i++) - sock_hold(stack[i]); spin_unlock(&hslot->lock); + /* Also lookup *:port if we are using hash2 and haven't done so yet. 
*/ + if (use_hash2 && hash2 != hash2_any) { + hash2 = hash2_any; + goto start_lookup; + } + if (count) { flush_stack(stack, count, skb, count - 1); - - for (i = 0; i < count; i++) - sock_put(stack[i]); } else { kfree_skb(skb); } |