diff options
Diffstat (limited to 'net/ipv6')
61 files changed, 2047 insertions, 2838 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 6460eec834b..41877abd22e 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,10 +8,11 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o netfilter.o + ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o +ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ea8cf7c0cc..d328d598614 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -58,6 +58,7 @@ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif +#include <linux/capability.h> #include <linux/delay.h> #include <linux/notifier.h> #include <linux/string.h> @@ -137,6 +138,7 @@ static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); static void addrconf_dad_timer(unsigned long data); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -388,6 +390,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) } #endif + if (netif_carrier_ok(dev)) + ndev->if_flags |= IF_READY; + write_lock_bh(&addrconf_lock); dev->ip6_ptr = ndev; write_unlock_bh(&addrconf_lock); @@ -415,6 +420,7 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev) if ((idev = ipv6_add_dev(dev)) == NULL) return NULL; } + if (dev->flags&IFF_UP) ipv6_mc_up(idev); return idev; @@ -634,8 +640,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } #endif - for (ifap = &idev->addr_list; (ifa=*ifap) != NULL; - ifap = &ifa->if_next) { + for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { if (ifa == ifp) { *ifap = ifa->if_next; __in6_ifa_put(ifp); @@ -643,6 +648,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) break; deleted = 1; + continue; } else if (ifp->flags & IFA_F_PERMANENT) { if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) { @@ -666,6 +672,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } } } + ifap = &ifa->if_next; } write_unlock_bh(&idev->lock); @@ -903,11 +910,18 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, score.addr_type = __ipv6_addr_type(&ifa->addr); - /* Rule 0: Candidate Source Address (section 4) + /* Rule 0: + * - Tentative Address (RFC2462 section 5.4) + * - A tentative address is not considered + * "assigned to an interface" in the traditional + * sense. + * - Candidate Source Address (section 4) * - In any case, anycast addresses, multicast * addresses, and the unspecified address MUST * NOT be included in a candidate set. */ + if (ifa->flags & IFA_F_TENTATIVE) + continue; if (unlikely(score.addr_type == IPV6_ADDR_ANY || score.addr_type & IPV6_ADDR_MULTICAST)) { LIMIT_NETDEBUG(KERN_DEBUG @@ -1182,7 +1196,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); @@ -1215,10 +1229,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* Gets referenced address, destroys ifaddr */ -void addrconf_dad_failure(struct inet6_ifaddr *ifp) +static void addrconf_dad_stop(struct inet6_ifaddr *ifp) { - if (net_ratelimit()) - printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); addrconf_del_timer(ifp); @@ -1244,6 +1256,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) ipv6_del_addr(ifp); } +void addrconf_dad_failure(struct inet6_ifaddr *ifp) +{ + if (net_ratelimit()) + printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); + addrconf_dad_stop(ifp); +} /* Join to solicited addr multicast group. */ @@ -2133,9 +2151,42 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *) data; struct inet6_dev *idev = __in6_dev_get(dev); + int run_pending = 0; switch(event) { case NETDEV_UP: + case NETDEV_CHANGE: + if (event == NETDEV_UP) { + if (!netif_carrier_ok(dev)) { + /* device is not ready yet. */ + printk(KERN_INFO + "ADDRCONF(NETDEV_UP): %s: " + "link is not ready\n", + dev->name); + break; + } + } else { + if (!netif_carrier_ok(dev)) { + /* device is still not ready. */ + break; + } + + if (idev) { + if (idev->if_flags & IF_READY) { + /* device is already configured. */ + break; + } + idev->if_flags |= IF_READY; + } + + printk(KERN_INFO + "ADDRCONF(NETDEV_CHANGE): %s: " + "link becomes ready\n", + dev->name); + + run_pending = 1; + } + switch(dev->type) { case ARPHRD_SIT: addrconf_sit_config(dev); @@ -2152,6 +2203,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; }; if (idev) { + if (run_pending) + addrconf_dad_run(idev); + /* If the MTU changed during the interface down, when the interface up, the changed MTU must be reflected in the idev as well as routers. @@ -2186,8 +2240,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ addrconf_ifdown(dev, event != NETDEV_DOWN); break; - case NETDEV_CHANGE: - break; + case NETDEV_CHANGENAME: #ifdef CONFIG_SYSCTL if (idev) { @@ -2268,7 +2321,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 3: clear flags for stateless addrconf */ if (how != 1) - idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD); + idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); /* Step 4: clear address list */ #ifdef CONFIG_IPV6_PRIVACY @@ -2377,11 +2430,20 @@ out: /* * Duplicate Address Detection */ +static void addrconf_dad_kick(struct inet6_ifaddr *ifp) +{ + unsigned long rand_num; + struct inet6_dev *idev = ifp->idev; + + rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); + ifp->probes = idev->cnf.dad_transmits; + addrconf_mod_timer(ifp, AC_DAD, rand_num); +} + static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; - unsigned long rand_num; addrconf_join_solict(dev, &ifp->addr); @@ -2390,7 +2452,6 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) flags); net_srandom(ifp->addr.s6_addr32[3]); - rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); read_lock_bh(&idev->lock); if (ifp->dead) @@ -2407,9 +2468,19 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) return; } - ifp->probes = idev->cnf.dad_transmits; - addrconf_mod_timer(ifp, AC_DAD, rand_num); - + if (!(idev->if_flags & IF_READY)) { + spin_unlock_bh(&ifp->lock); + read_unlock_bh(&idev->lock); + /* + * If the defice is not ready: + * - keep it tentative if it is a permanent address. + * - otherwise, kill it. + */ + in6_ifa_hold(ifp); + addrconf_dad_stop(ifp); + return; + } + addrconf_dad_kick(ifp); spin_unlock_bh(&ifp->lock); out: read_unlock_bh(&idev->lock); @@ -2492,6 +2563,22 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) } } +static void addrconf_dad_run(struct inet6_dev *idev) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { + spin_lock_bh(&ifp->lock); + if (!(ifp->flags & IFA_F_TENTATIVE)) { + spin_unlock_bh(&ifp->lock); + continue; + } + spin_unlock_bh(&ifp->lock); + addrconf_dad_kick(ifp); + } + read_unlock_bh(&idev->lock); +} + #ifdef CONFIG_PROC_FS struct if6_iter_state { int bucket; @@ -2557,7 +2644,7 @@ static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; seq_printf(seq, - "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n", + NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", NIP6(ifp->addr), ifp->idev->dev->ifindex, ifp->prefix_len, @@ -2697,6 +2784,9 @@ restart: in6_ifa_hold(ifpub); spin_unlock(&ifp->lock); read_unlock(&addrconf_hash_lock); + spin_lock(&ifpub->lock); + ifpub->regen_count = 0; + spin_unlock(&ifpub->lock); ipv6_create_tempaddr(ifpub, ifp); in6_ifa_put(ifpub); in6_ifa_put(ifp); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d9546380fa0..064ffab82a9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -22,6 +22,7 @@ #include <linux/module.h> +#include <linux/capability.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> @@ -167,6 +168,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -389,6 +391,8 @@ int inet6_destroy_sock(struct sock *sk) return 0; } +EXPORT_SYMBOL_GPL(inet6_destroy_sock); + /* * This does both peername and sockname. */ @@ -431,7 +435,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - int err = -EINVAL; switch(cmd) { @@ -450,16 +453,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: return addrconf_set_dstaddr((void __user *) arg); default: - if (!sk->sk_prot->ioctl || - (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD) - return(dev_ioctl(cmd,(void __user *) arg)); - return err; + if (!sk->sk_prot->ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ return(0); } -struct proto_ops inet6_stream_ops = { +const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -480,7 +482,7 @@ struct proto_ops inet6_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet6_dgram_ops = { +const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -508,7 +510,7 @@ static struct net_proto_family inet6_family_ops = { }; /* Same as inet6_dgram_ops, sans udp_poll. */ -static struct proto_ops inet6_sockraw_ops = { +static const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -609,17 +611,90 @@ inet6_unregister_protosw(struct inet_protosw *p) } } +int inet6_sk_rebuild_header(struct sock *sk) +{ + int err; + struct dst_entry *dst; + struct ipv6_pinfo *np = inet6_sk(sk); + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *final_p = NULL, final; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet->dport; + fl.fl_ip_sport = inet->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) { + sk->sk_route_caps = 0; + return err; + } + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_err_soft = -err; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + return 0; +} + +EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); + +int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet6_skb_parm *opt = IP6CB(skb); + + if (np->rxopt.all) { + if ((opt->hop && (np->rxopt.bits.hopopts || + np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && + np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || + np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && + (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) + return 1; + } + return 0; +} + +EXPORT_SYMBOL_GPL(ipv6_opt_accepted); + int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) { if (ptr == NULL) return -EINVAL; - ptr[0] = __alloc_percpu(mibsize, mibalign); + ptr[0] = __alloc_percpu(mibsize); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, mibalign); + ptr[1] = __alloc_percpu(mibsize); if (!ptr[1]) goto err1; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f3629730eb1..c7932cb420a 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <net/icmp.h> #include <net/ipv6.h> +#include <net/protocol.h> #include <net/xfrm.h> #include <asm/scatterlist.h> @@ -331,8 +332,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n", ntohl(ah->spi), NIP6(iph->daddr)); xfrm_state_put(x); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 6b729404723..840a33d3329 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/capability.h> #include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> @@ -531,9 +532,7 @@ static int ac6_seq_show(struct seq_file *seq, void *v) struct ac6_iter_state *state = ac6_seq_private(seq); seq_printf(seq, - "%-4d %-15s " - "%04x%04x%04x%04x%04x%04x%04x%04x " - "%5d\n", + "%-4d %-15s " NIP6_SEQFMT " %5d\n", state->dev->ifindex, state->dev->name, NIP6(im->aca_addr), im->aca_users); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c4a3a993acb..99a6eb23378 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8bfbe997079..7b5b94f1390 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -36,6 +36,7 @@ #include <linux/random.h> #include <net/icmp.h> #include <net/ipv6.h> +#include <net/protocol.h> #include <linux/icmpv6.h> static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) @@ -265,8 +266,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n", ntohl(esph->spi), NIP6(iph->daddr)); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index be6faf31138..2a1e7e45b89 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->h.raw += ((skb->h.raw[1]+1)<<3); - *nhoffp = opt->dst1; + opt->nhoff = opt->dst1; return 1; } @@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_nodata_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; @@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -249,7 +249,7 @@ looped_back: skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; opt->dst1 = 0; - *nhoffp = (&hdr->nexthdr) - skb->nh.raw; + opt->nhoff = (&hdr->nexthdr) - skb->nh.raw; return 1; } @@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) return opt; } +EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); + /********************************** Hop-by-hop options. **********************************/ @@ -485,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) { - IP6CB(skb)->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) + struct inet6_skb_parm *opt = IP6CB(skb); + + opt->hop = sizeof(struct ipv6hdr); + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + skb->h.raw += (skb->h.raw[1]+1)<<3; + opt->nhoff = sizeof(struct ipv6hdr); return sizeof(struct ipv6hdr); + } return -1; } @@ -579,6 +586,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) return opt2; } +EXPORT_SYMBOL_GPL(ipv6_dup_options); + static int ipv6_renew_option(void *ohdr, struct ipv6_opt_hdr __user *newopt, int newoptlen, int inherit, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 34a332225c1..fcf883183ce 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); +static int icmpv6_rcv(struct sk_buff **pskb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -328,8 +328,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, iif = skb->dev->ifindex; /* - * Must not send if we know that source is Anycast also. - * for now we don't know that. + * Must not send error if the source does not uniquely + * identify a single node (RFC2463 Section 2.4). + * We check unspecified / multicast addresses here, + * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); @@ -373,6 +375,16 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; + + /* + * We won't send icmp if the destination is known + * anycast. + */ + if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); + goto out_dst_release; + } + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; @@ -569,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int icmpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; @@ -595,7 +607,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n", NIP6(*saddr), NIP6(*daddr)); goto discard_it; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c new file mode 100644 index 00000000000..f8f3a37a149 --- /dev/null +++ b/net/ipv6/inet6_connection_sock.c @@ -0,0 +1,200 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Support for INET6 connection oriented protocols. + * + * Authors: See the TCPv6 sources + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> + +#include <net/addrconf.h> +#include <net/inet_connection_sock.h> +#include <net/inet_ecn.h> +#include <net/inet_hashtables.h> +#include <net/ip6_route.h> +#include <net/sock.h> +#include <net/inet6_connection_sock.h> + +int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) +{ + const struct sock *sk2; + const struct hlist_node *node; + + /* We must walk the whole port owner list in this case. -DaveM */ + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + + return node != NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); + +/* + * request_sock (formerly open request) hash tables. + */ +static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) +{ + u32 a = raddr->s6_addr32[0]; + u32 b = raddr->s6_addr32[1]; + u32 c = raddr->s6_addr32[2]; + + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += rnd; + __jhash_mix(a, b, c); + + a += raddr->s6_addr32[3]; + b += (u32)rport; + __jhash_mix(a, b, c); + + return c & (synq_hsize - 1); +} + +struct request_sock *inet6_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const struct in6_addr *raddr, + const struct in6_addr *laddr, + const int iif) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + struct request_sock *req, **prev; + + for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, + lopt->hash_rnd, + lopt->nr_table_entries)]; + (req = *prev) != NULL; + prev = &req->dl_next) { + const struct inet6_request_sock *treq = inet6_rsk(req); + + if (inet_rsk(req)->rmt_port == rport && + req->rsk_ops->family == AF_INET6 && + ipv6_addr_equal(&treq->rmt_addr, raddr) && + ipv6_addr_equal(&treq->loc_addr, laddr) && + (!treq->iif || treq->iif == iif)) { + BUG_TRAP(req->sk == NULL); + *prevp = prev; + return req; + } + } + + return NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_search_req); + +void inet6_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned long timeout) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, + inet_rsk(req)->rmt_port, + lopt->hash_rnd, lopt->nr_table_entries); + + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); + inet_csk_reqsk_queue_added(sk, timeout); +} + +EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); + +void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; + + sin6->sin6_family = AF_INET6; + ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); + sin6->sin6_port = inet_sk(sk)->dport; + /* We do not store received flowlabel for TCP */ + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + if (sk->sk_bound_dev_if && + ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6->sin6_scope_id = sk->sk_bound_dev_if; +} + +EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); + +int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +{ + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi fl; + struct dst_entry *dst; + struct in6_addr *final_p = NULL, final; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->dport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + int err = ip6_dst_lookup(sk, &dst, &fl); + + if (err) { + sk->sk_err_soft = -err; + return err; + } + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_route_caps = 0; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + skb->dst = dst_clone(dst); + + /* Restore final destination back after routing done */ + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + + return ip6_xmit(sk, skb, &fl, np->opt, 0); +} + +EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 01d5f46d4e4..4154f3a8b6c 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -5,7 +5,8 @@ * * Generic INET6 transport hashtables * - * Authors: Lotsa people, from code originally in tcp + * Authors: Lotsa people, from code originally in tcp, generalised here + * by Arnaldo Carvalho de Melo <acme@mandriva.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,12 +15,13 @@ */ #include <linux/config.h> - #include <linux/module.h> +#include <linux/random.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> +#include <net/ip.h> struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, @@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, } EXPORT_SYMBOL_GPL(inet6_lookup); + +static int __inet6_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, + inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); + + tw = inet_twsk(sk2); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk2->sk_family == PF_INET6 && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sk->sk_hash = hash; + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... */ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet6_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, + np->daddr.s6_addr32, + inet->dport); +} + +int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + const int low = sysctl_local_port_range[0]; + const int high = sysctl_local_port_range[1]; + const int range = high - low; + int i, port; + static u32 hint; + const u32 offset = hint + inet6_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet6_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet6_hash(hinfo, sk); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet6_hash(hinfo, sk); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = __inet6_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1cf02765fb5..69cbe8a66d0 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -9,6 +9,7 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ +#include <linux/capability.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> @@ -200,6 +201,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label) return NULL; } +EXPORT_SYMBOL_GPL(fl6_sock_lookup); + void fl6_free_socklist(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -626,9 +629,7 @@ static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl) { while(fl) { seq_printf(seq, - "%05X %-1d %-6d %-6d %-6ld %-8ld " - "%02x%02x%02x%02x%02x%02x%02x%02x " - "%-4d\n", + "%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n", (unsigned)ntohl(fl->label), fl->share, (unsigned)fl->owner, @@ -644,8 +645,8 @@ static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl) static int ip6fl_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, "Label S Owner Users Linger Expires " - "Dst Opt\n"); + seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", + "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); else ip6fl_fl_seq_show(seq, v); return 0; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a6026d2787d..29f73592e68 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -48,7 +48,7 @@ -static inline int ip6_rcv_finish( struct sk_buff *skb) +inline int ip6_rcv_finish( struct sk_buff *skb) { if (skb->dst == NULL) ip6_route_input(skb); @@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + skb->h.raw = (u8 *)(hdr + 1); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + pkt_len = ntohs(hdr->payload_len); /* pkt_len may be zero if Jumbo payload option is present */ @@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - skb->h.raw = (u8*)(hdr+1); - if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) { + if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } @@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb) int nexthdr; u8 hash; - skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr); - /* * Parse extension headers */ - nexthdr = skb->nh.ipv6h->nexthdr; - nhoff = offsetof(struct ipv6hdr, nexthdr); - - /* Skip hop-by-hop options, they are already parsed. */ - if (nexthdr == NEXTHDR_HOP) { - nhoff = sizeof(struct ipv6hdr); - nexthdr = skb->h.raw[0]; - skb->h.raw += (skb->h.raw[1]+1)<<3; - } - rcu_read_lock(); resubmit: if (!pskb_pull(skb, skb->h.raw - skb->data)) goto discard; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); @@ -194,7 +185,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb, &nhoff); + ret = ipprot->handler(&skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8523c76ebf7..efa3e72cfcf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -226,6 +226,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, first_hop); + skb->priority = sk->sk_priority; + mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); @@ -775,6 +777,8 @@ out_err_release: return err; } +EXPORT_SYMBOL_GPL(ip6_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -1180,6 +1184,8 @@ int ip6_push_pending_frames(struct sock *sk) ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); ipv6_addr_copy(&hdr->daddr, final_dst); + skb->priority = sk->sk_priority; + skb->dst = dst_clone(&rt->u.dst); IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e315d0f80af..92ead3cf956 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -21,6 +21,7 @@ #include <linux/config.h> #include <linux/module.h> +#include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/sockios.h> @@ -243,7 +244,7 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt) if (dev == NULL) return -ENOMEM; - t = dev->priv; + t = netdev_priv(dev); dev->init = ip6ip6_tnl_dev_init; t->parms = *p; @@ -308,7 +309,7 @@ ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create) static void ip6ip6_tnl_dev_uninit(struct net_device *dev) { - struct ip6_tnl *t = dev->priv; + struct ip6_tnl *t = netdev_priv(dev); if (dev == ip6ip6_fb_tnl_dev) { write_lock_bh(&ip6ip6_lock); @@ -510,7 +511,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph, **/ static int -ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +ip6ip6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct ipv6hdr *ipv6h; @@ -623,7 +624,7 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) static int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; + struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->stat; struct ipv6hdr *ipv6h = skb->nh.ipv6h; struct ipv6_txoptions *opt = NULL; @@ -933,11 +934,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV) - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); else if (err) break; } else - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof (p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { @@ -955,7 +956,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } if (!create && dev != ip6ip6_fb_tnl_dev) { - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); } if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) { break; @@ -991,12 +992,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = ip6ip6_tnl_locate(&p, &t, 0); if (err) break; - if (t == ip6ip6_fb_tnl_dev->priv) { + if (t == netdev_priv(ip6ip6_fb_tnl_dev)) { err = -EPERM; break; } } else { - t = (struct ip6_tnl *) dev->priv; + t = netdev_priv(dev); } err = unregister_netdevice(t->dev); break; @@ -1016,7 +1017,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static struct net_device_stats * ip6ip6_tnl_get_stats(struct net_device *dev) { - return &(((struct ip6_tnl *) dev->priv)->stat); + return &(((struct ip6_tnl *)netdev_priv(dev))->stat); } /** @@ -1073,7 +1074,7 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev) static inline void ip6ip6_tnl_dev_init_gen(struct net_device *dev) { - struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; + struct ip6_tnl *t = netdev_priv(dev); t->fl.proto = IPPROTO_IPV6; t->dev = dev; strcpy(t->parms.name, dev->name); @@ -1087,7 +1088,7 @@ ip6ip6_tnl_dev_init_gen(struct net_device *dev) static int ip6ip6_tnl_dev_init(struct net_device *dev) { - struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; + struct ip6_tnl *t = netdev_priv(dev); ip6ip6_tnl_dev_init_gen(dev); ip6ip6_tnl_link_config(t); return 0; @@ -1103,7 +1104,7 @@ ip6ip6_tnl_dev_init(struct net_device *dev) static int ip6ip6_fb_tnl_dev_init(struct net_device *dev) { - struct ip6_tnl *t = dev->priv; + struct ip6_tnl *t = netdev_priv(dev); ip6ip6_tnl_dev_init_gen(dev); dev_hold(dev); tnls_wc[0] = t; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 55917fb1709..d511a884dad 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -47,6 +47,7 @@ #include <linux/rtnetlink.h> #include <net/icmp.h> #include <net/ipv6.h> +#include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> @@ -211,8 +212,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n", spi, NIP6(iph->daddr)); xfrm_state_put(x); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3620718defe..f7142ba519a 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -26,6 +26,7 @@ */ #include <linux/module.h> +#include <linux/capability.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> @@ -163,17 +164,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sk_refcnt_debug_dec(sk); if (sk->sk_protocol == IPPROTO_TCP) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); sock_prot_dec_use(sk->sk_prot); sock_prot_inc_use(&tcp_prot); local_bh_enable(); sk->sk_prot = &tcp_prot; - tp->af_specific = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { local_bh_disable(); sock_prot_dec_use(sk->sk_prot); @@ -317,14 +318,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, } retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); @@ -380,14 +382,15 @@ sticky_done: goto done; update: retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); @@ -547,7 +550,7 @@ done: retv = -ENOBUFS; break; } - gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL); + gsf = kmalloc(optlen,GFP_KERNEL); if (gsf == 0) { retv = -ENOBUFS; break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fd939da090c..4420948a1bf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -170,7 +170,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value) #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value) -#define IPV6_MLD_MAX_MSF 10 +#define IPV6_MLD_MAX_MSF 64 int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF; @@ -224,6 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) mc_lst->ifindex = dev->ifindex; mc_lst->sfmode = MCAST_EXCLUDE; + rwlock_init(&mc_lst->sflock); mc_lst->sflist = NULL; /* @@ -360,6 +361,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, struct ip6_sf_socklist *psl; int i, j, rv; int leavegroup = 0; + int pmclocked = 0; int err; if (pgsr->gsr_group.ss_family != AF_INET6 || @@ -403,6 +405,9 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } + write_lock_bh(&pmc->sflock); + pmclocked = 1; + psl = pmc->sflist; if (!add) { if (!psl) @@ -444,8 +449,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk, - IP6_SFLSIZE(count), GFP_ATOMIC); + newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC); if (!newpsl) { err = -ENOBUFS; goto done; @@ -475,6 +479,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, /* update the interface list */ ip6_mc_add_src(idev, group, omode, 1, source, 1); done: + if (pmclocked) + write_unlock_bh(&pmc->sflock); read_unlock_bh(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); in6_dev_put(idev); @@ -510,6 +516,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; + read_lock_bh(&ipv6_sk_mc_lock); + if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; @@ -526,8 +534,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; } if (gsf->gf_numsrc) { - newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk, - IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC); + newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), + GFP_ATOMIC); if (!newpsl) { err = -ENOBUFS; goto done; @@ -549,6 +557,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) newpsl = NULL; (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); } + + write_lock_bh(&pmc->sflock); psl = pmc->sflist; if (psl) { (void) ip6_mc_del_src(idev, group, pmc->sfmode, @@ -558,8 +568,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); pmc->sflist = newpsl; pmc->sfmode = gsf->gf_fmode; + write_unlock_bh(&pmc->sflock); err = 0; done: + read_unlock_bh(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); in6_dev_put(idev); dev_put(dev); @@ -592,6 +604,11 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, dev = idev->dev; err = -EADDRNOTAVAIL; + /* + * changes to the ipv6_mc_list require the socket lock and + * a read lock on ip6_sk_mc_lock. We have the socket lock, + * so reading the list is safe. + */ for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { if (pmc->ifindex != gsf->gf_interface) @@ -614,6 +631,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { return -EFAULT; } + /* changes to psl require the socket lock, a read lock on + * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We + * have the socket lock, so reading here is safe. + */ for (i=0; i<copycount; i++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -650,6 +671,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr, read_unlock(&ipv6_sk_mc_lock); return 1; } + read_lock(&mc->sflock); psl = mc->sflist; if (!psl) { rv = mc->sfmode == MCAST_EXCLUDE; @@ -665,6 +687,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr, if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) rv = 0; } + read_unlock(&mc->sflock); read_unlock(&ipv6_sk_mc_lock); return rv; @@ -744,7 +767,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC); + pmc = kmalloc(sizeof(*pmc), GFP_ATOMIC); if (!pmc) return; memset(pmc, 0, sizeof(*pmc)); @@ -1068,7 +1091,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) ma->mca_flags |= MAF_TIMER_RUNNING; } -static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, +/* mark EXCLUDE-mode sources */ +static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, struct in6_addr *srcs) { struct ip6_sf_list *psf; @@ -1078,13 +1102,53 @@ static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) + for (i=0; i<nsrcs; i++) { + /* skip inactive filters */ + if (pmc->mca_sfcount[MCAST_INCLUDE] || + pmc->mca_sfcount[MCAST_EXCLUDE] != + psf->sf_count[MCAST_EXCLUDE]) + continue; + if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { + scount++; + break; + } + } + } + pmc->mca_flags &= ~MAF_GSQUERY; + if (scount == nsrcs) /* all sources excluded */ + return 0; + return 1; +} + +static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, + struct in6_addr *srcs) +{ + struct ip6_sf_list *psf; + int i, scount; + + if (pmc->mca_sfmode == MCAST_EXCLUDE) + return mld_xmarksources(pmc, nsrcs, srcs); + + /* mark INCLUDE-mode sources */ + + scount = 0; + for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + if (scount == nsrcs) + break; + for (i=0; i<nsrcs; i++) { if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { psf->sf_gsresp = 1; scount++; break; } + } } + if (!scount) { + pmc->mca_flags &= ~MAF_GSQUERY; + return 0; + } + pmc->mca_flags |= MAF_GSQUERY; + return 1; } int igmp6_event_query(struct sk_buff *skb) @@ -1167,7 +1231,7 @@ int igmp6_event_query(struct sk_buff *skb) /* mark sources to include, if group & source-specific */ if (mlh2->nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - mlh2->nsrcs * sizeof(struct in6_addr))) { + ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) { in6_dev_put(idev); return -EINVAL; } @@ -1188,8 +1252,7 @@ int igmp6_event_query(struct sk_buff *skb) } } else { for (ma = idev->mc_list; ma; ma=ma->next) { - if (group_type != IPV6_ADDR_ANY && - !ipv6_addr_equal(group, &ma->mca_addr)) + if (!ipv6_addr_equal(group, &ma->mca_addr)) continue; spin_lock_bh(&ma->mca_lock); if (ma->mca_flags & MAF_TIMER_RUNNING) { @@ -1203,13 +1266,11 @@ int igmp6_event_query(struct sk_buff *skb) else ma->mca_flags &= ~MAF_GSQUERY; } - if (ma->mca_flags & MAF_GSQUERY) - mld_marksources(ma, ntohs(mlh2->nsrcs), - mlh2->srcs); - igmp6_group_queried(ma, max_delay); + if (!(ma->mca_flags & MAF_GSQUERY) || + mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs)) + igmp6_group_queried(ma, max_delay); spin_unlock_bh(&ma->mca_lock); - if (group_type != IPV6_ADDR_ANY) - break; + break; } } read_unlock_bh(&idev->lock); @@ -1281,7 +1342,18 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, case MLD2_MODE_IS_EXCLUDE: if (gdeleted || sdeleted) return 0; - return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp); + if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) { + if (pmc->mca_sfmode == MCAST_INCLUDE) + return 1; + /* don't include if this source is excluded + * in all filters + */ + if (psf->sf_count[MCAST_INCLUDE]) + return type == MLD2_MODE_IS_INCLUDE; + return pmc->mca_sfcount[MCAST_EXCLUDE] == + psf->sf_count[MCAST_EXCLUDE]; + } + return 0; case MLD2_CHANGE_TO_INCLUDE: if (gdeleted || sdeleted) return 0; @@ -1450,7 +1522,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; - int scount, first, isquery, truncate; + int scount, stotal, first, isquery, truncate; if (pmc->mca_flags & MAF_NOREPORT) return skb; @@ -1460,25 +1532,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, truncate = type == MLD2_MODE_IS_EXCLUDE || type == MLD2_CHANGE_TO_EXCLUDE; + stotal = scount = 0; + psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources; - if (!*psf_list) { - if (type == MLD2_ALLOW_NEW_SOURCES || - type == MLD2_BLOCK_OLD_SOURCES) - return skb; - if (pmc->mca_crcount || isquery) { - /* make sure we have room for group header and at - * least one source. - */ - if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+ - sizeof(struct in6_addr)) { - mld_sendpack(skb); - skb = NULL; /* add_grhead will get a new one */ - } - skb = add_grhead(skb, pmc, type, &pgr); - } - return skb; - } + if (!*psf_list) + goto empty_source; + pmr = skb ? (struct mld2_report *)skb->h.raw : NULL; /* EX and TO_EX get a fresh packet, if needed */ @@ -1491,7 +1551,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } } first = 1; - scount = 0; psf_prev = NULL; for (psf=*psf_list; psf; psf=psf_next) { struct in6_addr *psrc; @@ -1525,7 +1584,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc)); *psrc = psf->sf_addr; - scount++; + scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { psf->sf_crcount--; @@ -1540,6 +1599,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } psf_prev = psf; } + +empty_source: + if (!stotal) { + if (type == MLD2_ALLOW_NEW_SOURCES || + type == MLD2_BLOCK_OLD_SOURCES) + return skb; + if (pmc->mca_crcount || isquery) { + /* make sure we have room for group header */ + if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) { + mld_sendpack(skb); + skb = NULL; /* add_grhead will get a new one */ + } + skb = add_grhead(skb, pmc, type, &pgr); + } + } if (pgr) pgr->grec_nsrcs = htons(scount); @@ -1621,11 +1695,11 @@ static void mld_send_cr(struct inet6_dev *idev) skb = add_grec(skb, pmc, dtype, 1, 1); } if (pmc->mca_crcount) { - pmc->mca_crcount--; if (pmc->mca_sfmode == MCAST_EXCLUDE) { type = MLD2_CHANGE_TO_INCLUDE; skb = add_grec(skb, pmc, type, 1, 0); } + pmc->mca_crcount--; if (pmc->mca_crcount == 0) { mld_clear_zeros(&pmc->mca_tomb); mld_clear_zeros(&pmc->mca_sources); @@ -1659,12 +1733,12 @@ static void mld_send_cr(struct inet6_dev *idev) /* filter mode changes */ if (pmc->mca_crcount) { - pmc->mca_crcount--; if (pmc->mca_sfmode == MCAST_EXCLUDE) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_CHANGE_TO_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0); + pmc->mca_crcount--; } spin_unlock_bh(&pmc->mca_lock); } @@ -1860,7 +1934,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, psf_prev = psf; } if (!psf) { - psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC); + psf = kmalloc(sizeof(*psf), GFP_ATOMIC); if (!psf) return -ENOBUFS; memset(psf, 0, sizeof(*psf)); @@ -1890,7 +1964,7 @@ static void sf_markstate(struct ifmcaddr6 *pmc) static int sf_setstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf; + struct ip6_sf_list *psf, *dpsf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; int qrv = pmc->idev->mc_qrv; int new_in, rv; @@ -1902,8 +1976,48 @@ static int sf_setstate(struct ifmcaddr6 *pmc) !psf->sf_count[MCAST_INCLUDE]; } else new_in = psf->sf_count[MCAST_INCLUDE] != 0; - if (new_in != psf->sf_oldin) { - psf->sf_crcount = qrv; + if (new_in) { + if (!psf->sf_oldin) { + struct ip6_sf_list *prev = 0; + + for (dpsf=pmc->mca_tomb; dpsf; + dpsf=dpsf->sf_next) { + if (ipv6_addr_equal(&dpsf->sf_addr, + &psf->sf_addr)) + break; + prev = dpsf; + } + if (dpsf) { + if (prev) + prev->sf_next = dpsf->sf_next; + else + pmc->mca_tomb = dpsf->sf_next; + kfree(dpsf); + } + psf->sf_crcount = qrv; + rv++; + } + } else if (psf->sf_oldin) { + psf->sf_crcount = 0; + /* + * add or update "delete" records if an active filter + * is now inactive + */ + for (dpsf=pmc->mca_tomb; dpsf; dpsf=dpsf->sf_next) + if (ipv6_addr_equal(&dpsf->sf_addr, + &psf->sf_addr)) + break; + if (!dpsf) { + dpsf = (struct ip6_sf_list *) + kmalloc(sizeof(*dpsf), GFP_ATOMIC); + if (!dpsf) + continue; + *dpsf = *psf; + /* pmc->mca_lock held by callers */ + dpsf->sf_next = pmc->mca_tomb; + pmc->mca_tomb = dpsf; + } + dpsf->sf_crcount = qrv; rv++; } } @@ -2023,6 +2137,9 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; + /* callers have the socket lock and a write lock on ipv6_sk_mc_lock, + * so no other readers or writers of iml or its sflist + */ if (iml->sflist == 0) { /* any-source empty exclude case */ return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); @@ -2294,7 +2411,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); seq_printf(seq, - "%-4d %-15s %04x%04x%04x%04x%04x%04x%04x%04x %5d %08X %ld\n", + "%-4d %-15s " NIP6_SEQFMT " %5d %08X %ld\n", state->dev->ifindex, state->dev->name, NIP6(im->mca_addr), im->mca_users, im->mca_flags, @@ -2468,10 +2585,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) "Source Address", "INC", "EXC"); } else { seq_printf(seq, - "%3d %6.6s " - "%04x%04x%04x%04x%04x%04x%04x%04x " - "%04x%04x%04x%04x%04x%04x%04x%04x " - "%6lu %6lu\n", + "%3d %6.6s " NIP6_SEQFMT " " NIP6_SEQFMT " %6lu %6lu\n", state->dev->ifindex, state->dev->name, NIP6(state->im->mca_addr), NIP6(psf->sf_addr), diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 305d9ee6d7d..cb8856b1d95 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -692,7 +692,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if (!(neigh->nud_state & NUD_VALID)) { ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: " - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + NIP6_FMT "\n", __FUNCTION__, NIP6(*target)); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index f8626ebf90f..d750cfc019d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,15 +1,12 @@ -#include <linux/config.h> -#include <linux/init.h> - -#ifdef CONFIG_NETFILTER - #include <linux/kernel.h> +#include <linux/init.h> #include <linux/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/xfrm.h> int ip6_route_me_harder(struct sk_buff *skb) { @@ -21,11 +18,17 @@ int ip6_route_me_harder(struct sk_buff *skb) { .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, } }, - .proto = iph->nexthdr, }; dst = ip6_route_output(skb->sk, &fl); +#ifdef CONFIG_XFRM + if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET6) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + return -1; +#endif + if (dst->error) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); @@ -87,18 +90,10 @@ int __init ipv6_netfilter_init(void) return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); } +/* This can be called from inet6_init() on errors, so it cannot + * be marked __exit. -DaveM + */ void ipv6_netfilter_fini(void) { nf_unregister_queue_rerouter(PF_INET6); } - -#else /* CONFIG_NETFILTER */ -int __init ipv6_netfilter_init(void) -{ - return 0; -} - -void ipv6_netfilter_fini(void) -{ -} -#endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 04912f9b35c..2d6f8ecbc27 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -41,6 +41,7 @@ config IP6_NF_QUEUE config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering/masq/NAT)" + depends on NETFILTER_XTABLES help ip6tables is a general, extensible packet identification framework. Currently only the packet filtering and packet mangling subsystem @@ -50,25 +51,6 @@ config IP6_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. # The simple matches. -config IP6_NF_MATCH_LIMIT - tristate "limit match support" - depends on IP6_NF_IPTABLES - help - limit matching allows you to control the rate at which a rule can be - matched: mainly useful in combination with the LOG target ("LOG - target support", below) and to avoid some Denial of Service attacks. - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_MATCH_MAC - tristate "MAC address match support" - depends on IP6_NF_IPTABLES - help - mac matching allows you to match packets based on the source - Ethernet address of the packet. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_RT tristate "Routing header match support" depends on IP6_NF_IPTABLES @@ -124,16 +106,6 @@ config IP6_NF_MATCH_OWNER To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_MARK - tristate "netfilter MARK match support" - depends on IP6_NF_IPTABLES - help - Netfilter mark matching allows you to match packets based on the - `nfmark' value in the packet. This can be set by the MARK target - (see below). - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_IPV6HEADER tristate "IPv6 Extension Headers Match" depends on IP6_NF_IPTABLES @@ -151,15 +123,6 @@ config IP6_NF_MATCH_AHESP To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_LENGTH - tristate "Packet Length match support" - depends on IP6_NF_IPTABLES - help - This option allows you to match the length of a packet against a - specific value or range of values. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_EUI64 tristate "EUI64 address check" depends on IP6_NF_IPTABLES @@ -170,12 +133,13 @@ config IP6_NF_MATCH_EUI64 To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_PHYSDEV - tristate "Physdev match support" - depends on IP6_NF_IPTABLES && BRIDGE_NETFILTER +config IP6_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP6_NF_IPTABLES && XFRM help - Physdev packet matching matches against the physical bridge ports - the IP packet arrived on or will leave by. + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. To compile it as a module, choose M here. If unsure, say N. @@ -209,17 +173,6 @@ config IP6_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_NFQUEUE - tristate "NFQUEUE Target Support" - depends on IP6_NF_IPTABLES - help - This Target replaced the old obsolete QUEUE target. - - As opposed to QUEUE, it supports 65535 different queues, - not just one. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MANGLE tristate "Packet mangling" depends on IP6_NF_IPTABLES @@ -230,19 +183,6 @@ config IP6_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_MARK - tristate "MARK target support" - depends on IP6_NF_MANGLE - help - This option adds a `MARK' target, which allows you to create rules - in the `mangle' table which alter the netfilter mark (nfmark) field - associated with the packet packet prior to routing. This can change - the routing method (see `Use netfilter MARK value as routing - key') and can also be used by other subsystems to change their - behavior. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_TARGET_HL tristate 'HL (hoplimit) target support' depends on IP6_NF_MANGLE diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ab5b2ca1f5..db6073c9416 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -4,24 +4,18 @@ # Link order matters here. obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o -obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o -obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o -obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o -obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o -obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o -obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o -obj-$(CONFIG_IP6_NF_TARGET_NFQUEUE) += ip6t_NFQUEUE.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 95d469271c4..847068fd336 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -13,15 +13,19 @@ * a table * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu> * - new extension header parser code + * 15 Oct 2005 Harald Welte <laforge@netfilter.org> + * - Unification of {ip,ip6}_tables into x_tables + * - Removed tcp and udp code, since it's not ipv6 specific */ + +#include <linux/capability.h> #include <linux/config.h> +#include <linux/in.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> -#include <linux/tcp.h> -#include <linux/udp.h> #include <linux/icmpv6.h> #include <net/ipv6.h> #include <asm/uaccess.h> @@ -30,6 +34,7 @@ #include <linux/cpumask.h> #include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -64,13 +69,8 @@ do { \ #else #define IP_NF_ASSERT(x) #endif -#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) -static DECLARE_MUTEX(ip6t_mutex); -/* Must have mutex */ -#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0) -#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0) #include <linux/netfilter_ipv4/listhelp.h> #if 0 @@ -86,55 +86,22 @@ static DECLARE_MUTEX(ip6t_mutex); context stops packets coming through and allows user context to read the counters or update the rules. - To be cache friendly on SMP, we arrange them like so: - [ n-entries ] - ... cache-align padding ... - [ n-entries ] - Hence the start of any table is given by get_table() below. */ -/* The table itself */ -struct ip6t_table_info -{ - /* Size per table */ - unsigned int size; - /* Number of entries: FIXME. --RR */ - unsigned int number; - /* Initial number of entries. Needed for module usage count */ - unsigned int initial_entries; - - /* Entry points and underflows */ - unsigned int hook_entry[NF_IP6_NUMHOOKS]; - unsigned int underflow[NF_IP6_NUMHOOKS]; - - /* ip6t_entry tables: one per CPU */ - char entries[0] ____cacheline_aligned; -}; - -static LIST_HEAD(ip6t_target); -static LIST_HEAD(ip6t_match); -static LIST_HEAD(ip6t_tables); -#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) - -#ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) -#else -#define TABLE_OFFSET(t,p) 0 -#endif - #if 0 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) #endif -static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask, - struct in6_addr addr2) +int +ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask, + const struct in6_addr *addr2) { int i; for( i = 0; i < 16; i++){ - if((addr1.s6_addr[i] & mask.s6_addr[i]) != - (addr2.s6_addr[i] & mask.s6_addr[i])) + if((addr1->s6_addr[i] & mask->s6_addr[i]) != + (addr2->s6_addr[i] & mask->s6_addr[i])) return 1; } return 0; @@ -168,10 +135,10 @@ ip6_packet_match(const struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) - if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src), - IP6T_INV_SRCIP) - || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst), - IP6T_INV_DSTIP)) { + if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src), IP6T_INV_SRCIP) + || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst), IP6T_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); /* dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, @@ -214,69 +181,21 @@ ip6_packet_match(const struct sk_buff *skb, /* look for the desired protocol header */ if((ip6info->flags & IP6T_F_PROTO)) { - u_int8_t currenthdr = ipv6->nexthdr; - struct ipv6_opt_hdr _hdr, *hp; - u_int16_t ptr; /* Header offset in skb */ - u_int16_t hdrlen; /* Header */ - u_int16_t _fragoff = 0, *fp = NULL; - - ptr = IPV6_HDR_LEN; - - while (ip6t_ext_hdr(currenthdr)) { - /* Is there enough space for the next ext header? */ - if (skb->len - ptr < IPV6_OPTHDR_LEN) - return 0; - - /* NONE or ESP: there isn't protocol part */ - /* If we want to count these packets in '-p all', - * we will change the return 0 to 1*/ - if ((currenthdr == IPPROTO_NONE) || - (currenthdr == IPPROTO_ESP)) - break; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Size calculation */ - if (currenthdr == IPPROTO_FRAGMENT) { - fp = skb_header_pointer(skb, - ptr+offsetof(struct frag_hdr, - frag_off), - sizeof(_fragoff), - &_fragoff); - if (fp == NULL) - return 0; - - _fragoff = ntohs(*fp) & ~0x7; - hdrlen = 8; - } else if (currenthdr == IPPROTO_AH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - /* ptr is too large */ - if ( ptr > skb->len ) - return 0; - if (_fragoff) { - if (ip6t_ext_hdr(currenthdr)) - return 0; - break; - } - } + int protohdr; + unsigned short _frag_off; - *protoff = ptr; - *fragoff = _fragoff; + protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off); + if (protohdr < 0) + return 0; - /* currenthdr contains the protocol header */ + *fragoff = _frag_off; dprintf("Packet protocol %hi ?= %s%hi.\n", - currenthdr, + protohdr, ip6info->invflags & IP6T_INV_PROTO ? "!":"", ip6info->proto); - if (ip6info->proto == currenthdr) { + if (ip6info->proto == protohdr) { if(ip6info->invflags & IP6T_INV_PROTO) { return 0; } @@ -351,7 +270,7 @@ ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ip6t_table *table, + struct xt_table *table, void *userdata) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -363,6 +282,7 @@ ip6t_do_table(struct sk_buff **pskb, const char *indev, *outdev; void *table_base; struct ip6t_entry *e, *back; + struct xt_table_info *private; /* Initialization */ indev = in ? in->name : nulldevname; @@ -375,10 +295,10 @@ ip6t_do_table(struct sk_buff **pskb, * match it. */ read_lock_bh(&table->lock); + private = table->private; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, smp_processor_id()); - e = get_entry(table_base, table->private->hook_entry[hook]); + table_base = (void *)private->entries[smp_processor_id()]; + e = get_entry(table_base, private->hook_entry[hook]); #ifdef CONFIG_NETFILTER_DEBUG /* Check noone else using our table */ @@ -394,7 +314,7 @@ ip6t_do_table(struct sk_buff **pskb, #endif /* For return from builtin chain */ - back = get_entry(table_base, table->private->underflow[hook]); + back = get_entry(table_base, private->underflow[hook]); do { IP_NF_ASSERT(e); @@ -494,145 +414,6 @@ ip6t_do_table(struct sk_buff **pskb, #endif } -/* - * These are weird, but module loading must not be done with mutex - * held (since they will register), and we have to have a single - * function to use try_then_request_module(). - */ - -/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ -static inline struct ip6t_table *find_table_lock(const char *name) -{ - struct ip6t_table *t; - - if (down_interruptible(&ip6t_mutex) != 0) - return ERR_PTR(-EINTR); - - list_for_each_entry(t, &ip6t_tables, list) - if (strcmp(t->name, name) == 0 && try_module_get(t->me)) - return t; - up(&ip6t_mutex); - return NULL; -} - -/* Find match, grabs ref. Returns ERR_PTR() on error. */ -static inline struct ip6t_match *find_match(const char *name, u8 revision) -{ - struct ip6t_match *m; - int err = 0; - - if (down_interruptible(&ip6t_mutex) != 0) - return ERR_PTR(-EINTR); - - list_for_each_entry(m, &ip6t_match, list) { - if (strcmp(m->name, name) == 0) { - if (m->revision == revision) { - if (try_module_get(m->me)) { - up(&ip6t_mutex); - return m; - } - } else - err = -EPROTOTYPE; /* Found something. */ - } - } - up(&ip6t_mutex); - return ERR_PTR(err); -} - -/* Find target, grabs ref. Returns ERR_PTR() on error. */ -static inline struct ip6t_target *find_target(const char *name, u8 revision) -{ - struct ip6t_target *t; - int err = 0; - - if (down_interruptible(&ip6t_mutex) != 0) - return ERR_PTR(-EINTR); - - list_for_each_entry(t, &ip6t_target, list) { - if (strcmp(t->name, name) == 0) { - if (t->revision == revision) { - if (try_module_get(t->me)) { - up(&ip6t_mutex); - return t; - } - } else - err = -EPROTOTYPE; /* Found something. */ - } - } - up(&ip6t_mutex); - return ERR_PTR(err); -} - -struct ip6t_target *ip6t_find_target(const char *name, u8 revision) -{ - struct ip6t_target *target; - - target = try_then_request_module(find_target(name, revision), - "ip6t_%s", name); - if (IS_ERR(target) || !target) - return NULL; - return target; -} - -static int match_revfn(const char *name, u8 revision, int *bestp) -{ - struct ip6t_match *m; - int have_rev = 0; - - list_for_each_entry(m, &ip6t_match, list) { - if (strcmp(m->name, name) == 0) { - if (m->revision > *bestp) - *bestp = m->revision; - if (m->revision == revision) - have_rev = 1; - } - } - return have_rev; -} - -static int target_revfn(const char *name, u8 revision, int *bestp) -{ - struct ip6t_target *t; - int have_rev = 0; - - list_for_each_entry(t, &ip6t_target, list) { - if (strcmp(t->name, name) == 0) { - if (t->revision > *bestp) - *bestp = t->revision; - if (t->revision == revision) - have_rev = 1; - } - } - return have_rev; -} - -/* Returns true or fals (if no such extension at all) */ -static inline int find_revision(const char *name, u8 revision, - int (*revfn)(const char *, u8, int *), - int *err) -{ - int have_rev, best = -1; - - if (down_interruptible(&ip6t_mutex) != 0) { - *err = -EINTR; - return 1; - } - have_rev = revfn(name, revision, &best); - up(&ip6t_mutex); - - /* Nothing at all? Return 0 to try loading module. */ - if (best == -1) { - *err = -ENOENT; - return 0; - } - - *err = best; - if (!have_rev) - *err = -EPROTONOSUPPORT; - return 1; -} - - /* All zeroes == unconditional rule. */ static inline int unconditional(const struct ip6t_ip6 *ipv6) @@ -649,7 +430,8 @@ unconditional(const struct ip6t_ip6 *ipv6) /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) +mark_source_chains(struct xt_table_info *newinfo, + unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -658,7 +440,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e - = (struct ip6t_entry *)(newinfo->entries + pos); + = (struct ip6t_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -708,13 +490,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) goto next; e = (struct ip6t_entry *) - (newinfo->entries + pos); + (entry0 + pos); } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = (struct ip6t_entry *) - (newinfo->entries + pos + size); + (entry0 + pos + size); e->counters.pcnt = pos; pos += size; } else { @@ -731,7 +513,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) newpos = pos + e->next_offset; } e = (struct ip6t_entry *) - (newinfo->entries + newpos); + (entry0 + newpos); e->counters.pcnt = pos; pos = newpos; } @@ -794,11 +576,11 @@ check_match(struct ip6t_entry_match *m, { struct ip6t_match *match; - match = try_then_request_module(find_match(m->u.user.name, - m->u.user.revision), + match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, + m->u.user.revision), "ip6t_%s", m->u.user.name); if (IS_ERR(match) || !match) { - duprintf("check_match: `%s' not found\n", m->u.user.name); + duprintf("check_match: `%s' not found\n", m->u.user.name); return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; @@ -839,8 +621,9 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, goto cleanup_matches; t = ip6t_get_target(e); - target = try_then_request_module(find_target(t->u.user.name, - t->u.user.revision), + target = try_then_request_module(xt_find_target(AF_INET6, + t->u.user.name, + t->u.user.revision), "ip6t_%s", t->u.user.name); if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); @@ -876,7 +659,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, static inline int check_entry_size_and_hooks(struct ip6t_entry *e, - struct ip6t_table_info *newinfo, + struct xt_table_info *newinfo, unsigned char *base, unsigned char *limit, const unsigned int *hook_entries, @@ -910,7 +693,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, < 0 (not IP6T_RETURN). --RR */ /* Clear counters and comefrom */ - e->counters = ((struct ip6t_counters) { 0, 0 }); + e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; (*i)++; @@ -940,7 +723,8 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) static int translate_table(const char *name, unsigned int valid_hooks, - struct ip6t_table_info *newinfo, + struct xt_table_info *newinfo, + void *entry0, unsigned int size, unsigned int number, const unsigned int *hook_entries, @@ -961,11 +745,11 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry_size_and_hooks, newinfo, - newinfo->entries, - newinfo->entries + size, + entry0, + entry0 + size, hook_entries, underflows, &i); if (ret != 0) return ret; @@ -993,95 +777,79 @@ translate_table(const char *name, } } - if (!mark_source_chains(newinfo, valid_hooks)) + if (!mark_source_chains(newinfo, valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, check_entry, name, size, &i); if (ret != 0) { - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, + IP6T_ENTRY_ITERATE(entry0, newinfo->size, cleanup_entry, &i); return ret; } /* And one copy for every other CPU */ for_each_cpu(i) { - if (i == 0) - continue; - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, - newinfo->entries, - SMP_ALIGN(newinfo->size)); + if (newinfo->entries[i] && newinfo->entries[i] != entry0) + memcpy(newinfo->entries[i], entry0, newinfo->size); } return ret; } -static struct ip6t_table_info * -replace_table(struct ip6t_table *table, - unsigned int num_counters, - struct ip6t_table_info *newinfo, - int *error) +/* Gets counters. */ +static inline int +add_entry_to_counter(const struct ip6t_entry *e, + struct xt_counters total[], + unsigned int *i) { - struct ip6t_table_info *oldinfo; - -#ifdef CONFIG_NETFILTER_DEBUG - { - struct ip6t_entry *table_base; - unsigned int i; - - for_each_cpu(i) { - table_base = - (void *)newinfo->entries - + TABLE_OFFSET(newinfo, i); - - table_base->comefrom = 0xdead57ac; - } - } -#endif - - /* Do the substitution. */ - write_lock_bh(&table->lock); - /* Check inside lock: is the old number correct? */ - if (num_counters != table->private->number) { - duprintf("num_counters != table->private->number (%u/%u)\n", - num_counters, table->private->number); - write_unlock_bh(&table->lock); - *error = -EAGAIN; - return NULL; - } - oldinfo = table->private; - table->private = newinfo; - newinfo->initial_entries = oldinfo->initial_entries; - write_unlock_bh(&table->lock); + ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - return oldinfo; + (*i)++; + return 0; } -/* Gets counters. */ static inline int -add_entry_to_counter(const struct ip6t_entry *e, +set_entry_to_counter(const struct ip6t_entry *e, struct ip6t_counters total[], unsigned int *i) { - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); + SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); (*i)++; return 0; } static void -get_counters(const struct ip6t_table_info *t, - struct ip6t_counters counters[]) +get_counters(const struct xt_table_info *t, + struct xt_counters counters[]) { unsigned int cpu; unsigned int i; + unsigned int curcpu; + + /* Instead of clearing (by a previous call to memset()) + * the counters and using adds, we set the counters + * with data used by 'current' CPU + * We dont care about preemption here. + */ + curcpu = raw_smp_processor_id(); + + i = 0; + IP6T_ENTRY_ITERATE(t->entries[curcpu], + t->size, + set_entry_to_counter, + counters, + &i); for_each_cpu(cpu) { + if (cpu == curcpu) + continue; i = 0; - IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), + IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, @@ -1091,31 +859,33 @@ get_counters(const struct ip6t_table_info *t, static int copy_entries_to_user(unsigned int total_size, - struct ip6t_table *table, + struct xt_table *table, void __user *userptr) { unsigned int off, num, countersize; struct ip6t_entry *e; - struct ip6t_counters *counters; + struct xt_counters *counters; + struct xt_table_info *private = table->private; int ret = 0; + void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ - countersize = sizeof(struct ip6t_counters) * table->private->number; + countersize = sizeof(struct xt_counters) * private->number; counters = vmalloc(countersize); if (counters == NULL) return -ENOMEM; /* First, sum counters... */ - memset(counters, 0, countersize); write_lock_bh(&table->lock); - get_counters(table->private, counters); + get_counters(private, counters); write_unlock_bh(&table->lock); - /* ... then copy entire thing from CPU 0... */ - if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + /* choose the copy that is on ourc node/cpu */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; } @@ -1127,7 +897,7 @@ copy_entries_to_user(unsigned int total_size, struct ip6t_entry_match *m; struct ip6t_entry_target *t; - e = (struct ip6t_entry *)(table->private->entries + off); + e = (struct ip6t_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], @@ -1173,23 +943,22 @@ get_entries(const struct ip6t_get_entries *entries, struct ip6t_get_entries __user *uptr) { int ret; - struct ip6t_table *t; + struct xt_table *t; - t = find_table_lock(entries->name); + t = xt_find_table_lock(AF_INET6, entries->name); if (t && !IS_ERR(t)) { - duprintf("t->private->number = %u\n", - t->private->number); - if (entries->size == t->private->size) - ret = copy_entries_to_user(t->private->size, + struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", private->number); + if (entries->size == private->size) + ret = copy_entries_to_user(private->size, t, uptr->entrytable); else { duprintf("get_entries: I've got %u not %u!\n", - t->private->size, - entries->size); + private->size, entries->size); ret = -EINVAL; } module_put(t->me); - up(&ip6t_mutex); + xt_table_unlock(t); } else ret = t ? PTR_ERR(t) : -ENOENT; @@ -1201,45 +970,41 @@ do_replace(void __user *user, unsigned int len) { int ret; struct ip6t_replace tmp; - struct ip6t_table *t; - struct ip6t_table_info *newinfo, *oldinfo; - struct ip6t_counters *counters; + struct xt_table *t; + struct xt_table_info *newinfo, *oldinfo; + struct xt_counters *counters; + void *loc_cpu_entry, *loc_cpu_old_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; - /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) - return -ENOMEM; - - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * - (highest_possible_processor_id()+1)); + newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - if (copy_from_user(newinfo->entries, user + sizeof(tmp), + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } - counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters)); + counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto free_newinfo; } - memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters)); ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, tmp.size, tmp.num_entries, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) goto free_newinfo_counters; duprintf("ip_tables: Translated table\n"); - t = try_then_request_module(find_table_lock(tmp.name), + t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name), "ip6table_%s", tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1254,7 +1019,7 @@ do_replace(void __user *user, unsigned int len) goto put_module; } - oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); + oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); if (!oldinfo) goto put_module; @@ -1271,24 +1036,25 @@ do_replace(void __user *user, unsigned int len) /* Get the old counters. */ get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); - vfree(oldinfo); + loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + xt_free_table_info(oldinfo); if (copy_to_user(tmp.counters, counters, - sizeof(struct ip6t_counters) * tmp.num_counters) != 0) + sizeof(struct xt_counters) * tmp.num_counters) != 0) ret = -EFAULT; vfree(counters); - up(&ip6t_mutex); + xt_table_unlock(t); return ret; put_module: module_put(t->me); - up(&ip6t_mutex); + xt_table_unlock(t); free_newinfo_counters_untrans: - IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); free_newinfo_counters: vfree(counters); free_newinfo: - vfree(newinfo); + xt_free_table_info(newinfo); return ret; } @@ -1296,7 +1062,7 @@ do_replace(void __user *user, unsigned int len) * and everything is OK. */ static inline int add_counter_to_entry(struct ip6t_entry *e, - const struct ip6t_counters addme[], + const struct xt_counters addme[], unsigned int *i) { #if 0 @@ -1318,14 +1084,16 @@ static int do_add_counters(void __user *user, unsigned int len) { unsigned int i; - struct ip6t_counters_info tmp, *paddc; - struct ip6t_table *t; + struct xt_counters_info tmp, *paddc; + struct xt_table_info *private; + struct xt_table *t; int ret = 0; + void *loc_cpu_entry; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters)) + if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) return -EINVAL; paddc = vmalloc(len); @@ -1337,27 +1105,30 @@ do_add_counters(void __user *user, unsigned int len) goto free; } - t = find_table_lock(tmp.name); + t = xt_find_table_lock(AF_INET6, tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } write_lock_bh(&t->lock); - if (t->private->number != paddc->num_counters) { + private = t->private; + if (private->number != paddc->num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; - IP6T_ENTRY_ITERATE(t->private->entries, - t->private->size, + /* Choose the copy that is on our node */ + loc_cpu_entry = private->entries[smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, + private->size, add_counter_to_entry, paddc->counters, &i); unlock_up_free: write_unlock_bh(&t->lock); - up(&ip6t_mutex); + xt_table_unlock(t); module_put(t->me); free: vfree(paddc); @@ -1401,7 +1172,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: { char name[IP6T_TABLE_MAXNAMELEN]; - struct ip6t_table *t; + struct xt_table *t; if (*len != sizeof(struct ip6t_getinfo)) { duprintf("length %u != %u\n", *len, @@ -1416,25 +1187,26 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - t = try_then_request_module(find_table_lock(name), + t = try_then_request_module(xt_find_table_lock(AF_INET6, name), "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; + struct xt_table_info *private = t->private; info.valid_hooks = t->valid_hooks; - memcpy(info.hook_entry, t->private->hook_entry, + memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); - memcpy(info.underflow, t->private->underflow, + memcpy(info.underflow, private->underflow, sizeof(info.underflow)); - info.num_entries = t->private->number; - info.size = t->private->size; + info.num_entries = private->number; + info.size = private->size; memcpy(info.name, name, sizeof(info.name)); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; else ret = 0; - up(&ip6t_mutex); + xt_table_unlock(t); module_put(t->me); } else ret = t ? PTR_ERR(t) : -ENOENT; @@ -1461,7 +1233,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP6T_SO_GET_REVISION_MATCH: case IP6T_SO_GET_REVISION_TARGET: { struct ip6t_get_revision rev; - int (*revfn)(const char *, u8, int *); + int target; if (*len != sizeof(rev)) { ret = -EINVAL; @@ -1473,12 +1245,13 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } if (cmd == IP6T_SO_GET_REVISION_TARGET) - revfn = target_revfn; + target = 1; else - revfn = match_revfn; + target = 0; - try_then_request_module(find_revision(rev.name, rev.revision, - revfn, &ret), + try_then_request_module(xt_find_revision(AF_INET6, rev.name, + rev.revision, + target, &ret), "ip6t_%s", rev.name); break; } @@ -1491,308 +1264,52 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -/* Registration hooks for targets. */ -int -ip6t_register_target(struct ip6t_target *target) -{ - int ret; - - ret = down_interruptible(&ip6t_mutex); - if (ret != 0) - return ret; - list_add(&target->list, &ip6t_target); - up(&ip6t_mutex); - return ret; -} - -void -ip6t_unregister_target(struct ip6t_target *target) -{ - down(&ip6t_mutex); - LIST_DELETE(&ip6t_target, target); - up(&ip6t_mutex); -} - -int -ip6t_register_match(struct ip6t_match *match) -{ - int ret; - - ret = down_interruptible(&ip6t_mutex); - if (ret != 0) - return ret; - - list_add(&match->list, &ip6t_match); - up(&ip6t_mutex); - - return ret; -} - -void -ip6t_unregister_match(struct ip6t_match *match) -{ - down(&ip6t_mutex); - LIST_DELETE(&ip6t_match, match); - up(&ip6t_mutex); -} - -int ip6t_register_table(struct ip6t_table *table, +int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) { int ret; - struct ip6t_table_info *newinfo; - static struct ip6t_table_info bootstrap + struct xt_table_info *newinfo; + static struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; + void *loc_cpu_entry; - newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * - (highest_possible_processor_id()+1)); + newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; - memcpy(newinfo->entries, repl->entries, repl->size); + /* choose the copy on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(table->name, table->valid_hooks, - newinfo, repl->size, + newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, repl->underflow); if (ret != 0) { - vfree(newinfo); + xt_free_table_info(newinfo); return ret; } - ret = down_interruptible(&ip6t_mutex); - if (ret != 0) { - vfree(newinfo); + if (xt_register_table(table, &bootstrap, newinfo) != 0) { + xt_free_table_info(newinfo); return ret; } - /* Don't autoload: we'd eat our tail... */ - if (list_named_find(&ip6t_tables, table->name)) { - ret = -EEXIST; - goto free_unlock; - } - - /* Simplifies replace_table code. */ - table->private = &bootstrap; - if (!replace_table(table, 0, newinfo, &ret)) - goto free_unlock; - - duprintf("table->private->number = %u\n", - table->private->number); - - /* save number of initial entries */ - table->private->initial_entries = table->private->number; - - rwlock_init(&table->lock); - list_prepend(&ip6t_tables, table); - - unlock: - up(&ip6t_mutex); - return ret; - - free_unlock: - vfree(newinfo); - goto unlock; -} - -void ip6t_unregister_table(struct ip6t_table *table) -{ - down(&ip6t_mutex); - LIST_DELETE(&ip6t_tables, table); - up(&ip6t_mutex); - - /* Decrease module usage counts and free resources */ - IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, - cleanup_entry, NULL); - vfree(table->private); -} - -/* Returns 1 if the port is matched by the range, 0 otherwise */ -static inline int -port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) -{ - int ret; - - ret = (port >= min && port <= max) ^ invert; - return ret; -} - -static int -tcp_find_option(u_int8_t option, - const struct sk_buff *skb, - unsigned int tcpoff, - unsigned int optlen, - int invert, - int *hotdrop) -{ - /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; - unsigned int i; - - duprintf("tcp_match: finding option\n"); - if (!optlen) - return invert; - /* If we don't have the whole header, drop packet. */ - op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen, - _opt); - if (op == NULL) { - *hotdrop = 1; - return 0; - } - - for (i = 0; i < optlen; ) { - if (op[i] == option) return !invert; - if (op[i] < 2) i++; - else i += op[i+1]?:1; - } - - return invert; -} - -static int -tcp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - struct tcphdr _tcph, *th; - const struct ip6t_tcp *tcpinfo = matchinfo; - - if (offset) { - /* To quote Alan: - - Don't allow a fragment of TCP 8 bytes in. Nobody normal - causes this. Its a cracker trying to break in by doing a - flag overwrite to pass the direction checks. - */ - if (offset == 1) { - duprintf("Dropping evil TCP offset=1 frag.\n"); - *hotdrop = 1; - } - /* Must not be a fragment. */ - return 0; - } - -#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg)) - - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); - if (th == NULL) { - /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ - duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; - } - - if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], - ntohs(th->source), - !!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT))) - return 0; - if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], - ntohs(th->dest), - !!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT))) - return 0; - if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) - == tcpinfo->flg_cmp, - IP6T_TCP_INV_FLAGS)) - return 0; - if (tcpinfo->option) { - if (th->doff * 4 < sizeof(_tcph)) { - *hotdrop = 1; - return 0; - } - if (!tcp_find_option(tcpinfo->option, skb, protoff, - th->doff*4 - sizeof(*th), - tcpinfo->invflags & IP6T_TCP_INV_OPTION, - hotdrop)) - return 0; - } - return 1; -} - -/* Called when user tries to insert an entry of this type. */ -static int -tcp_checkentry(const char *tablename, - const struct ip6t_ip6 *ipv6, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - const struct ip6t_tcp *tcpinfo = matchinfo; - - /* Must specify proto == TCP, and no unknown invflags */ - return ipv6->proto == IPPROTO_TCP - && !(ipv6->invflags & IP6T_INV_PROTO) - && matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp)) - && !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK); + return 0; } -static int -udp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +void ip6t_unregister_table(struct xt_table *table) { - struct udphdr _udph, *uh; - const struct ip6t_udp *udpinfo = matchinfo; + struct xt_table_info *private; + void *loc_cpu_entry; - /* Must not be a fragment. */ - if (offset) - return 0; - - uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); - if (uh == NULL) { - /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ - duprintf("Dropping evil UDP tinygram.\n"); - *hotdrop = 1; - return 0; - } - - return port_match(udpinfo->spts[0], udpinfo->spts[1], - ntohs(uh->source), - !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT)) - && port_match(udpinfo->dpts[0], udpinfo->dpts[1], - ntohs(uh->dest), - !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT)); -} - -/* Called when user tries to insert an entry of this type. */ -static int -udp_checkentry(const char *tablename, - const struct ip6t_ip6 *ipv6, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) -{ - const struct ip6t_udp *udpinfo = matchinfo; + private = xt_unregister_table(table); - /* Must specify proto == UDP, and no unknown invflags */ - if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) { - duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto, - IPPROTO_UDP); - return 0; - } - if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) { - duprintf("ip6t_udp: matchsize %u != %u\n", - matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp))); - return 0; - } - if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) { - duprintf("ip6t_udp: unknown flags %X\n", - udpinfo->invflags); - return 0; - } - - return 1; + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + xt_free_table_info(private); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ @@ -1840,11 +1357,12 @@ icmp6_match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int icmp6_checkentry(const char *tablename, - const struct ip6t_ip6 *ipv6, + const void *entry, void *matchinfo, unsigned int matchsize, unsigned int hook_mask) { + const struct ip6t_ip6 *ipv6 = entry; const struct ip6t_icmp *icmpinfo = matchinfo; /* Must specify proto == ICMP, and no unknown invflags */ @@ -1874,187 +1392,78 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get = do_ip6t_get_ctl, }; -static struct ip6t_match tcp_matchstruct = { - .name = "tcp", - .match = &tcp_match, - .checkentry = &tcp_checkentry, -}; - -static struct ip6t_match udp_matchstruct = { - .name = "udp", - .match = &udp_match, - .checkentry = &udp_checkentry, -}; - static struct ip6t_match icmp6_matchstruct = { .name = "icmp6", .match = &icmp6_match, .checkentry = &icmp6_checkentry, }; -#ifdef CONFIG_PROC_FS -static inline int print_name(const char *i, - off_t start_offset, char *buffer, int length, - off_t *pos, unsigned int *count) -{ - if ((*count)++ >= start_offset) { - unsigned int namelen; - - namelen = sprintf(buffer + *pos, "%s\n", - i + sizeof(struct list_head)); - if (*pos + namelen > length) { - /* Stop iterating */ - return 1; - } - *pos += namelen; - } - return 0; -} - -static inline int print_target(const struct ip6t_target *t, - off_t start_offset, char *buffer, int length, - off_t *pos, unsigned int *count) -{ - if (t == &ip6t_standard_target || t == &ip6t_error_target) - return 0; - return print_name((char *)t, start_offset, buffer, length, pos, count); -} - -static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length) -{ - off_t pos = 0; - unsigned int count = 0; - - if (down_interruptible(&ip6t_mutex) != 0) - return 0; - - LIST_FIND(&ip6t_tables, print_name, char *, - offset, buffer, length, &pos, &count); - - up(&ip6t_mutex); - - /* `start' hack - see fs/proc/generic.c line ~105 */ - *start=(char *)((unsigned long)count-offset); - return pos; -} - -static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length) -{ - off_t pos = 0; - unsigned int count = 0; - - if (down_interruptible(&ip6t_mutex) != 0) - return 0; - - LIST_FIND(&ip6t_target, print_target, struct ip6t_target *, - offset, buffer, length, &pos, &count); - - up(&ip6t_mutex); - - *start = (char *)((unsigned long)count - offset); - return pos; -} - -static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length) -{ - off_t pos = 0; - unsigned int count = 0; - - if (down_interruptible(&ip6t_mutex) != 0) - return 0; - - LIST_FIND(&ip6t_match, print_name, char *, - offset, buffer, length, &pos, &count); - - up(&ip6t_mutex); - - *start = (char *)((unsigned long)count - offset); - return pos; -} - -static const struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] = -{ { "ip6_tables_names", ip6t_get_tables }, - { "ip6_tables_targets", ip6t_get_targets }, - { "ip6_tables_matches", ip6t_get_matches }, - { NULL, NULL} }; -#endif /*CONFIG_PROC_FS*/ - static int __init init(void) { int ret; + xt_proto_init(AF_INET6); + /* Noone else will be downing sem now, so we won't sleep */ - down(&ip6t_mutex); - list_append(&ip6t_target, &ip6t_standard_target); - list_append(&ip6t_target, &ip6t_error_target); - list_append(&ip6t_match, &tcp_matchstruct); - list_append(&ip6t_match, &udp_matchstruct); - list_append(&ip6t_match, &icmp6_matchstruct); - up(&ip6t_mutex); + xt_register_target(AF_INET6, &ip6t_standard_target); + xt_register_target(AF_INET6, &ip6t_error_target); + xt_register_match(AF_INET6, &icmp6_matchstruct); /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); if (ret < 0) { duprintf("Unable to register sockopts.\n"); + xt_proto_fini(AF_INET6); return ret; } -#ifdef CONFIG_PROC_FS - { - struct proc_dir_entry *proc; - int i; - - for (i = 0; ip6t_proc_entry[i].name; i++) { - proc = proc_net_create(ip6t_proc_entry[i].name, 0, - ip6t_proc_entry[i].get_info); - if (!proc) { - while (--i >= 0) - proc_net_remove(ip6t_proc_entry[i].name); - nf_unregister_sockopt(&ip6t_sockopts); - return -ENOMEM; - } - proc->owner = THIS_MODULE; - } - } -#endif - - printk("ip6_tables: (C) 2000-2002 Netfilter core team\n"); + printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; } static void __exit fini(void) { nf_unregister_sockopt(&ip6t_sockopts); -#ifdef CONFIG_PROC_FS - { - int i; - for (i = 0; ip6t_proc_entry[i].name; i++) - proc_net_remove(ip6t_proc_entry[i].name); - } -#endif + xt_unregister_match(AF_INET6, &icmp6_matchstruct); + xt_unregister_target(AF_INET6, &ip6t_error_target); + xt_unregister_target(AF_INET6, &ip6t_standard_target); + xt_proto_fini(AF_INET6); } /* - * find specified header up to transport protocol header. - * If found target header, the offset to the header is set to *offset - * and return 0. otherwise, return -1. + * find the offset to specified header or the protocol number of last header + * if target < 0. "last header" is transport protocol header, ESP, or + * "No next header". + * + * If target header is found, its offset is set in *offset and return protocol + * number. Otherwise, return -1. + * + * Note that non-1st fragment is special case that "the protocol number + * of last header" is "next header" field in Fragment header. In this case, + * *offset is meaningless and fragment offset is stored in *fragoff if fragoff + * isn't NULL. * - * Notes: - non-1st Fragment Header isn't skipped. - * - ESP header isn't skipped. - * - The target header may be trancated. */ -int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + int target, unsigned short *fragoff) { unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; u8 nexthdr = skb->nh.ipv6h->nexthdr; unsigned int len = skb->len - start; + if (fragoff) + *fragoff = 0; + while (nexthdr != target) { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; - if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { + if (target < 0) + break; return -1; + } + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (hp == NULL) return -1; @@ -2068,8 +1477,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) if (fp == NULL) return -1; - if (ntohs(*fp) & ~0x7) + _frag_off = ntohs(*fp) & ~0x7; + if (_frag_off) { + if (target < 0 && + ((!ipv6_ext_hdr(hp->nexthdr)) || + nexthdr == NEXTHDR_NONE)) { + if (fragoff) + *fragoff = _frag_off; + return hp->nexthdr; + } return -1; + } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen + 2) << 2; @@ -2082,18 +1500,15 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) } *offset = start; - return 0; + return nexthdr; } EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); -EXPORT_SYMBOL(ip6t_register_match); -EXPORT_SYMBOL(ip6t_unregister_match); -EXPORT_SYMBOL(ip6t_register_target); -EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); EXPORT_SYMBOL(ipv6_find_hdr); +EXPORT_SYMBOL(ip6_masked_addrcmp); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 8f5549b7272..306200c3505 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -62,7 +62,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, } static int ip6t_hl_checkentry(const char *tablename, - const struct ip6t_entry *e, + const void *entry, void *targinfo, unsigned int targinfosize, unsigned int hook_mask) diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cd1d1bd903..77c725832de 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/ip.h> #include <linux/spinlock.h> #include <linux/icmpv6.h> @@ -62,9 +63,8 @@ static void dump_packet(const struct nf_loginfo *info, return; } - /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000" */ - printk("SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->saddr)); - printk("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->daddr)); + /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ + printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr)); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", @@ -443,7 +443,7 @@ ip6t_log_target(struct sk_buff **pskb, static int ip6t_log_checkentry(const char *tablename, - const struct ip6t_entry *e, + const void *entry, void *targinfo, unsigned int targinfosize, unsigned int hook_mask) diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c deleted file mode 100644 index eab8fb864ee..00000000000 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ /dev/null @@ -1,81 +0,0 @@ -/* This is a module which is used for setting the NFMARK field of an skb. */ - -/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter_ipv6/ip6t_MARK.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ip6t_mark_target_info *markinfo = targinfo; - - if((*pskb)->nfmark != markinfo->mark) - (*pskb)->nfmark = markinfo->mark; - - return IP6T_CONTINUE; -} - -static int -checkentry(const char *tablename, - const struct ip6t_entry *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_mark_target_info))) { - printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", - targinfosize, - IP6T_ALIGN(sizeof(struct ip6t_mark_target_info))); - return 0; - } - - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; - } - - return 1; -} - -static struct ip6t_target ip6t_mark_reg = { - .name = "MARK", - .target = target, - .checkentry = checkentry, - .me = THIS_MODULE -}; - -static int __init init(void) -{ - printk(KERN_DEBUG "registering ipv6 mark target\n"); - if (ip6t_register_target(&ip6t_mark_reg)) - return -EINVAL; - - return 0; -} - -static void __exit fini(void) -{ - ip6t_unregister_target(&ip6t_mark_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c deleted file mode 100644 index c6e3730e740..00000000000 --- a/net/ipv6/netfilter/ip6t_NFQUEUE.c +++ /dev/null @@ -1,70 +0,0 @@ -/* ip6tables module for using new netfilter netlink queue - * - * (C) 2005 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter_ipv4/ipt_NFQUEUE.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("ip6tables NFQUEUE target"); -MODULE_LICENSE("GPL"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ipt_NFQ_info *tinfo = targinfo; - - return NF_QUEUE_NR(tinfo->queuenum); -} - -static int -checkentry(const char *tablename, - const struct ip6t_entry *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) { - printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", - targinfosize, - IP6T_ALIGN(sizeof(struct ipt_NFQ_info))); - return 0; - } - - return 1; -} - -static struct ip6t_target ipt_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ip6t_register_target(&ipt_NFQ_reg); -} - -static void __exit fini(void) -{ - ip6t_unregister_target(&ipt_NFQ_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index b03e87adca9..c745717b4ce 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -218,12 +218,13 @@ static unsigned int reject6_target(struct sk_buff **pskb, } static int check(const char *tablename, - const struct ip6t_entry *e, + const void *entry, void *targinfo, unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; + const struct ip6t_entry *e = entry; if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) { DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index dde37793d20..219a30365df 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/ip.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> @@ -53,7 +54,7 @@ match(const struct sk_buff *skb, unsigned int ptr; unsigned int hdrlen = 0; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0) return 0; ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); @@ -97,7 +98,7 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, + const void *entry, void *matchinfo, unsigned int matchinfosize, unsigned int hook_mask) diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index c450a635e54..b4c153a5350 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -36,19 +36,19 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); #endif /* - * (Type & 0xC0) >> 6 - * 0 -> ignorable - * 1 -> must drop the packet - * 2 -> send ICMP PARM PROB regardless and drop packet - * 3 -> Send ICMP if not a multicast address and drop packet + * (Type & 0xC0) >> 6 + * 0 -> ignorable + * 1 -> must drop the packet + * 2 -> send ICMP PARM PROB regardless and drop packet + * 3 -> Send ICMP if not a multicast address and drop packet * (Type & 0x20) >> 5 - * 0 -> invariant - * 1 -> can change the routing + * 0 -> invariant + * 1 -> can change the routing * (Type & 0x1F) Type - * 0 -> Pad1 (only 1 byte!) - * 1 -> PadN LENGTH info (total length = length + 2) - * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) - * 5 -> RTALERT 2 x x + * 0 -> Pad1 (only 1 byte!) + * 1 -> PadN LENGTH info (total length = length + 2) + * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) + * 5 -> RTALERT 2 x x */ static int @@ -60,59 +60,58 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_opt_hdr _optsh, *oh; - const struct ip6t_opts *optinfo = matchinfo; - unsigned int temp; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int ret = 0; - u8 _opttype, *tp = NULL; - u8 _optlen, *lp = NULL; - unsigned int optlen; - + struct ipv6_opt_hdr _optsh, *oh; + const struct ip6t_opts *optinfo = matchinfo; + unsigned int temp; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int ret = 0; + u8 _opttype, *tp = NULL; + u8 _optlen, *lp = NULL; + unsigned int optlen; + #if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) #else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) #endif return 0; - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - if (oh == NULL){ - *hotdrop = 1; - return 0; - } - - hdrlen = ipv6_optlen(oh); - if (skb->len - ptr < hdrlen){ - /* Packet smaller than it's length field */ - return 0; - } - - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); - - DEBUGP("len %02X %04X %02X ", - optinfo->hdrlen, hdrlen, - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); - - ret = (oh != NULL) - && - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); - - ptr += 2; - hdrlen -= 2; - if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){ - return ret; + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL) { + *hotdrop = 1; + return 0; + } + + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen) { + /* Packet smaller than it's length field */ + return 0; + } + + DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); + + DEBUGP("len %02X %04X %02X ", + optinfo->hdrlen, hdrlen, + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); + + ret = (oh != NULL) && + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); + + ptr += 2; + hdrlen -= 2; + if (!(optinfo->flags & IP6T_OPTS_OPTS)) { + return ret; } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { DEBUGP("Not strict - not implemented"); } else { DEBUGP("Strict "); - DEBUGP("#%d ",optinfo->optsnr); - for(temp=0; temp<optinfo->optsnr; temp++){ + DEBUGP("#%d ", optinfo->optsnr); + for (temp = 0; temp < optinfo->optsnr; temp++) { /* type field exists ? */ if (hdrlen < 1) break; @@ -122,10 +121,10 @@ match(const struct sk_buff *skb, break; /* Type check */ - if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){ + if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { DEBUGP("Tbad %02X %02X\n", *tp, - (optinfo->opts[temp] & 0xFF00)>>8); + (optinfo->opts[temp] & 0xFF00) >> 8); return 0; } else { DEBUGP("Tok "); @@ -169,7 +168,8 @@ match(const struct sk_buff *skb, } if (temp == optinfo->optsnr) return ret; - else return 0; + else + return 0; } return 0; @@ -178,25 +178,24 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) + const void *info, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) { - const struct ip6t_opts *optsinfo = matchinfo; - - if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) { - DEBUGP("ip6t_opts: matchsize %u != %u\n", - matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts))); - return 0; - } - if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - DEBUGP("ip6t_opts: unknown flags %X\n", - optsinfo->invflags); - return 0; - } - - return 1; + const struct ip6t_opts *optsinfo = matchinfo; + + if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) { + DEBUGP("ip6t_opts: matchsize %u != %u\n", + matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts))); + return 0; + } + if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { + DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); + return 0; + } + + return 1; } static struct ip6t_match opts_match = { @@ -212,12 +211,12 @@ static struct ip6t_match opts_match = { static int __init init(void) { - return ip6t_register_match(&opts_match); + return ip6t_register_match(&opts_match); } static void __exit cleanup(void) { - ip6t_unregister_match(&opts_match); + ip6t_unregister_match(&opts_match); } module_init(init); diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index 24bc0cde43a..724285df871 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/ip.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> @@ -55,7 +56,7 @@ match(const struct sk_buff *skb, /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0) return 0; eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); @@ -75,7 +76,7 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, + const void *ip, void *matchinfo, unsigned int matchinfosize, unsigned int hook_mask) diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 616c2cbcd54..27396ac0b9e 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -27,45 +27,45 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { + unsigned char eui64[8]; + int i = 0; - unsigned char eui64[8]; - int i=0; - - if ( !(skb->mac.raw >= skb->head - && (skb->mac.raw + ETH_HLEN) <= skb->data) - && offset != 0) { - *hotdrop = 1; - return 0; - } - - memset(eui64, 0, sizeof(eui64)); - - if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) { - if (skb->nh.ipv6h->version == 0x6) { - memcpy(eui64, eth_hdr(skb)->h_source, 3); - memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3); - eui64[3]=0xff; - eui64[4]=0xfe; - eui64[0] |= 0x02; - - i=0; - while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == - eui64[i]) && (i<8)) i++; - - if ( i == 8 ) - return 1; - } - } - - return 0; + if (!(skb->mac.raw >= skb->head && + (skb->mac.raw + ETH_HLEN) <= skb->data) && + offset != 0) { + *hotdrop = 1; + return 0; + } + + memset(eui64, 0, sizeof(eui64)); + + if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) { + if (skb->nh.ipv6h->version == 0x6) { + memcpy(eui64, eth_hdr(skb)->h_source, 3); + memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3); + eui64[3] = 0xff; + eui64[4] = 0xfe; + eui64[0] |= 0x02; + + i = 0; + while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == eui64[i]) + && (i < 8)) + i++; + + if (i == 8) + return 1; + } + } + + return 0; } static int ip6t_eui64_checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) + const void *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) { if (hook_mask & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 085d5f8eea2..4c14125a0e2 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -31,12 +31,12 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static inline int id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) { - int r=0; - DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,id,max); - r=(id >= min && id <= max) ^ invert; - DEBUGP(" result %s\n",r? "PASS" : "FAILED"); - return r; + int r = 0; + DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', + min, id, max); + r = (id >= min && id <= max) ^ invert; + DEBUGP(" result %s\n", r ? "PASS" : "FAILED"); + return r; } static int @@ -48,92 +48,91 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct frag_hdr _frag, *fh; - const struct ip6t_frag *fraginfo = matchinfo; - unsigned int ptr; + struct frag_hdr _frag, *fh; + const struct ip6t_frag *fraginfo = matchinfo; + unsigned int ptr; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0) return 0; fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); - if (fh == NULL){ + if (fh == NULL) { *hotdrop = 1; return 0; } - DEBUGP("INFO %04X ", fh->frag_off); - DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); - DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); - DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF)); - DEBUGP("ID %u %08X\n", ntohl(fh->identification), - ntohl(fh->identification)); - - DEBUGP("IPv6 FRAG id %02X ", - (id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))); - DEBUGP("res %02X %02X%04X %02X ", - (fraginfo->flags & IP6T_FRAG_RES), fh->reserved, - ntohs(fh->frag_off) & 0x6, - !((fraginfo->flags & IP6T_FRAG_RES) - && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); - DEBUGP("first %02X %02X %02X ", - (fraginfo->flags & IP6T_FRAG_FST), - ntohs(fh->frag_off) & ~0x7, - !((fraginfo->flags & IP6T_FRAG_FST) - && (ntohs(fh->frag_off) & ~0x7))); - DEBUGP("mf %02X %02X %02X ", - (fraginfo->flags & IP6T_FRAG_MF), - ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_MF) - && !((ntohs(fh->frag_off) & IP6_MF)))); - DEBUGP("last %02X %02X %02X\n", - (fraginfo->flags & IP6T_FRAG_NMF), - ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_NMF) - && (ntohs(fh->frag_off) & IP6_MF))); - - return (fh != NULL) - && - (id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))) - && - !((fraginfo->flags & IP6T_FRAG_RES) - && (fh->reserved || (ntohs(fh->frag_off) & 0x6))) - && - !((fraginfo->flags & IP6T_FRAG_FST) - && (ntohs(fh->frag_off) & ~0x7)) - && - !((fraginfo->flags & IP6T_FRAG_MF) - && !(ntohs(fh->frag_off) & IP6_MF)) - && - !((fraginfo->flags & IP6T_FRAG_NMF) - && (ntohs(fh->frag_off) & IP6_MF)); + DEBUGP("INFO %04X ", fh->frag_off); + DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); + DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6); + DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF)); + DEBUGP("ID %u %08X\n", ntohl(fh->identification), + ntohl(fh->identification)); + + DEBUGP("IPv6 FRAG id %02X ", + (id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))); + DEBUGP("res %02X %02X%04X %02X ", + (fraginfo->flags & IP6T_FRAG_RES), fh->reserved, + ntohs(fh->frag_off) & 0x6, + !((fraginfo->flags & IP6T_FRAG_RES) + && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); + DEBUGP("first %02X %02X %02X ", + (fraginfo->flags & IP6T_FRAG_FST), + ntohs(fh->frag_off) & ~0x7, + !((fraginfo->flags & IP6T_FRAG_FST) + && (ntohs(fh->frag_off) & ~0x7))); + DEBUGP("mf %02X %02X %02X ", + (fraginfo->flags & IP6T_FRAG_MF), + ntohs(fh->frag_off) & IP6_MF, + !((fraginfo->flags & IP6T_FRAG_MF) + && !((ntohs(fh->frag_off) & IP6_MF)))); + DEBUGP("last %02X %02X %02X\n", + (fraginfo->flags & IP6T_FRAG_NMF), + ntohs(fh->frag_off) & IP6_MF, + !((fraginfo->flags & IP6T_FRAG_NMF) + && (ntohs(fh->frag_off) & IP6_MF))); + + return (fh != NULL) + && + (id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))) + && + !((fraginfo->flags & IP6T_FRAG_RES) + && (fh->reserved || (ntohs(fh->frag_off) & 0x6))) + && + !((fraginfo->flags & IP6T_FRAG_FST) + && (ntohs(fh->frag_off) & ~0x7)) + && + !((fraginfo->flags & IP6T_FRAG_MF) + && !(ntohs(fh->frag_off) & IP6_MF)) + && + !((fraginfo->flags & IP6T_FRAG_NMF) + && (ntohs(fh->frag_off) & IP6_MF)); } /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) + const void *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) { - const struct ip6t_frag *fraginfo = matchinfo; - - if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) { - DEBUGP("ip6t_frag: matchsize %u != %u\n", - matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag))); - return 0; - } - if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { - DEBUGP("ip6t_frag: unknown flags %X\n", - fraginfo->invflags); - return 0; - } - - return 1; + const struct ip6t_frag *fraginfo = matchinfo; + + if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) { + DEBUGP("ip6t_frag: matchsize %u != %u\n", + matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag))); + return 0; + } + if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { + DEBUGP("ip6t_frag: unknown flags %X\n", fraginfo->invflags); + return 0; + } + + return 1; } static struct ip6t_match frag_match = { @@ -145,12 +144,12 @@ static struct ip6t_match frag_match = { static int __init init(void) { - return ip6t_register_match(&frag_match); + return ip6t_register_match(&frag_match); } static void __exit cleanup(void) { - ip6t_unregister_match(&frag_match); + ip6t_unregister_match(&frag_match); } module_init(init); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 1d09485111d..37a8474a7e0 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -36,19 +36,19 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); #endif /* - * (Type & 0xC0) >> 6 - * 0 -> ignorable - * 1 -> must drop the packet - * 2 -> send ICMP PARM PROB regardless and drop packet - * 3 -> Send ICMP if not a multicast address and drop packet + * (Type & 0xC0) >> 6 + * 0 -> ignorable + * 1 -> must drop the packet + * 2 -> send ICMP PARM PROB regardless and drop packet + * 3 -> Send ICMP if not a multicast address and drop packet * (Type & 0x20) >> 5 - * 0 -> invariant - * 1 -> can change the routing + * 0 -> invariant + * 1 -> can change the routing * (Type & 0x1F) Type - * 0 -> Pad1 (only 1 byte!) - * 1 -> PadN LENGTH info (total length = length + 2) - * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) - * 5 -> RTALERT 2 x x + * 0 -> Pad1 (only 1 byte!) + * 1 -> PadN LENGTH info (total length = length + 2) + * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) + * 5 -> RTALERT 2 x x */ static int @@ -60,59 +60,58 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_opt_hdr _optsh, *oh; - const struct ip6t_opts *optinfo = matchinfo; - unsigned int temp; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int ret = 0; - u8 _opttype, *tp = NULL; - u8 _optlen, *lp = NULL; - unsigned int optlen; - + struct ipv6_opt_hdr _optsh, *oh; + const struct ip6t_opts *optinfo = matchinfo; + unsigned int temp; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int ret = 0; + u8 _opttype, *tp = NULL; + u8 _optlen, *lp = NULL; + unsigned int optlen; + #if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) #else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) #endif return 0; - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - if (oh == NULL){ - *hotdrop = 1; - return 0; - } - - hdrlen = ipv6_optlen(oh); - if (skb->len - ptr < hdrlen){ - /* Packet smaller than it's length field */ - return 0; - } - - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); - - DEBUGP("len %02X %04X %02X ", - optinfo->hdrlen, hdrlen, - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); - - ret = (oh != NULL) - && - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); - - ptr += 2; - hdrlen -= 2; - if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){ - return ret; + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL) { + *hotdrop = 1; + return 0; + } + + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen) { + /* Packet smaller than it's length field */ + return 0; + } + + DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); + + DEBUGP("len %02X %04X %02X ", + optinfo->hdrlen, hdrlen, + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); + + ret = (oh != NULL) && + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); + + ptr += 2; + hdrlen -= 2; + if (!(optinfo->flags & IP6T_OPTS_OPTS)) { + return ret; } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { DEBUGP("Not strict - not implemented"); } else { DEBUGP("Strict "); - DEBUGP("#%d ",optinfo->optsnr); - for(temp=0; temp<optinfo->optsnr; temp++){ + DEBUGP("#%d ", optinfo->optsnr); + for (temp = 0; temp < optinfo->optsnr; temp++) { /* type field exists ? */ if (hdrlen < 1) break; @@ -122,10 +121,10 @@ match(const struct sk_buff *skb, break; /* Type check */ - if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){ + if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { DEBUGP("Tbad %02X %02X\n", *tp, - (optinfo->opts[temp] & 0xFF00)>>8); + (optinfo->opts[temp] & 0xFF00) >> 8); return 0; } else { DEBUGP("Tok "); @@ -169,7 +168,8 @@ match(const struct sk_buff *skb, } if (temp == optinfo->optsnr) return ret; - else return 0; + else + return 0; } return 0; @@ -178,25 +178,24 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) + const void *entry, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) { - const struct ip6t_opts *optsinfo = matchinfo; - - if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) { - DEBUGP("ip6t_opts: matchsize %u != %u\n", - matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts))); - return 0; - } - if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - DEBUGP("ip6t_opts: unknown flags %X\n", - optsinfo->invflags); - return 0; - } - - return 1; + const struct ip6t_opts *optsinfo = matchinfo; + + if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) { + DEBUGP("ip6t_opts: matchsize %u != %u\n", + matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts))); + return 0; + } + if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { + DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); + return 0; + } + + return 1; } static struct ip6t_match opts_match = { @@ -212,12 +211,12 @@ static struct ip6t_match opts_match = { static int __init init(void) { - return ip6t_register_match(&opts_match); + return ip6t_register_match(&opts_match); } static void __exit cleanup(void) { - ip6t_unregister_match(&opts_match); + ip6t_unregister_match(&opts_match); } module_init(init); diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 0beaff5471d..c5d9079f2d9 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -48,7 +48,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in, return 0; } -static int checkentry(const char *tablename, const struct ip6t_ip6 *ip, +static int checkentry(const char *tablename, const void *entry, void *matchinfo, unsigned int matchsize, unsigned int hook_mask) { diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 32e67f05845..83ad6b272f7 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -50,20 +50,20 @@ ipv6header_match(const struct sk_buff *skb, len = skb->len - ptr; temp = 0; - while (ip6t_ext_hdr(nexthdr)) { + while (ip6t_ext_hdr(nexthdr)) { struct ipv6_opt_hdr _hdr, *hp; - int hdrlen; + int hdrlen; /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return 0; /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { + if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; break; } /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { + if (nexthdr == NEXTHDR_ESP) { temp |= MASK_ESP; break; } @@ -72,43 +72,43 @@ ipv6header_match(const struct sk_buff *skb, BUG_ON(hp == NULL); /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); + if (nexthdr == NEXTHDR_FRAGMENT) { + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - temp |= MASK_HOPOPTS; - break; - case NEXTHDR_ROUTING: - temp |= MASK_ROUTING; - break; - case NEXTHDR_FRAGMENT: - temp |= MASK_FRAGMENT; - break; - case NEXTHDR_AUTH: - temp |= MASK_AH; - break; - case NEXTHDR_DEST: - temp |= MASK_DSTOPTS; - break; - default: - return 0; - break; + switch (nexthdr) { + case NEXTHDR_HOP: + temp |= MASK_HOPOPTS; + break; + case NEXTHDR_ROUTING: + temp |= MASK_ROUTING; + break; + case NEXTHDR_FRAGMENT: + temp |= MASK_FRAGMENT; + break; + case NEXTHDR_AUTH: + temp |= MASK_AH; + break; + case NEXTHDR_DEST: + temp |= MASK_DSTOPTS; + break; + default: + return 0; + break; } - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; + nexthdr = hp->nexthdr; + len -= hdrlen; + ptr += hdrlen; if (ptr > skb->len) break; - } + } - if ( (nexthdr != NEXTHDR_NONE ) && (nexthdr != NEXTHDR_ESP) ) + if ((nexthdr != NEXTHDR_NONE) && (nexthdr != NEXTHDR_ESP)) temp |= MASK_PROTO; if (info->modeflag) @@ -124,7 +124,7 @@ ipv6header_match(const struct sk_buff *skb, static int ipv6header_checkentry(const char *tablename, - const struct ip6t_ip6 *ip, + const void *ip, void *matchinfo, unsigned int matchsize, unsigned int hook_mask) @@ -137,8 +137,8 @@ ipv6header_checkentry(const char *tablename, return 0; /* invflags is 0 or 0xff in hard mode */ - if ((!info->modeflag) && info->invflags != 0x00 - && info->invflags != 0xFF) + if ((!info->modeflag) && info->invflags != 0x00 && + info->invflags != 0xFF) return 0; return 1; @@ -152,7 +152,7 @@ static struct ip6t_match ip6t_ipv6header_match = { .me = THIS_MODULE, }; -static int __init ipv6header_init(void) +static int __init ipv6header_init(void) { return ip6t_register_match(&ip6t_ipv6header_match); } @@ -164,4 +164,3 @@ static void __exit ipv6header_exit(void) module_init(ipv6header_init); module_exit(ipv6header_exit); - diff --git a/net/ipv6/netfilter/ip6t_length.c b/net/ipv6/netfilter/ip6t_length.c deleted file mode 100644 index e0537d3811d..00000000000 --- a/net/ipv6/netfilter/ip6t_length.c +++ /dev/null @@ -1,66 +0,0 @@ -/* Length Match - IPv6 Port */ - -/* (C) 1999-2001 James Morris <jmorros@intercode.com.au> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/netfilter_ipv6/ip6t_length.h> -#include <linux/netfilter_ipv6/ip6_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); -MODULE_DESCRIPTION("IPv6 packet length match"); - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct ip6t_length_info *info = matchinfo; - u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr); - - return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; -} - -static int -checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_length_info))) - return 0; - - return 1; -} - -static struct ip6t_match length_match = { - .name = "length", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ip6t_register_match(&length_match); -} - -static void __exit fini(void) -{ - ip6t_unregister_match(&length_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_limit.c b/net/ipv6/netfilter/ip6t_limit.c deleted file mode 100644 index fb782f610be..00000000000 --- a/net/ipv6/netfilter/ip6t_limit.c +++ /dev/null @@ -1,147 +0,0 @@ -/* Kernel module to control the rate - * - * 2 September 1999: Changed from the target RATE to the match - * `limit', removed logging. Did I mention that - * Alexey is a fucking genius? - * Rusty Russell (rusty@rustcorp.com.au). */ - -/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> - * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> - -#include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter_ipv6/ip6t_limit.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); -MODULE_DESCRIPTION("rate limiting within ip6tables"); - -/* The algorithm used is the Simple Token Bucket Filter (TBF) - * see net/sched/sch_tbf.c in the linux source tree - */ - -static DEFINE_SPINLOCK(limit_lock); - -/* Rusty: This is my (non-mathematically-inclined) understanding of - this algorithm. The `average rate' in jiffies becomes your initial - amount of credit `credit' and the most credit you can ever have - `credit_cap'. The `peak rate' becomes the cost of passing the - test, `cost'. - - `prev' tracks the last packet hit: you gain one credit per jiffy. - If you get credit balance more than this, the extra credit is - discarded. Every time the match passes, you lose `cost' credits; - if you don't have that many, the test fails. - - See Alexey's formal explanation in net/sched/sch_tbf.c. - - To avoid underflow, we multiply by 128 (ie. you get 128 credits per - jiffy). Hence a cost of 2^32-1, means one pass per 32768 seconds - at 1024HZ (or one every 9 hours). A cost of 1 means 12800 passes - per second at 100HZ. */ - -#define CREDITS_PER_JIFFY 128 - -static int -ip6t_limit_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - struct ip6t_rateinfo *r = ((struct ip6t_rateinfo *)matchinfo)->master; - unsigned long now = jiffies; - - spin_lock_bh(&limit_lock); - r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY; - if (r->credit > r->credit_cap) - r->credit = r->credit_cap; - - if (r->credit >= r->cost) { - /* We're not limited. */ - r->credit -= r->cost; - spin_unlock_bh(&limit_lock); - return 1; - } - - spin_unlock_bh(&limit_lock); - return 0; -} - -/* Precision saver. */ -static u_int32_t -user2credits(u_int32_t user) -{ - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) - /* Divide first. */ - return (user / IP6T_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; - - return (user * HZ * CREDITS_PER_JIFFY) / IP6T_LIMIT_SCALE; -} - -static int -ip6t_limit_checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - struct ip6t_rateinfo *r = matchinfo; - - if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_rateinfo))) - return 0; - - /* Check for overflow. */ - if (r->burst == 0 - || user2credits(r->avg * r->burst) < user2credits(r->avg)) { - printk("Call rusty: overflow in ip6t_limit: %u/%u\n", - r->avg, r->burst); - return 0; - } - - /* User avg in seconds * IP6T_LIMIT_SCALE: convert to jiffies * - 128. */ - r->prev = jiffies; - r->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ - r->cost = user2credits(r->avg); - - /* For SMP, we only want to use one set of counters. */ - r->master = r; - - return 1; -} - -static struct ip6t_match ip6t_limit_reg = { - .name = "limit", - .match = ip6t_limit_match, - .checkentry = ip6t_limit_checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - if (ip6t_register_match(&ip6t_limit_reg)) - return -EINVAL; - return 0; -} - -static void __exit fini(void) -{ - ip6t_unregister_match(&ip6t_limit_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c deleted file mode 100644 index 526d43e3723..00000000000 --- a/net/ipv6/netfilter/ip6t_mac.c +++ /dev/null @@ -1,80 +0,0 @@ -/* Kernel module to match MAC address parameters. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/if_ether.h> - -#include <linux/netfilter_ipv6/ip6t_mac.h> -#include <linux/netfilter_ipv6/ip6_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MAC address matching module for IPv6"); -MODULE_AUTHOR("Netfilter Core Teaam <coreteam@netfilter.org>"); - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct ip6t_mac_info *info = matchinfo; - - /* Is mac pointer valid? */ - return (skb->mac.raw >= skb->head - && (skb->mac.raw + ETH_HLEN) <= skb->data - /* If so, compare... */ - && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN) - == 0) ^ info->invert)); -} - -static int -ip6t_mac_checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - if (hook_mask - & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) - | (1 << NF_IP6_FORWARD))) { - printk("ip6t_mac: only valid for PRE_ROUTING, LOCAL_IN or" - " FORWARD\n"); - return 0; - } - - if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info))) - return 0; - - return 1; -} - -static struct ip6t_match mac_match = { - .name = "mac", - .match = &match, - .checkentry = &ip6t_mac_checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ip6t_register_match(&mac_match); -} - -static void __exit fini(void) -{ - ip6t_unregister_match(&mac_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_mark.c b/net/ipv6/netfilter/ip6t_mark.c deleted file mode 100644 index affc3de364f..00000000000 --- a/net/ipv6/netfilter/ip6t_mark.c +++ /dev/null @@ -1,66 +0,0 @@ -/* Kernel module to match NFMARK values. */ - -/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv6/ip6t_mark.h> -#include <linux/netfilter_ipv6/ip6_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("ip6tables mark match"); - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct ip6t_mark_info *info = matchinfo; - - return ((skb->nfmark & info->mask) == info->mark) ^ info->invert; -} - -static int -checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mark_info))) - return 0; - - return 1; -} - -static struct ip6t_match mark_match = { - .name = "mark", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ip6t_register_match(&mark_match); -} - -static void __exit fini(void) -{ - ip6t_unregister_match(&mark_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c index 6e3246153fa..49f7829dfbc 100644 --- a/net/ipv6/netfilter/ip6t_multiport.c +++ b/net/ipv6/netfilter/ip6t_multiport.c @@ -84,11 +84,12 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, + const void *info, void *matchinfo, unsigned int matchsize, unsigned int hook_mask) { + const struct ip6t_ip6 *ip = info; const struct ip6t_multiport *multiinfo = matchinfo; if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_multiport))) diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 4de4cdad4b7..8c8a4c7ec93 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -36,14 +36,14 @@ match(const struct sk_buff *skb, if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) return 0; - if(info->match & IP6T_OWNER_UID) { - if((skb->sk->sk_socket->file->f_uid != info->uid) ^ + if (info->match & IP6T_OWNER_UID) { + if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) return 0; } - if(info->match & IP6T_OWNER_GID) { - if((skb->sk->sk_socket->file->f_gid != info->gid) ^ + if (info->match & IP6T_OWNER_GID) { + if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) return 0; } @@ -53,23 +53,23 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) + const void *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) { const struct ip6t_owner_info *info = matchinfo; - if (hook_mask - & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) { - printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); - return 0; - } + if (hook_mask + & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) { + printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); + return 0; + } if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info))) return 0; - if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { + if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { printk("ipt_owner: pid and sid matching " "not supported anymore\n"); return 0; diff --git a/net/ipv6/netfilter/ip6t_physdev.c b/net/ipv6/netfilter/ip6t_physdev.c deleted file mode 100644 index 71515c86ece..00000000000 --- a/net/ipv6/netfilter/ip6t_physdev.c +++ /dev/null @@ -1,135 +0,0 @@ -/* Kernel module to match the bridge port in and - * out device for IP packets coming into contact with a bridge. */ - -/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/netfilter_ipv6/ip6t_physdev.h> -#include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter_bridge.h> -#define MATCH 1 -#define NOMATCH 0 - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); -MODULE_DESCRIPTION("iptables bridge physical device match module"); - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - int i; - static const char nulldevname[IFNAMSIZ]; - const struct ip6t_physdev_info *info = matchinfo; - unsigned int ret; - const char *indev, *outdev; - struct nf_bridge_info *nf_bridge; - - /* Not a bridged IP packet or no info available yet: - * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if - * the destination device will be a bridge. */ - if (!(nf_bridge = skb->nf_bridge)) { - /* Return MATCH if the invert flags of the used options are on */ - if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) && - !(info->invert & IP6T_PHYSDEV_OP_BRIDGED)) - return NOMATCH; - if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN) && - !(info->invert & IP6T_PHYSDEV_OP_ISIN)) - return NOMATCH; - if ((info->bitmask & IP6T_PHYSDEV_OP_ISOUT) && - !(info->invert & IP6T_PHYSDEV_OP_ISOUT)) - return NOMATCH; - if ((info->bitmask & IP6T_PHYSDEV_OP_IN) && - !(info->invert & IP6T_PHYSDEV_OP_IN)) - return NOMATCH; - if ((info->bitmask & IP6T_PHYSDEV_OP_OUT) && - !(info->invert & IP6T_PHYSDEV_OP_OUT)) - return NOMATCH; - return MATCH; - } - - /* This only makes sense in the FORWARD and POSTROUTING chains */ - if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) && - (!!(nf_bridge->mask & BRNF_BRIDGED) ^ - !(info->invert & IP6T_PHYSDEV_OP_BRIDGED))) - return NOMATCH; - - if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN && - (!nf_bridge->physindev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISIN))) || - (info->bitmask & IP6T_PHYSDEV_OP_ISOUT && - (!nf_bridge->physoutdev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISOUT)))) - return NOMATCH; - - if (!(info->bitmask & IP6T_PHYSDEV_OP_IN)) - goto match_outdev; - indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)indev)[i] - ^ ((const unsigned int *)info->physindev)[i]) - & ((const unsigned int *)info->in_mask)[i]; - } - - if ((ret == 0) ^ !(info->invert & IP6T_PHYSDEV_OP_IN)) - return NOMATCH; - -match_outdev: - if (!(info->bitmask & IP6T_PHYSDEV_OP_OUT)) - return MATCH; - outdev = nf_bridge->physoutdev ? - nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)outdev)[i] - ^ ((const unsigned int *)info->physoutdev)[i]) - & ((const unsigned int *)info->out_mask)[i]; - } - - return (ret != 0) ^ !(info->invert & IP6T_PHYSDEV_OP_OUT); -} - -static int -checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - const struct ip6t_physdev_info *info = matchinfo; - - if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_physdev_info))) - return 0; - if (!(info->bitmask & IP6T_PHYSDEV_OP_MASK) || - info->bitmask & ~IP6T_PHYSDEV_OP_MASK) - return 0; - return 1; -} - -static struct ip6t_match physdev_match = { - .name = "physdev", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ip6t_register_match(&physdev_match); -} - -static void __exit fini(void) -{ - ip6t_unregister_match(&physdev_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c new file mode 100644 index 00000000000..afe1cc4c18a --- /dev/null +++ b/net/ipv6/netfilter/ip6t_policy.c @@ -0,0 +1,175 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/xfrm.h> + +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6/ip6t_policy.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e) +{ +#define MATCH_ADDR(x,y,z) (!e->match.x || \ + ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x) +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) && + MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct ip6t_policy_info *info = matchinfo; + int ret; + + if (info->flags & IP6T_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IP6T_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const void *ip_void, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ip6t_policy_info *info = matchinfo; + + if (matchsize != IP6T_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n", + matchsize, IP6T_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ip6t_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN) + && info->flags & IP6T_POLICY_MATCH_OUT) { + printk(KERN_ERR "ip6t_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT) + && info->flags & IP6T_POLICY_MATCH_IN) { + printk(KERN_ERR "ip6t_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IP6T_POLICY_MAX_ELEM) { + printk(KERN_ERR "ip6t_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ip6t_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index beb2fd5cebb..8f82476dc89 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -33,12 +33,12 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static inline int segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert) { - int r=0; - DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,id,max); - r=(id >= min && id <= max) ^ invert; - DEBUGP(" result %s\n",r? "PASS" : "FAILED"); - return r; + int r = 0; + DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", + invert ? '!' : ' ', min, id, max); + r = (id >= min && id <= max) ^ invert; + DEBUGP(" result %s\n", r ? "PASS" : "FAILED"); + return r; } static int @@ -50,87 +50,93 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_rt_hdr _route, *rh; - const struct ip6t_rt *rtinfo = matchinfo; - unsigned int temp; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int ret = 0; - struct in6_addr *ap, _addr; - - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + struct ipv6_rt_hdr _route, *rh; + const struct ip6t_rt *rtinfo = matchinfo; + unsigned int temp; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int ret = 0; + struct in6_addr *ap, _addr; + + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0) return 0; - rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); - if (rh == NULL){ - *hotdrop = 1; - return 0; - } - - hdrlen = ipv6_optlen(rh); - if (skb->len - ptr < hdrlen){ - /* Pcket smaller than its length field */ - return 0; - } - - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - DEBUGP("TYPE %04X ", rh->type); - DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - DEBUGP("IPv6 RT segsleft %02X ", - (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS)))); - DEBUGP("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - DEBUGP("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - (!(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN)))); - DEBUGP("res %02X %02X %02X ", - (rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->reserved))); - - ret = (rh != NULL) - && - (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))) - && - (!(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))) - && - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP))); + rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); + if (rh == NULL) { + *hotdrop = 1; + return 0; + } + + hdrlen = ipv6_optlen(rh); + if (skb->len - ptr < hdrlen) { + /* Pcket smaller than its length field */ + return 0; + } + + DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); + DEBUGP("TYPE %04X ", rh->type); + DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); + + DEBUGP("IPv6 RT segsleft %02X ", + (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], + rh->segments_left, + !!(rtinfo->invflags & IP6T_RT_INV_SGS)))); + DEBUGP("type %02X %02X %02X ", + rtinfo->rt_type, rh->type, + (!(rtinfo->flags & IP6T_RT_TYP) || + ((rtinfo->rt_type == rh->type) ^ + !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); + DEBUGP("len %02X %04X %02X ", + rtinfo->hdrlen, hdrlen, + (!(rtinfo->flags & IP6T_RT_LEN) || + ((rtinfo->hdrlen == hdrlen) ^ + !!(rtinfo->invflags & IP6T_RT_INV_LEN)))); + DEBUGP("res %02X %02X %02X ", + (rtinfo->flags & IP6T_RT_RES), + ((struct rt0_hdr *)rh)->reserved, + !((rtinfo->flags & IP6T_RT_RES) && + (((struct rt0_hdr *)rh)->reserved))); + + ret = (rh != NULL) + && + (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], + rh->segments_left, + !!(rtinfo->invflags & IP6T_RT_INV_SGS))) + && + (!(rtinfo->flags & IP6T_RT_LEN) || + ((rtinfo->hdrlen == hdrlen) ^ + !!(rtinfo->invflags & IP6T_RT_INV_LEN))) + && + (!(rtinfo->flags & IP6T_RT_TYP) || + ((rtinfo->rt_type == rh->type) ^ + !!(rtinfo->invflags & IP6T_RT_INV_TYP))); if (ret && (rtinfo->flags & IP6T_RT_RES)) { u_int32_t *rp, _reserved; rp = skb_header_pointer(skb, - ptr + offsetof(struct rt0_hdr, reserved), - sizeof(_reserved), &_reserved); + ptr + offsetof(struct rt0_hdr, + reserved), + sizeof(_reserved), + &_reserved); ret = (*rp == 0); } - DEBUGP("#%d ",rtinfo->addrnr); - if ( !(rtinfo->flags & IP6T_RT_FST) ){ - return ret; + DEBUGP("#%d ", rtinfo->addrnr); + if (!(rtinfo->flags & IP6T_RT_FST)) { + return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { DEBUGP("Not strict "); - if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){ + if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { DEBUGP("There isn't enough space\n"); return 0; } else { unsigned int i = 0; - DEBUGP("#%d ",rtinfo->addrnr); - for(temp=0; temp<(unsigned int)((hdrlen-8)/16); temp++){ + DEBUGP("#%d ", rtinfo->addrnr); + for (temp = 0; + temp < (unsigned int)((hdrlen - 8) / 16); + temp++) { ap = skb_header_pointer(skb, ptr + sizeof(struct rt0_hdr) @@ -141,24 +147,26 @@ match(const struct sk_buff *skb, BUG_ON(ap == NULL); if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - DEBUGP("i=%d temp=%d;\n",i,temp); + DEBUGP("i=%d temp=%d;\n", i, temp); i++; } - if (i==rtinfo->addrnr) break; + if (i == rtinfo->addrnr) + break; } DEBUGP("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; - else return 0; + else + return 0; } } else { DEBUGP("Strict "); - if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){ + if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { DEBUGP("There isn't enough space\n"); return 0; } else { - DEBUGP("#%d ",rtinfo->addrnr); - for(temp=0; temp<rtinfo->addrnr; temp++){ + DEBUGP("#%d ", rtinfo->addrnr); + for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr + sizeof(struct rt0_hdr) @@ -171,9 +179,11 @@ match(const struct sk_buff *skb, break; } DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr); - if ((temp == rtinfo->addrnr) && (temp == (unsigned int)((hdrlen-8)/16))) + if ((temp == rtinfo->addrnr) && + (temp == (unsigned int)((hdrlen - 8) / 16))) return ret; - else return 0; + else + return 0; } } @@ -183,32 +193,31 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static int checkentry(const char *tablename, - const struct ip6t_ip6 *ip, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) + const void *entry, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) { - const struct ip6t_rt *rtinfo = matchinfo; - - if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) { - DEBUGP("ip6t_rt: matchsize %u != %u\n", - matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt))); - return 0; - } - if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { - DEBUGP("ip6t_rt: unknown flags %X\n", - rtinfo->invflags); - return 0; - } - if ( (rtinfo->flags & (IP6T_RT_RES|IP6T_RT_FST_MASK)) && - (!(rtinfo->flags & IP6T_RT_TYP) || - (rtinfo->rt_type != 0) || - (rtinfo->invflags & IP6T_RT_INV_TYP)) ) { - DEBUGP("`--rt-type 0' required before `--rt-0-*'"); - return 0; - } - - return 1; + const struct ip6t_rt *rtinfo = matchinfo; + + if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) { + DEBUGP("ip6t_rt: matchsize %u != %u\n", + matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt))); + return 0; + } + if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { + DEBUGP("ip6t_rt: unknown flags %X\n", rtinfo->invflags); + return 0; + } + if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && + (!(rtinfo->flags & IP6T_RT_TYP) || + (rtinfo->rt_type != 0) || + (rtinfo->invflags & IP6T_RT_INV_TYP))) { + DEBUGP("`--rt-type 0' required before `--rt-0-*'"); + return 0; + } + + return 1; } static struct ip6t_match rt_match = { @@ -220,12 +229,12 @@ static struct ip6t_match rt_match = { static int __init init(void) { - return ip6t_register_match(&rt_match); + return ip6t_register_match(&rt_match); } static void __exit cleanup(void) { - ip6t_unregister_match(&rt_match); + ip6t_unregister_match(&rt_match); } module_init(init); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 4c0028671c2..ce4a968e1f7 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -97,6 +97,7 @@ static struct ip6t_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, + .af = AF_INET6, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 85c1e6eada1..30a4627e000 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -127,6 +127,7 @@ static struct ip6t_table packet_mangler = { .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, + .af = AF_INET6, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index c2982efd14a..db28ba3855e 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -106,11 +106,12 @@ static struct } }; -static struct ip6t_table packet_raw = { +static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, - .me = THIS_MODULE + .me = THIS_MODULE, + .af = AF_INET6, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 753a3ae8502..ac702a29dd1 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -74,7 +74,7 @@ static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ", + return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ", NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); } @@ -335,10 +335,10 @@ static struct nf_hook_ops ipv6_conntrack_local_in_ops = { #ifdef CONFIG_SYSCTL /* From nf_conntrack_proto_icmpv6.c */ -extern unsigned long nf_ct_icmpv6_timeout; +extern unsigned int nf_ct_icmpv6_timeout; /* From nf_conntrack_frag6.c */ -extern unsigned long nf_ct_frag6_timeout; +extern unsigned int nf_ct_frag6_timeout; extern unsigned int nf_ct_frag6_low_thresh; extern unsigned int nf_ct_frag6_high_thresh; @@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = { }; #endif +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int ipv6_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, + &tuple->src.u3.ip6); + NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, + &tuple->dst.u3.ip6); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4, + [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4, +}; + +static int ipv6_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), + sizeof(u_int32_t) * 4); + memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]), + sizeof(u_int32_t) * 4); + + return 0; +} +#endif + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .l3proto = PF_INET6, .name = "ipv6", @@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .print_tuple = ipv6_print_tuple, .print_conntrack = ipv6_print_conntrack, .prepare = ipv6_prepare, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = ipv6_tuple_to_nfattr, + .nfattr_to_tuple = ipv6_nfattr_to_tuple, +#endif .get_features = ipv6_get_features, .me = THIS_MODULE, }; @@ -537,7 +584,7 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); static int __init init(void) { - need_nf_conntrack(); + need_conntrack(); return init_or_cleanup(1); } @@ -548,9 +595,3 @@ static void __exit fini(void) module_init(init); module_exit(fini); - -void need_ip6_conntrack(void) -{ -} - -EXPORT_SYMBOL(need_ip6_conntrack); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a7e03cfacd0..09945c33305 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, return 1; } +/* Add 1; spaces filled with 0. */ +static u_int8_t invmap[] = { + [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, + [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, + [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 +}; + static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - /* Add 1; spaces filled with 0. */ - static u_int8_t invmap[] = { - [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, - [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, - [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, - [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 - }; - int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) return 0; @@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - inproto = nf_ct_find_proto(PF_INET6, inprotonum); + inproto = __nf_ct_proto_find(PF_INET6, inprotonum); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, @@ -255,6 +255,60 @@ skipped: return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +static int icmpv6_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t) +}; + +static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMPV6_TYPE-1] + || !tb[CTA_PROTO_ICMPV6_CODE-1] + || !tb[CTA_PROTO_ICMPV6_ID-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]); + tuple->src.u.icmp.id = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); + + if (tuple->dst.u.icmp.type < 128 + || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) + || !invmap[tuple->dst.u.icmp.type - 128]) + return -EINVAL; + + return 0; +} +#endif + struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = { .l3proto = PF_INET6, @@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = .packet = icmpv6_packet, .new = icmpv6_new, .error = icmpv6_error, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = icmpv6_tuple_to_nfattr, + .nfattr_to_tuple = icmpv6_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c2c52af9e56..84ef9a13108 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -70,8 +70,8 @@ struct nf_ct_frag6_skb_cb struct nf_ct_frag6_queue { - struct nf_ct_frag6_queue *next; - struct list_head lru_list; /* lru list member */ + struct hlist_node list; + struct list_head lru_list; /* lru list member */ __u32 id; /* fragment id */ struct in6_addr saddr; @@ -90,24 +90,21 @@ struct nf_ct_frag6_queue #define FIRST_IN 2 #define LAST_IN 1 __u16 nhoffset; - struct nf_ct_frag6_queue **pprev; }; /* Hash table. */ #define FRAG6Q_HASHSZ 64 -static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; -static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; +static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ]; +static DEFINE_RWLOCK(nf_ct_frag6_lock); static u32 nf_ct_frag6_hash_rnd; static LIST_HEAD(nf_ct_frag6_lru_list); int nf_ct_frag6_nqueues = 0; static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) { - if (fq->next) - fq->next->pprev = fq->pprev; - *fq->pprev = fq->next; + hlist_del(&fq->list); list_del(&fq->lru_list); nf_ct_frag6_nqueues--; } @@ -158,28 +155,18 @@ static void nf_ct_frag6_secret_rebuild(unsigned long dummy) get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); for (i = 0; i < FRAG6Q_HASHSZ; i++) { struct nf_ct_frag6_queue *q; + struct hlist_node *p, *n; - q = nf_ct_frag6_hash[i]; - while (q) { - struct nf_ct_frag6_queue *next = q->next; + hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) { unsigned int hval = ip6qhashfn(q->id, &q->saddr, &q->daddr); - if (hval != i) { - /* Unlink. */ - if (q->next) - q->next->pprev = q->pprev; - *q->pprev = q->next; - + hlist_del(&q->list); /* Relink to new hash chain. */ - if ((q->next = nf_ct_frag6_hash[hval]) != NULL) - q->next->pprev = &q->next; - nf_ct_frag6_hash[hval] = q; - q->pprev = &nf_ct_frag6_hash[hval]; + hlist_add_head(&q->list, + &nf_ct_frag6_hash[hval]); } - - q = next; } } write_unlock(&nf_ct_frag6_lock); @@ -314,15 +301,17 @@ out: /* Creation primitives. */ - static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, struct nf_ct_frag6_queue *fq_in) { struct nf_ct_frag6_queue *fq; +#ifdef CONFIG_SMP + struct hlist_node *n; +#endif write_lock(&nf_ct_frag6_lock); #ifdef CONFIG_SMP - for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { + hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { if (fq->id == fq_in->id && !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { @@ -340,10 +329,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, atomic_inc(&fq->refcnt); atomic_inc(&fq->refcnt); - if ((fq->next = nf_ct_frag6_hash[hash]) != NULL) - fq->next->pprev = &fq->next; - nf_ct_frag6_hash[hash] = fq; - fq->pprev = &nf_ct_frag6_hash[hash]; + hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]); INIT_LIST_HEAD(&fq->lru_list); list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); nf_ct_frag6_nqueues++; @@ -371,7 +357,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct init_timer(&fq->timer); fq->timer.function = nf_ct_frag6_expire; fq->timer.data = (long) fq; - fq->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&fq->lock); atomic_set(&fq->refcnt, 1); return nf_ct_frag6_intern(hash, fq); @@ -384,10 +370,11 @@ static __inline__ struct nf_ct_frag6_queue * fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) { struct nf_ct_frag6_queue *fq; + struct hlist_node *n; unsigned int hash = ip6qhashfn(id, src, dst); read_lock(&nf_ct_frag6_lock); - for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { + hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { if (fq->id == id && !ipv6_addr_cmp(src, &fq->saddr) && !ipv6_addr_cmp(dst, &fq->daddr)) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a66900cda2a..66f1d12ea57 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -32,6 +32,7 @@ #include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> +#include <linux/skbuff.h> #include <asm/uaccess.h> #include <asm/ioctls.h> #include <asm/bug.h> @@ -433,25 +434,14 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } + skb_kill_datagram(sk, skb, flags); /* Error for blocking case is chosen to masquerade as some normal condition. */ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; /* FIXME: increment a raw6 drops counter here */ - goto out_free; + goto out; } static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5d316cb72ec..15e1456b3f1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -581,7 +581,6 @@ err: * the last and the first frames arrived and all the bits are here. */ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, - unsigned int *nhoffp, struct net_device *dev) { struct sk_buff *fp, *head = fq->fragments; @@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->dev = dev; skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); + IP6CB(head)->nhoff = nhoff; *skb_in = head; @@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); fq->fragments = NULL; - *nhoffp = nhoff; return 1; out_oversize: @@ -678,7 +677,7 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_frag_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct net_device *dev = skb->dev; @@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) skb->h.raw += sizeof(struct frag_hdr); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - *nhoffp = (u8*)fhdr - skb->nh.raw; + IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; return 1; } @@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) spin_lock(&fq->lock); - ip6_frag_queue(fq, skb, fhdr, *nhoffp); + ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); + ret = ip6_frag_reasm(fq, skbp, dev); spin_unlock(&fq->lock); fq_put(fq, NULL); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7c68bfbee36..e0d3ad02ffb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -24,6 +24,7 @@ * reachable. otherwise, round-robin the list. */ +#include <linux/capability.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> @@ -413,11 +414,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, rt = ip6_rt_copy(ort); if (rt) { - ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); - - if (!(rt->rt6i_flags&RTF_GATEWAY)) + if (!(rt->rt6i_flags&RTF_GATEWAY)) { + if (rt->rt6i_dst.plen != 128 && + ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) + rt->rt6i_flags |= RTF_ANYCAST; ipv6_addr_copy(&rt->rt6i_gateway, daddr); + } + ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->u.dst.flags |= DST_HOST; @@ -1413,7 +1417,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->u.dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; - if (!anycast) + if (anycast) + rt->rt6i_flags |= RTF_ANYCAST; + else rt->rt6i_flags |= RTF_LOCAL; rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (rt->rt6i_nexthop == NULL) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c3123c9e1a8..c2d3e17beae 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -20,6 +20,7 @@ #include <linux/config.h> #include <linux/module.h> +#include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -33,6 +34,7 @@ #include <asm/uaccess.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> +#include <linux/if_ether.h> #include <net/sock.h> #include <net/snmp.h> @@ -183,7 +185,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int if (dev == NULL) return NULL; - nt = dev->priv; + nt = netdev_priv(dev); dev->init = ipip6_tunnel_init; nt->parms = *parms; @@ -209,7 +211,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) write_unlock_bh(&ipip6_lock); dev_put(dev); } else { - ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv); + ipip6_tunnel_unlink(netdev_priv(dev)); dev_put(dev); } } @@ -345,7 +347,7 @@ out: rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { - struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv; + struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { rel_type = ICMPV6_DEST_UNREACH; rel_code = ICMPV6_ADDR_UNREACH; @@ -380,6 +382,7 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; tunnel->stat.rx_packets++; @@ -422,7 +425,7 @@ static inline u32 try_6to4(struct in6_addr *v6dst) static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = skb->nh.ipv6h; @@ -551,6 +554,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags = 0; dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -607,7 +611,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip6_tunnel_locate(&p, 0); } if (t == NULL) - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof(p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; @@ -644,7 +648,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; break; } - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); ipip6_tunnel_unlink(t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; @@ -680,7 +684,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if ((t = ipip6_tunnel_locate(&p, 0)) == NULL) goto done; err = -EPERM; - if (t == ipip6_fb_tunnel_dev->priv) + if (t == netdev_priv(ipip6_fb_tunnel_dev)) goto done; dev = t->dev; } @@ -697,7 +701,7 @@ done: static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) { - return &(((struct ip_tunnel*)dev->priv)->stat); + return &(((struct ip_tunnel*)netdev_priv(dev))->stat); } static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) @@ -720,7 +724,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = 1500 - sizeof(struct iphdr); + dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; @@ -732,7 +736,7 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel; struct iphdr *iph; - tunnel = (struct ip_tunnel*)dev->priv; + tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; @@ -772,7 +776,7 @@ static int ipip6_tunnel_init(struct net_device *dev) static int __init ipip6_fb_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel = dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; tunnel->dev = dev; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8827389abaf..66d04004afd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -48,6 +48,7 @@ #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> +#include <net/inet6_connection_sock.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> @@ -59,232 +60,45 @@ #include <net/addrconf.h> #include <net/snmp.h> #include <net/dsfield.h> +#include <net/timewait_sock.h> #include <asm/uaccess.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +/* Socket used for sending RSTs and ACKs */ +static struct socket *tcp6_socket; + static void tcp_v6_send_reset(struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); - -static struct tcp_func ipv6_mapped; -static struct tcp_func ipv6_specific; - -static inline int tcp_v6_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) -{ - const struct sock *sk2; - const struct hlist_node *node; - - /* We must walk the whole port owner list in this case. -DaveM */ - sk_for_each_bound(sk2, node, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } - return node != NULL; -} +static struct inet_connection_sock_af_ops ipv6_mapped; +static struct inet_connection_sock_af_ops ipv6_specific; -/* Grrr, addr_type already calculated by caller, but I don't want - * to add some silly "cookie" argument to this method just for that. - * But it doesn't matter, the recalculation is in the rarest path - * this function ever takes. - */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - struct hlist_node *node; - int ret; - - local_bh_disable(); - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; - - do { - head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) - goto next; - break; - next: - spin_unlock(&head->lock); - if (++rover > high) - rover = low; - } while (--remaining > 0); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (unlikely(remaining <= 0)) - goto fail; - - /* OK, here is the one we will use. */ - snum = rover; - } else { - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) - goto tb_found; - } - tb = NULL; - goto tb_not_found; -tb_found: - if (tb && !hlist_empty(&tb->owners)) { - if (tb->fastreuse > 0 && sk->sk_reuse && - sk->sk_state != TCP_LISTEN) { - goto success; - } else { - ret = 1; - if (tcp_v6_bind_conflict(sk, tb)) - goto fail_unlock; - } - } -tb_not_found: - ret = 1; - if (tb == NULL) { - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); - if (tb == NULL) - goto fail_unlock; - } - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else - tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; - -success: - if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); - ret = 0; - -fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); - return ret; -} - -static __inline__ void __tcp_v6_hash(struct sock *sk) -{ - struct hlist_head *list; - rwlock_t *lock; - - BUG_TRAP(sk_unhashed(sk)); - - if (sk->sk_state == TCP_LISTEN) { - list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_hashinfo.lhash_lock; - inet_listen_wlock(&tcp_hashinfo); - } else { - unsigned int hash; - sk->sk_hash = hash = inet6_sk_ehashfn(sk); - hash &= (tcp_hashinfo.ehash_size - 1); - list = &tcp_hashinfo.ehash[hash].chain; - lock = &tcp_hashinfo.ehash[hash].lock; - write_lock(lock); - } - - __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock(lock); + return inet_csk_get_port(&tcp_hashinfo, sk, snum, + inet6_csk_bind_conflict); } - static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->af_specific == &ipv6_mapped) { + if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { tcp_prot.hash(sk); return; } local_bh_disable(); - __tcp_v6_hash(sk); + __inet6_hash(&tcp_hashinfo, sk); local_bh_enable(); } } -/* - * Open request hash tables. - */ - -static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) -{ - u32 a, b, c; - - a = raddr->s6_addr32[0]; - b = raddr->s6_addr32[1]; - c = raddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += raddr->s6_addr32[3]; - b += (u32) rport; - __jhash_mix(a, b, c); - - return c & (TCP_SYNQ_HSIZE - 1); -} - -static struct request_sock *tcp_v6_search_req(const struct sock *sk, - struct request_sock ***prevp, - __u16 rport, - struct in6_addr *raddr, - struct in6_addr *laddr, - int iif) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; - - for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; - (req = *prev) != NULL; - prev = &req->dl_next) { - const struct tcp6_request_sock *treq = tcp6_rsk(req); - - if (inet_rsk(req)->rmt_port == rport && - req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&treq->rmt_addr, raddr) && - ipv6_addr_equal(&treq->loc_addr, laddr) && - (!treq->iif || treq->iif == iif)) { - BUG_TRAP(req->sk == NULL); - *prevp = prev; - return req; - } - } - - return NULL; -} - static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, struct in6_addr *saddr, struct in6_addr *daddr, @@ -308,195 +122,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; - const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); - - tw = inet_twsk(sk2); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - if (tcptw->tw_ts_recent_stamp && - (!twp || - (sysctl_tcp_tw_reuse && - xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { - /* See comment in tcp_ipv4.c */ - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (!tp->write_seq) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); - goto unique; - } else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 tcpv6_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - - return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, - inet->dport); -} - -static int tcp_v6_hash_connect(struct sock *sk) -{ - unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + tcpv6_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v6_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __tcp_v6_hash(sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __tcp_v6_hash(sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __tcp_v6_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; @@ -571,7 +202,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, */ if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = tp->ext_header_len; + u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); @@ -583,14 +214,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - tp->af_specific = &ipv6_mapped; + icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { - tp->ext_header_len = exthdrlen; - tp->af_specific = &ipv6_specific; + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { @@ -643,16 +274,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - tp->ext_header_len = 0; + icsk->icsk_ext_hdr_len = 0; if (np->opt) - tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v6_hash_connect(sk); + err = inet6_hash_connect(&tcp_death_row, sk); if (err) goto late_failure; @@ -758,7 +390,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - if (tp->pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ @@ -775,8 +407,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; - req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, inet6_iif(skb)); + req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, + &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -822,7 +454,7 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; @@ -888,8 +520,8 @@ done: static void tcp_v6_reqsk_destructor(struct request_sock *req) { - if (tcp6_rsk(req)->pktopts) - kfree_skb(tcp6_rsk(req)->pktopts); + if (inet6_rsk(req)->pktopts) + kfree_skb(inet6_rsk(req)->pktopts); } static struct request_sock_ops tcp6_request_sock_ops = { @@ -901,26 +533,15 @@ static struct request_sock_ops tcp6_request_sock_ops = { .send_reset = tcp_v6_send_reset }; -static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_skb_parm *opt = IP6CB(skb); - - if (np->rxopt.all) { - if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || - (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || - ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) - return 1; - } - return 0; -} - +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); @@ -993,7 +614,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(NULL, buff, &fl, NULL, 0); + ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); return; @@ -1057,7 +678,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(NULL, buff, &fl, NULL, 0); + ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); return; } @@ -1091,8 +712,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) struct sock *nsk; /* Find possible connection requests. */ - req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, inet6_iif(skb)); + req = inet6_csk_search_req(sk, &prev, th->source, + &skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, inet6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1116,23 +738,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) return sk; } -static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); - inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); -} - - /* FIXME: this is substantially similar to the ipv4 code. * Can some kind of merge be done? -- erics */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp6_request_sock *treq; + struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); @@ -1157,7 +768,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp6_request_sock_ops); + req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) goto drop; @@ -1170,7 +781,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = tcp6_rsk(req); + treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); TCP_ECN_create_request(req, skb->h.th); @@ -1196,8 +807,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (tcp_v6_send_synack(sk, req, NULL)) goto drop; - tcp_v6_synq_add(sk, req); - + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; drop: @@ -1212,7 +822,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1247,7 +857,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); - newtp->af_specific = &ipv6_mapped; + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; @@ -1261,10 +871,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 af_tcp.af_specific. + worked with IPv6 icsk.icsk_af_ops. Sync it now. */ - tcp_sync_mss(newsk, newtp->pmtu_cookie); + tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } @@ -1371,10 +981,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, sock_kfree_s(sk, opt, opt->tot_len); } - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) - newtp->ext_header_len = newnp->opt->opt_nflen + - newnp->opt->opt_flen; + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); @@ -1382,7 +992,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; - __tcp_v6_hash(newsk); + __inet6_hash(&tcp_hashinfo, newsk); inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; @@ -1546,7 +1156,7 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int tcp_v6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct tcphdr *th; @@ -1679,139 +1289,16 @@ do_time_wait: goto discard_it; } -static int tcp_v6_rebuild_header(struct sock *sk) -{ - int err; - struct dst_entry *dst; - struct ipv6_pinfo *np = inet6_sk(sk); - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_route_caps = 0; - return err; - } - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - return 0; -} - -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) -{ - struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; - struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_sport = inet->sport; - fl.fl_ip_dport = inet->dport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - int err = ip6_dst_lookup(sk, &dst, &fl); - - if (err) { - sk->sk_err_soft = -err; - return err; - } - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_route_caps = 0; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - skb->dst = dst_clone(dst); - - /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - - return ip6_xmit(sk, skb, &fl, np->opt, 0); -} - -static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; - - sin6->sin6_family = AF_INET6; - ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); - sin6->sin6_port = inet_sk(sk)->dport; - /* We do not store received flowlabel for TCP */ - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; -} - static int tcp_v6_remember_stamp(struct sock *sk) { /* Alas, not yet... */ return 0; } -static struct tcp_func ipv6_specific = { - .queue_xmit = tcp_v6_xmit, +static struct inet_connection_sock_af_ops ipv6_specific = { + .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, - .rebuild_header = tcp_v6_rebuild_header, + .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v6_remember_stamp, @@ -1819,7 +1306,7 @@ static struct tcp_func ipv6_specific = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; @@ -1827,7 +1314,7 @@ static struct tcp_func ipv6_specific = { * TCP over IPv4 via INET6 API */ -static struct tcp_func ipv6_mapped = { +static struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1838,7 +1325,7 @@ static struct tcp_func ipv6_mapped = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; @@ -1877,8 +1364,9 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; - tp->af_specific = &ipv6_specific; + icsk->icsk_af_ops = &ipv6_specific; icsk->icsk_ca_ops = &tcp_init_congestion_ops; + icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -1900,14 +1388,13 @@ static int tcp_v6_destroy_sock(struct sock *sk) static void get_openreq6(struct seq_file *seq, struct sock *sk, struct request_sock *req, int i, int uid) { - struct in6_addr *dest, *src; int ttd = req->expires - jiffies; + struct in6_addr *src = &inet6_rsk(req)->loc_addr; + struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; if (ttd < 0) ttd = 0; - src = &tcp6_rsk(req)->loc_addr; - dest = &tcp6_rsk(req)->rmt_addr; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", @@ -1988,14 +1475,14 @@ static void get_timewait6_sock(struct seq_file *seq, { struct in6_addr *dest, *src; __u16 destp, srcp; - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tcp6tw->tw_v6_daddr; - src = &tcp6tw->tw_v6_rcv_saddr; + dest = &tw6->tw_v6_daddr; + src = &tw6->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -2093,7 +1580,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, }; @@ -2110,13 +1597,27 @@ static struct inet_protosw tcpv6_protosw = { .ops = &inet6_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }; void __init tcpv6_init(void) { + int err; + /* register inet6 protocol */ if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0) printk(KERN_ERR "tcpv6_init: Could not register protocol\n"); inet6_register_protosw(&tcpv6_protosw); + + err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_TCP, &tcp6_socket); + if (err < 0) + panic("Failed to create the TCPv6 control socket.\n"); + tcp6_socket->sk->sk_allocation = GFP_ATOMIC; + + /* Unhash it so that IP input processing does not even + * see it, we do not wish this socket to see incoming + * packets. + */ + tcp6_socket->sk->sk_prot->unhash(tcp6_socket->sk); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cc8731eb55..c47648892c0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/init.h> +#include <linux/skbuff.h> #include <asm/uaccess.h> #include <net/sock.h> @@ -300,20 +301,7 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (flags & MSG_DONTWAIT) { UDP6_INC_STATS_USER(UDP_MIB_INERRORS); @@ -447,7 +435,7 @@ out: read_unlock(&udp_hash_lock); } -static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int udpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct sock *sk; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 28c29d78338..1ca2da68ef6 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/string.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/ip.h> @@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } -int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) +int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi) { struct sk_buff *skb = *pskb; int err; @@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) int nexthdr; unsigned int nhoff; - nhoff = *nhoffp; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; seq = 0; @@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) skb->sp->len += xfrm_nr; skb->ip_summed = CHECKSUM_NONE; + nf_reset(skb); + if (decaps) { if (!(skb->dev->flags&IFF_LOOPBACK)) { dst_release(skb->dst); @@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) netif_rx(skb); return -1; } else { +#ifdef CONFIG_NETFILTER + skb->nh.ipv6h->payload_len = htons(skb->len); + __skb_push(skb, skb->data - skb->nh.raw); + + NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + ip6_rcv_finish); + return -1; +#else return 1; +#endif } drop_unlock: @@ -144,7 +157,7 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +int xfrm6_rcv(struct sk_buff **pskb) { - return xfrm6_rcv_spi(pskb, nhoffp, 0); + return xfrm6_rcv_spi(pskb, 0); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6b9867717d1..80242172a5d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -9,9 +9,11 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compiler.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/icmpv6.h> +#include <linux/netfilter_ipv6.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/ipv6.h> @@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -int xfrm6_output(struct sk_buff *skb) +static int xfrm6_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm6_encap(skb); + xfrm6_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - skb->nh.raw = skb->data; - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + skb->nh.raw = skb->data; + + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; + err = 0; out_exit: return err; @@ -142,3 +150,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +static int xfrm6_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm6_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm6_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm6_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm6_output_finish); +} diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index bf0d0abc387..a5723024d3b 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -15,6 +15,7 @@ #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <net/ipv6.h> +#include <net/addrconf.h> static struct xfrm_state_afinfo xfrm6_state_afinfo; @@ -41,6 +42,22 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); + if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { + struct rt6_info *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)daddr, + } + } + }; + if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, + &fl_tunnel, AF_INET6)) { + ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, + (struct in6_addr *)&x->props.saddr); + dst_release(&rt->u.dst); + } + } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET6; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fbef7826a74..8cfc58b96fc 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -259,8 +259,7 @@ try_next_2:; spi = 0; goto out; alloc_spi: - X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for " - "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for " NIP6_FMT "\n", __FUNCTION__, NIP6(*(struct in6_addr *)saddr)); x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC); @@ -323,9 +322,8 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { - X6TPRINTK3(KERN_DEBUG "%s(): x6spi object " - "for %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " - "found at %p\n", + X6TPRINTK3(KERN_DEBUG "%s(): x6spi object for " NIP6_FMT + " found at %p\n", __FUNCTION__, NIP6(*(struct in6_addr *)saddr), x6spi); @@ -397,7 +395,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int xfrm6_tunnel_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; @@ -405,11 +403,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) u32 spi; /* device-like_ip6ip6_handler() */ - if (handler && handler->handler(pskb, nhoffp) == 0) + if (handler && handler->handler(pskb) == 0) return 0; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(pskb, nhoffp, spi); + return xfrm6_rcv_spi(pskb, spi); } static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |