diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 805 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 3 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 108 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 3 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 7 | ||||
-rw-r--r-- | net/ipv6/inet6_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 16 | ||||
-rw-r--r-- | net/ipv6/ip6_flowlabel.c | 3 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 34 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 86 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 139 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6t_hbh.c | 4 | ||||
-rw-r--r-- | net/ipv6/proc.c | 1 | ||||
-rw-r--r-- | net/ipv6/raw.c | 12 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 64 | ||||
-rw-r--r-- | net/ipv6/udp.c | 16 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 31 |
17 files changed, 731 insertions, 605 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 413054f02aa..34d2d649e39 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -82,7 +82,7 @@ #include <linux/random.h> #endif -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unaligned.h> #include <linux/proc_fs.h> @@ -98,7 +98,11 @@ #endif #define INFINITY_LIFE_TIME 0xFFFFFFFF -#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) +#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b))) + +#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) +#define ADDRCONF_TIMER_FUZZ (HZ / 4) +#define ADDRCONF_TIMER_FUZZ_MAX (HZ) #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); @@ -127,8 +131,8 @@ static int ipv6_count_addresses(struct inet6_dev *idev); /* * Configured unicast address hash table */ -static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_RWLOCK(addrconf_hash_lock); +static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(addrconf_hash_lock); static void addrconf_verify(unsigned long); @@ -138,8 +142,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); -static void addrconf_bonding_change(struct net_device *dev, - unsigned long event); +static void addrconf_type_change(struct net_device *dev, + unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); @@ -152,8 +156,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev); +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); @@ -250,8 +254,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp) __in6_ifa_put(ifp); } -enum addrconf_timer_t -{ +enum addrconf_timer_t { AC_NONE, AC_DAD, AC_RS, @@ -271,7 +274,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, case AC_RS: ifp->timer.function = addrconf_rs_timer; break; - default:; + default: + break; } ifp->timer.expires = jiffies + when; add_timer(&ifp->timer); @@ -318,7 +322,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; - WARN_ON(idev->addr_list != NULL); + WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(idev->mc_list != NULL); #ifdef NET_REFCNT_DEBUG @@ -326,7 +330,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) #endif dev_put(dev); if (!idev->dead) { - printk("Freeing alive inet6 device %p\n", idev); + pr_warning("Freeing alive inet6 device %p\n", idev); return; } snmp6_free_dev(idev); @@ -351,6 +355,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; + INIT_LIST_HEAD(&ndev->addr_list); + memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; @@ -402,6 +408,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) #endif #ifdef CONFIG_IPV6_PRIVACY + INIT_LIST_HEAD(&ndev->tempaddr_list); setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || @@ -439,8 +446,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev) ASSERT_RTNL(); - if ((idev = __in6_dev_get(dev)) == NULL) { - if ((idev = ipv6_add_dev(dev)) == NULL) + idev = __in6_dev_get(dev); + if (!idev) { + idev = ipv6_add_dev(dev); + if (!idev) return NULL; } @@ -466,7 +475,8 @@ static void dev_forward_change(struct inet6_dev *idev) else ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); } - for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + + list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; if (idev->cnf.forwarding) @@ -523,12 +533,16 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) } #endif -/* Nobody refers to this ifaddr, destroy it */ +static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu); + kfree(ifp); +} +/* Nobody refers to this ifaddr, destroy it */ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - WARN_ON(ifp->if_next != NULL); - WARN_ON(ifp->lst_next != NULL); + WARN_ON(!hlist_unhashed(&ifp->addr_lst)); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); @@ -537,54 +551,46 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); if (del_timer(&ifp->timer)) - printk("Timer is still running, when freeing ifa=%p\n", ifp); + pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (!ifp->dead) { - printk("Freeing alive inet6 address %p\n", ifp); + pr_warning("Freeing alive inet6 address %p\n", ifp); return; } dst_release(&ifp->rt->u.dst); - kfree(ifp); + call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } static void ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, **ifap; + struct list_head *p; int ifp_scope = ipv6_addr_src_scope(&ifp->addr); /* * Each device address list is sorted in order of scope - * global before linklocal. */ - for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; - ifap = &ifa->if_next) { + list_for_each(p, &idev->addr_list) { + struct inet6_ifaddr *ifa + = list_entry(p, struct inet6_ifaddr, if_list); if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) break; } - ifp->if_next = *ifap; - *ifap = ifp; + list_add_tail(&ifp->if_list, p); } -/* - * Hash function taken from net_alias.c - */ -static u8 ipv6_addr_hash(const struct in6_addr *addr) +static u32 ipv6_addr_hash(const struct in6_addr *addr) { - __u32 word; - /* * We perform the hash function over the last 64 bits of the address * This will include the IEEE address token on links that support it. */ - - word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]); - word ^= (word >> 16); - word ^= (word >> 8); - - return ((word ^ (word >> 4)) & 0x0f); + return jhash_2words((__force u32)addr->s6_addr32[2], + (__force u32)addr->s6_addr32[3], 0) + & (IN6_ADDR_HSIZE - 1); } /* On success it returns ifp with increased reference count */ @@ -595,7 +601,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; - int hash; + unsigned int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -616,7 +622,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, goto out2; } - write_lock(&addrconf_hash_lock); + spin_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { @@ -643,6 +649,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, spin_lock_init(&ifa->lock); init_timer(&ifa->timer); + INIT_HLIST_NODE(&ifa->addr_lst); ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -669,10 +676,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, /* Add to big hash table */ hash = ipv6_addr_hash(addr); - ifa->lst_next = inet6_addr_lst[hash]; - inet6_addr_lst[hash] = ifa; - in6_ifa_hold(ifa); - write_unlock(&addrconf_hash_lock); + hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + spin_unlock(&addrconf_hash_lock); write_lock(&idev->lock); /* Add to inet6_dev unicast addr list. */ @@ -680,8 +685,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, #ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { - ifa->tmp_next = idev->tempaddr_list; - idev->tempaddr_list = ifa; + list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } #endif @@ -700,7 +704,7 @@ out2: return ifa; out: - write_unlock(&addrconf_hash_lock); + spin_unlock(&addrconf_hash_lock); goto out2; } @@ -708,7 +712,7 @@ out: static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, **ifap; + struct inet6_ifaddr *ifa, *ifn; struct inet6_dev *idev = ifp->idev; int hash; int deleted = 0, onlink = 0; @@ -718,42 +722,27 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ifp->dead = 1; - write_lock_bh(&addrconf_hash_lock); - for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; - ifap = &ifa->lst_next) { - if (ifa == ifp) { - *ifap = ifa->lst_next; - __in6_ifa_put(ifp); - ifa->lst_next = NULL; - break; - } - } - write_unlock_bh(&addrconf_hash_lock); + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifp->addr_lst); + spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); #ifdef CONFIG_IPV6_PRIVACY if (ifp->flags&IFA_F_TEMPORARY) { - for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; - ifap = &ifa->tmp_next) { - if (ifa == ifp) { - *ifap = ifa->tmp_next; - if (ifp->ifpub) { - in6_ifa_put(ifp->ifpub); - ifp->ifpub = NULL; - } - __in6_ifa_put(ifp); - ifa->tmp_next = NULL; - break; - } + list_del(&ifp->tmp_list); + if (ifp->ifpub) { + in6_ifa_put(ifp->ifpub); + ifp->ifpub = NULL; } + __in6_ifa_put(ifp); } #endif - for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { + list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { if (ifa == ifp) { - *ifap = ifa->if_next; + list_del_init(&ifp->if_list); __in6_ifa_put(ifp); - ifa->if_next = NULL; + if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) break; deleted = 1; @@ -786,7 +775,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } } } - ifap = &ifa->if_next; } write_unlock_bh(&idev->lock); @@ -1165,7 +1153,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, continue; read_lock_bh(&idev->lock); - for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { + list_for_each_entry(score->ifa, &idev->addr_list, if_list) { int i; /* @@ -1243,7 +1231,6 @@ try_nextdev: in6_ifa_put(hiscore->ifa); return 0; } - EXPORT_SYMBOL(ipv6_dev_get_saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, @@ -1253,12 +1240,14 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, int err = -EADDRNOTAVAIL; rcu_read_lock(); - if ((idev = __in6_dev_get(dev)) != NULL) { + idev = __in6_dev_get(dev); + if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { ipv6_addr_copy(addr, &ifp->addr); err = 0; break; @@ -1276,7 +1265,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) + list_for_each_entry(ifp, &idev->addr_list, if_list) cnt++; read_unlock_bh(&idev->lock); return cnt; @@ -1285,11 +1274,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *node; + unsigned int hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1299,27 +1289,28 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, break; } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); + return ifp != NULL; } EXPORT_SYMBOL(ipv6_chk_addr); -static -int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev) +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); + unsigned int hash = ipv6_addr_hash(addr); + struct inet6_ifaddr *ifp; + struct hlist_node *node; - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) - break; + return true; } } - return ifp != NULL; + return false; } int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) @@ -1333,7 +1324,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); - for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { onlink = ipv6_prefix_equal(addr, &ifa->addr, ifa->prefix_len); if (onlink) @@ -1350,24 +1341,26 @@ EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); + struct inet6_ifaddr *ifp, *result = NULL; + unsigned int hash = ipv6_addr_hash(addr); + struct hlist_node *node; - read_lock_bh(&addrconf_hash_lock); - for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { + result = ifp; in6_ifa_hold(ifp); break; } } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); - return ifp; + return result; } /* Gets referenced address, destroys ifaddr */ @@ -1570,7 +1563,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { memcpy(eui, ifp->addr.s6_addr+8, 8); err = 0; @@ -1738,7 +1731,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) + idev = ipv6_find_idev(dev); + if (!idev) return NULL; /* Add default multicast route */ @@ -1971,7 +1965,7 @@ ok: #ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ - for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { + list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { /* * When adjusting the lifetimes of an existing * temporary address, only lower the lifetimes. @@ -2174,7 +2168,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, return -ENXIO; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->prefix_len == plen && ipv6_addr_equal(pfx, &ifp->addr)) { in6_ifa_hold(ifp); @@ -2185,7 +2179,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, /* If the last address is deleted administratively, disable IPv6 on this interface. */ - if (idev->addr_list == NULL) + if (list_empty(&idev->addr_list)) addrconf_ifdown(idev->dev, 1); return 0; } @@ -2446,7 +2440,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ASSERT_RTNL(); - if ((idev = addrconf_add_dev(dev)) == NULL) { + idev = addrconf_add_dev(dev); + if (!idev) { printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); return; } @@ -2461,7 +2456,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; - switch(event) { + switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); @@ -2469,6 +2464,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(-ENOMEM); } break; + case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -2498,10 +2494,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } if (idev) { - if (idev->if_flags & IF_READY) { + if (idev->if_flags & IF_READY) /* device is already configured. */ break; - } idev->if_flags |= IF_READY; } @@ -2513,7 +2508,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - switch(dev->type) { + switch (dev->type) { #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) case ARPHRD_SIT: addrconf_sit_config(dev); @@ -2530,25 +2525,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_dev_config(dev); break; } + if (idev) { if (run_pending) addrconf_dad_run(idev); - /* If the MTU changed during the interface down, when the - interface up, the changed MTU must be reflected in the - idev as well as routers. + /* + * If the MTU changed during the interface down, + * when the interface up, the changed MTU must be + * reflected in the idev as well as routers. */ - if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { + if (idev->cnf.mtu6 != dev->mtu && + dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); idev->cnf.mtu6 = dev->mtu; } idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); - /* If the changed mtu during down is lower than IPV6_MIN_MTU - stop IPv6 on this interface. + + /* + * If the changed mtu during down is lower than + * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, event != NETDEV_DOWN); + addrconf_ifdown(dev, 1); } break; @@ -2565,7 +2565,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } - /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ + /* + * MTU falled under IPV6_MIN_MTU. + * Stop IPv6 on this interface. + */ case NETDEV_DOWN: case NETDEV_UNREGISTER: @@ -2585,9 +2588,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(err); } break; - case NETDEV_BONDING_OLDTYPE: - case NETDEV_BONDING_NEWTYPE: - addrconf_bonding_change(dev, event); + + case NETDEV_PRE_TYPE_CHANGE: + case NETDEV_POST_TYPE_CHANGE: + addrconf_type_change(dev, event); break; } @@ -2599,28 +2603,27 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, - .priority = 0 }; -static void addrconf_bonding_change(struct net_device *dev, unsigned long event) +static void addrconf_type_change(struct net_device *dev, unsigned long event) { struct inet6_dev *idev; ASSERT_RTNL(); idev = __in6_dev_get(dev); - if (event == NETDEV_BONDING_NEWTYPE) + if (event == NETDEV_POST_TYPE_CHANGE) ipv6_mc_remap(idev); - else if (event == NETDEV_BONDING_OLDTYPE) + else if (event == NETDEV_PRE_TYPE_CHANGE) ipv6_mc_unmap(idev); } static int addrconf_ifdown(struct net_device *dev, int how) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *keep_list, **bifa; struct net *net = dev_net(dev); - int i; + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + LIST_HEAD(keep_list); ASSERT_RTNL(); @@ -2631,8 +2634,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (idev == NULL) return -ENODEV; - /* Step 1: remove reference to ipv6 device from parent device. - Do not dev_put! + /* + * Step 1: remove reference to ipv6 device from parent device. + * Do not dev_put! */ if (how) { idev->dead = 1; @@ -2645,40 +2649,21 @@ static int addrconf_ifdown(struct net_device *dev, int how) } - /* Step 2: clear hash table */ - for (i=0; i<IN6_ADDR_HSIZE; i++) { - bifa = &inet6_addr_lst[i]; - - write_lock_bh(&addrconf_hash_lock); - while ((ifa = *bifa) != NULL) { - if (ifa->idev == idev && - (how || !(ifa->flags&IFA_F_PERMANENT) || - ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - *bifa = ifa->lst_next; - ifa->lst_next = NULL; - __in6_ifa_put(ifa); - continue; - } - bifa = &ifa->lst_next; - } - write_unlock_bh(&addrconf_hash_lock); - } - write_lock_bh(&idev->lock); - /* Step 3: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf */ if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - /* Step 4: clear address list */ #ifdef CONFIG_IPV6_PRIVACY if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); - /* clear tempaddr list */ - while ((ifa = idev->tempaddr_list) != NULL) { - idev->tempaddr_list = ifa->tmp_next; - ifa->tmp_next = NULL; + /* Step 3: clear tempaddr list */ + while (!list_empty(&idev->tempaddr_list)) { + ifa = list_first_entry(&idev->tempaddr_list, + struct inet6_ifaddr, tmp_list); + list_del(&ifa->tmp_list); ifa->dead = 1; write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -2692,23 +2677,18 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - keep_list = NULL; - bifa = &keep_list; - while ((ifa = idev->addr_list) != NULL) { - idev->addr_list = ifa->if_next; - ifa->if_next = NULL; + while (!list_empty(&idev->addr_list)) { + ifa = list_first_entry(&idev->addr_list, + struct inet6_ifaddr, if_list); addrconf_del_timer(ifa); /* If just doing link down, and address is permanent and not link-local, then retain it. */ - if (how == 0 && + if (!how && (ifa->flags&IFA_F_PERMANENT) && !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - - /* Move to holding list */ - *bifa = ifa; - bifa = &ifa->if_next; + list_move_tail(&ifa->if_list, &keep_list); /* If not doing DAD on this address, just keep it. */ if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || @@ -2723,24 +2703,32 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Flag it for later restoration when link comes up */ ifa->flags |= IFA_F_TENTATIVE; in6_ifa_hold(ifa); + write_unlock_bh(&idev->lock); } else { + list_del(&ifa->if_list); ifa->dead = 1; + write_unlock_bh(&idev->lock); + + /* clear hash table */ + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifa->addr_lst); + spin_unlock_bh(&addrconf_hash_lock); } - write_unlock_bh(&idev->lock); __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + if (ifa->dead) + atomic_notifier_call_chain(&inet6addr_chain, + NETDEV_DOWN, ifa); in6_ifa_put(ifa); write_lock_bh(&idev->lock); } - idev->addr_list = keep_list; + list_splice(&keep_list, &idev->addr_list); write_unlock_bh(&idev->lock); /* Step 5: Discard multicast list */ - if (how) ipv6_mc_destroy_dev(idev); else @@ -2748,8 +2736,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) idev->tstamp = jiffies; - /* Shot the device (if unregistered) */ - + /* Last: Shot the device (if unregistered) */ if (how) { addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); @@ -2860,7 +2847,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) * Optimistic nodes can start receiving * Frames right away */ - if(ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) ip6_ins_rt(ifp->rt); addrconf_dad_kick(ifp); @@ -2910,7 +2897,7 @@ out: static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { - struct net_device * dev = ifp->idev->dev; + struct net_device *dev = ifp->idev->dev; /* * Configure the address for reception. Now it is valid. @@ -2941,11 +2928,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) } } -static void addrconf_dad_run(struct inet6_dev *idev) { +static void addrconf_dad_run(struct inet6_dev *idev) +{ struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { + list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); if (!(ifp->flags & IFA_F_TENTATIVE)) { spin_unlock(&ifp->lock); @@ -2970,36 +2958,35 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - ifa = inet6_addr_lst[state->bucket]; - - while (ifa && !net_eq(dev_net(ifa->idev->dev), net)) - ifa = ifa->lst_next; - if (ifa) - break; + struct hlist_node *n; + hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket], + addr_lst) + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; } - return ifa; + return NULL; } -static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) +static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, + struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct hlist_node *n = &ifa->addr_lst; - ifa = ifa->lst_next; -try_again: - if (ifa) { - if (!net_eq(dev_net(ifa->idev->dev), net)) { - ifa = ifa->lst_next; - goto try_again; - } - } + hlist_for_each_entry_continue_rcu(ifa, n, addr_lst) + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; - if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { - ifa = inet6_addr_lst[state->bucket]; - goto try_again; + while (++state->bucket < IN6_ADDR_HSIZE) { + hlist_for_each_entry(ifa, n, + &inet6_addr_lst[state->bucket], addr_lst) { + if (net_eq(dev_net(ifa->idev->dev), net)) + return ifa; + } } - return ifa; + return NULL; } static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) @@ -3007,15 +2994,15 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) struct inet6_ifaddr *ifa = if6_get_first(seq); if (ifa) - while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) + while (pos && (ifa = if6_get_next(seq, ifa)) != NULL) --pos; return pos ? NULL : ifa; } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(addrconf_hash_lock) + __acquires(rcu) { - read_lock_bh(&addrconf_hash_lock); + rcu_read_lock_bh(); return if6_get_idx(seq, *pos); } @@ -3029,9 +3016,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(addrconf_hash_lock) + __releases(rcu) { - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); } static int if6_seq_show(struct seq_file *seq, void *v) @@ -3101,10 +3088,12 @@ void if6_proc_exit(void) int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) { int ret = 0; - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + struct inet6_ifaddr *ifp = NULL; + struct hlist_node *n; + unsigned int hash = ipv6_addr_hash(addr); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3113,7 +3102,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) break; } } - read_unlock_bh(&addrconf_hash_lock); + rcu_read_unlock_bh(); return ret; } #endif @@ -3124,43 +3113,35 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) static void addrconf_verify(unsigned long foo) { + unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; - unsigned long now, next; + struct hlist_node *node; int i; - spin_lock_bh(&addrconf_verify_lock); + rcu_read_lock_bh(); + spin_lock(&addrconf_verify_lock); now = jiffies; - next = now + ADDR_CHECK_FREQUENCY; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); del_timer(&addr_chk_timer); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - + for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - read_lock(&addrconf_hash_lock); - for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { + hlist_for_each_entry_rcu(ifp, node, + &inet6_addr_lst[i], addr_lst) { unsigned long age; -#ifdef CONFIG_IPV6_PRIVACY - unsigned long regen_advance; -#endif if (ifp->flags & IFA_F_PERMANENT) continue; spin_lock(&ifp->lock); - age = (now - ifp->tstamp) / HZ; - -#ifdef CONFIG_IPV6_PRIVACY - regen_advance = ifp->idev->cnf.regen_max_retry * - ifp->idev->cnf.dad_transmits * - ifp->idev->nd_parms->retrans_time / HZ; -#endif + /* We try to batch several events at once. */ + age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (ifp->valid_lft != INFINITY_LIFE_TIME && age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { @@ -3182,7 +3163,6 @@ restart: if (deprecate) { in6_ifa_hold(ifp); - read_unlock(&addrconf_hash_lock); ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); @@ -3191,6 +3171,10 @@ restart: #ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE)) { + unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * + ifp->idev->cnf.dad_transmits * + ifp->idev->nd_parms->retrans_time / HZ; + if (age >= ifp->prefered_lft - regen_advance) { struct inet6_ifaddr *ifpub = ifp->ifpub; if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) @@ -3200,7 +3184,7 @@ restart: in6_ifa_hold(ifp); in6_ifa_hold(ifpub); spin_unlock(&ifp->lock); - read_unlock(&addrconf_hash_lock); + spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); @@ -3220,12 +3204,26 @@ restart: spin_unlock(&ifp->lock); } } - read_unlock(&addrconf_hash_lock); } - addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; + + ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", + now, next, next_sec, next_sched)); + + addr_chk_timer.expires = next_sched; add_timer(&addr_chk_timer); - spin_unlock_bh(&addrconf_verify_lock); + spin_unlock(&addrconf_verify_lock); + rcu_read_unlock_bh(); } static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) @@ -3515,8 +3513,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return nlmsg_end(skb, nlh); } -enum addr_type_t -{ +enum addr_type_t { UNICAST_ADDR, MULTICAST_ADDR, ANYCAST_ADDR, @@ -3527,7 +3524,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type, int s_ip_idx, int *p_ip_idx) { - struct inet6_ifaddr *ifa; struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; int err = 1; @@ -3535,11 +3531,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, read_lock_bh(&idev->lock); switch (type) { - case UNICAST_ADDR: + case UNICAST_ADDR: { + struct inet6_ifaddr *ifa; + /* unicast address incl. temp addr */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, @@ -3550,6 +3547,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, break; } break; + } case MULTICAST_ADDR: /* multicast address */ for (ifmca = idev->mc_list; ifmca; @@ -3614,7 +3612,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, if (h > s_h || idx > s_idx) s_ip_idx = 0; ip_idx = 0; - if ((idev = __in6_dev_get(dev)) == NULL) + idev = __in6_dev_get(dev); + if (!idev) goto cont; if (in6_dump_addrs(idev, skb, cb, type, @@ -3681,12 +3680,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (ifm->ifa_index) dev = __dev_get_by_index(net, ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { + ifa = ipv6_get_ifaddr(net, addr, dev, 1); + if (!ifa) { err = -EADDRNOTAVAIL; goto errout; } - if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL); + if (!skb) { err = -ENOBUFS; goto errout_ifa; } @@ -3811,7 +3812,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes) { - switch(attrtype) { + switch (attrtype) { case IFLA_INET6_STATS: __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); break; @@ -4047,7 +4048,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt)) + + if (ifp->dead && ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->u.dst); break; } @@ -4163,211 +4165,211 @@ static struct addrconf_sysctl_table .sysctl_header = NULL, .addrconf_vars = { { - .procname = "forwarding", - .data = &ipv6_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_forward, + .procname = "forwarding", + .data = &ipv6_devconf.forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_forward, }, { - .procname = "hop_limit", - .data = &ipv6_devconf.hop_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "hop_limit", + .data = &ipv6_devconf.hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "mtu", - .data = &ipv6_devconf.mtu6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "mtu", + .data = &ipv6_devconf.mtu6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra", - .data = &ipv6_devconf.accept_ra, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra", + .data = &ipv6_devconf.accept_ra, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_redirects", - .data = &ipv6_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_redirects", + .data = &ipv6_devconf.accept_redirects, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "autoconf", - .data = &ipv6_devconf.autoconf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "autoconf", + .data = &ipv6_devconf.autoconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "dad_transmits", - .data = &ipv6_devconf.dad_transmits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "dad_transmits", + .data = &ipv6_devconf.dad_transmits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_solicitations", - .data = &ipv6_devconf.rtr_solicits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "router_solicitations", + .data = &ipv6_devconf.rtr_solicits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_solicitation_interval", - .data = &ipv6_devconf.rtr_solicit_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_solicitation_interval", + .data = &ipv6_devconf.rtr_solicit_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "router_solicitation_delay", - .data = &ipv6_devconf.rtr_solicit_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_solicitation_delay", + .data = &ipv6_devconf.rtr_solicit_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, { - .procname = "force_mld_version", - .data = &ipv6_devconf.force_mld_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "force_mld_version", + .data = &ipv6_devconf.force_mld_version, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_PRIVACY { - .procname = "use_tempaddr", - .data = &ipv6_devconf.use_tempaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "use_tempaddr", + .data = &ipv6_devconf.use_tempaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "temp_valid_lft", - .data = &ipv6_devconf.temp_valid_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "temp_valid_lft", + .data = &ipv6_devconf.temp_valid_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "temp_prefered_lft", - .data = &ipv6_devconf.temp_prefered_lft, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "temp_prefered_lft", + .data = &ipv6_devconf.temp_prefered_lft, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "regen_max_retry", - .data = &ipv6_devconf.regen_max_retry, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "regen_max_retry", + .data = &ipv6_devconf.regen_max_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "max_desync_factor", - .data = &ipv6_devconf.max_desync_factor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "max_desync_factor", + .data = &ipv6_devconf.max_desync_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif { - .procname = "max_addresses", - .data = &ipv6_devconf.max_addresses, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "max_addresses", + .data = &ipv6_devconf.max_addresses, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra_defrtr", - .data = &ipv6_devconf.accept_ra_defrtr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_defrtr", + .data = &ipv6_devconf.accept_ra_defrtr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_ra_pinfo", - .data = &ipv6_devconf.accept_ra_pinfo, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_pinfo", + .data = &ipv6_devconf.accept_ra_pinfo, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { - .procname = "accept_ra_rtr_pref", - .data = &ipv6_devconf.accept_ra_rtr_pref, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_rtr_pref", + .data = &ipv6_devconf.accept_ra_rtr_pref, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "router_probe_interval", - .data = &ipv6_devconf.rtr_probe_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .procname = "router_probe_interval", + .data = &ipv6_devconf.rtr_probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { - .procname = "accept_ra_rt_info_max_plen", - .data = &ipv6_devconf.accept_ra_rt_info_max_plen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_ra_rt_info_max_plen", + .data = &ipv6_devconf.accept_ra_rt_info_max_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif #endif { - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { - .procname = "accept_source_route", - .data = &ipv6_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_source_route", + .data = &ipv6_devconf.accept_source_route, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { - .procname = "optimistic_dad", - .data = &ipv6_devconf.optimistic_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "optimistic_dad", + .data = &ipv6_devconf.optimistic_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, #endif #ifdef CONFIG_IPV6_MROUTE { - .procname = "mc_forwarding", - .data = &ipv6_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, }, #endif { - .procname = "disable_ipv6", - .data = &ipv6_devconf.disable_ipv6, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = addrconf_sysctl_disable, + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable, }, { - .procname = "accept_dad", - .data = &ipv6_devconf.accept_dad, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, }, { .procname = "force_tllao", @@ -4403,8 +4405,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, if (t == NULL) goto out; - for (i=0; t->addrconf_vars[i].data; i++) { - t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; + for (i = 0; t->addrconf_vars[i].data; i++) { + t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ t->addrconf_vars[i].extra2 = net; } @@ -4541,14 +4543,12 @@ int register_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&inet6addr_chain, nb); } - EXPORT_SYMBOL(register_inet6addr_notifier); int unregister_inet6addr_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&inet6addr_chain,nb); + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); } - EXPORT_SYMBOL(unregister_inet6addr_notifier); /* @@ -4557,11 +4557,12 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier); int __init addrconf_init(void) { - int err; + int i, err; - if ((err = ipv6_addr_label_init()) < 0) { - printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", - err); + err = ipv6_addr_label_init(); + if (err < 0) { + printk(KERN_CRIT "IPv6 Addrconf:" + " cannot initialize default policy table: %d.\n", err); return err; } @@ -4592,6 +4593,9 @@ int __init addrconf_init(void) if (err) goto errlo; + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_addr_lst[i]); + register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(0); @@ -4620,7 +4624,6 @@ errlo: void addrconf_cleanup(void) { - struct inet6_ifaddr *ifa; struct net_device *dev; int i; @@ -4640,20 +4643,10 @@ void addrconf_cleanup(void) /* * Check hash table. */ - write_lock_bh(&addrconf_hash_lock); - for (i=0; i < IN6_ADDR_HSIZE; i++) { - for (ifa=inet6_addr_lst[i]; ifa; ) { - struct inet6_ifaddr *bifa; - - bifa = ifa; - ifa = ifa->lst_next; - printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); - /* Do not free it; something is wrong. - Now we can investigate it with debugger. - */ - } - } - write_unlock_bh(&addrconf_hash_lock); + spin_lock_bh(&addrconf_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_addr_lst[i])); + spin_unlock_bh(&addrconf_hash_lock); del_timer(&addr_chk_timer); rtnl_unlock(); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3192aa02ba5..d2df3144429 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk) if ((skb = xchg(&np->pktoptions, NULL)) != NULL) kfree_skb(skb); + if ((skb = xchg(&np->rxpmtu, NULL)) != NULL) + kfree_skb(skb); + /* Free flowlabels */ fl6_free_socklist(sk); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 622dc7939a1..5959230bc6c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -278,6 +278,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) kfree_skb(skb); } +void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *iph; + struct sk_buff *skb; + struct ip6_mtuinfo *mtu_info; + + if (!np->rxopt.bits.rxpmtu) + return; + + skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); + if (!skb) + return; + + skb_put(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + iph = ipv6_hdr(skb); + ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + + mtu_info = IP6CBMTU(skb); + if (!mtu_info) { + kfree_skb(skb); + return; + } + + mtu_info->ip6m_mtu = mtu; + mtu_info->ip6m_addr.sin6_family = AF_INET6; + mtu_info->ip6m_addr.sin6_port = 0; + mtu_info->ip6m_addr.sin6_flowinfo = 0; + mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); + + __skb_pull(skb, skb_tail_pointer(skb) - skb->data); + skb_reset_transport_header(skb); + + skb = xchg(&np->rxpmtu, skb); + kfree_skb(skb); +} + /* * Handle MSG_ERRQUEUE */ @@ -381,6 +420,54 @@ out: return err; } +/* + * Handle IPV6_RECVPATHMTU + */ +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sk_buff *skb; + struct sockaddr_in6 *sin; + struct ip6_mtuinfo mtu_info; + int err; + int copied; + + err = -EAGAIN; + skb = xchg(&np->rxpmtu, NULL); + if (skb == NULL) + goto out; + + copied = skb->len; + if (copied > len) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (err) + goto out_free_skb; + + sock_recv_timestamp(msg, sk, skb); + + memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info)); + + sin = (struct sockaddr_in6 *)msg->msg_name; + if (sin) { + sin->sin6_family = AF_INET6; + sin->sin6_flowinfo = 0; + sin->sin6_port = 0; + sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; + ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr); + } + + put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); + + err = copied; + +out_free_skb: + kfree_skb(skb); +out: + return err; +} int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) @@ -497,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) int datagram_send_ctl(struct net *net, struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit, int *tclass) + int *hlimit, int *tclass, int *dontfrag) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -737,6 +824,25 @@ int datagram_send_ctl(struct net *net, break; } + + case IPV6_DONTFRAG: + { + int df; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + df = *(int *)CMSG_DATA(cmsg); + if (df < 0 || df > 1) + goto exit_f; + + err = 0; + *dontfrag = df; + + break; + } default: LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5e463c43fcc..8124f16f2ac 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -208,7 +208,6 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, { struct fib6_rule *rule6 = (struct fib6_rule *) rule; - frh->family = AF_INET6; frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; frh->tos = rule6->tclass; @@ -239,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) } static struct fib_rules_ops fib6_rules_ops_template = { - .family = AF_INET6, + .family = FIB_RULES_IPV6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3330a4bd615..ce799298255 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -481,8 +481,9 @@ route_done: len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, (struct rt6_info*)dst, - MSG_DONTWAIT); + MSG_DONTWAIT, np->dontfrag); if (err) { + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ip6_flush_pending_frames(sk); goto out_put; } @@ -560,9 +561,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, - (struct rt6_info*)dst, MSG_DONTWAIT); + (struct rt6_info*)dst, MSG_DONTWAIT, + np->dontfrag); if (err) { + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ip6_flush_pending_frames(sk); goto out_put; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3a4d92b5a83..9ca1efc923a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -183,7 +183,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +int inet6_csk_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -239,7 +239,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) /* Restore final destination back after routing done */ ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - return ip6_xmit(sk, skb, &fl, np->opt, 0); + return ip6_xmit(sk, skb, &fl, np->opt); } EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6b82e02158c..92a122b7795 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -128,12 +128,24 @@ static __inline__ u32 fib6_new_sernum(void) /* * test bit */ +#if defined(__LITTLE_ENDIAN) +# define BITOP_BE32_SWIZZLE (0x1F & ~7) +#else +# define BITOP_BE32_SWIZZLE 0 +#endif static __inline__ __be32 addr_bit_set(void *token, int fn_bit) { __be32 *addr = token; - - return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; + /* + * Here, + * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) + * is optimized version of + * htonl(1 << ((~fn_bit)&0x1F)) + * See include/asm-generic/bitops/le.h. + */ + return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & + addr[fn_bit >> 5]; } static __inline__ struct fib6_node * node_alloc(void) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 14e23216eb2..13654686aea 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -360,7 +360,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, + &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 75d5ef83009..7db09c3f528 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -181,11 +181,11 @@ int ip6_output(struct sk_buff *skb) } /* - * xmit an sk_buff (used by TCP) + * xmit an sk_buff (used by TCP, SCTP and DCCP) */ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, - struct ipv6_txoptions *opt, int ipfragok) + struct ipv6_txoptions *opt) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -231,10 +231,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - /* Allow local fragmentation. */ - if (ipfragok) - skb->local_df = 1; - /* * Fill in the IPv6 header */ @@ -1109,7 +1105,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, - struct rt6_info *rt, unsigned int flags) + struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -1223,15 +1219,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, */ inet->cork.length += length; - if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + if (length > mtu) { + int proto = sk->sk_protocol; + if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ + ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); - if (err) - goto error; - return 0; + if (proto == IPPROTO_UDP && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags); + if (err) + goto error; + return 0; + } } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 33f60fca7aa..bd43f0152c2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -114,9 +114,9 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, } opt = xchg(&inet6_sk(sk)->opt, opt); } else { - write_lock(&sk->sk_dst_lock); + spin_lock(&sk->sk_dst_lock); opt = xchg(&inet6_sk(sk)->opt, opt); - write_unlock(&sk->sk_dst_lock); + spin_unlock(&sk->sk_dst_lock); } sk_dst_reset(sk); @@ -337,6 +337,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_RECVPATHMTU: + if (optlen < sizeof(int)) + goto e_inval; + np->rxopt.bits.rxpmtu = valbool; + retv = 0; + break; + case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: @@ -451,7 +458,8 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, + &junk); if (retv) goto done; update: @@ -767,6 +775,17 @@ pref_skip_coa: break; } + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + np->min_hopcount = val; + break; + case IPV6_DONTFRAG: + np->dontfrag = valbool; + retv = 0; + break; } release_sock(sk); @@ -971,14 +990,13 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_MTU: { struct dst_entry *dst; + val = 0; - lock_sock(sk); - dst = sk_dst_get(sk); - if (dst) { + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) val = dst_mtu(dst); - dst_release(dst); - } - release_sock(sk); + rcu_read_unlock(); if (!val) return -ENOTCONN; break; @@ -1056,6 +1074,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.rxflow; break; + case IPV6_RECVPATHMTU: + val = np->rxopt.bits.rxpmtu; + break; + + case IPV6_PATHMTU: + { + struct dst_entry *dst; + struct ip6_mtuinfo mtuinfo; + + if (len < sizeof(mtuinfo)) + return -EINVAL; + + len = sizeof(mtuinfo); + memset(&mtuinfo, 0, sizeof(mtuinfo)); + + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) + mtuinfo.ip6m_mtu = dst_mtu(dst); + rcu_read_unlock(); + if (!mtuinfo.ip6m_mtu) + return -ENOTCONN; + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &mtuinfo, len)) + return -EFAULT; + + return 0; + break; + } + case IPV6_UNICAST_HOPS: case IPV6_MULTICAST_HOPS: { @@ -1066,12 +1116,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, else val = np->mcast_hops; - dst = sk_dst_get(sk); - if (dst) { - if (val < 0) + if (val < 0) { + rcu_read_lock(); + dst = __sk_dst_get(sk); + if (dst) val = ip6_dst_hoplimit(dst); - dst_release(dst); + rcu_read_unlock(); } + if (val < 0) val = sock_net(sk)->ipv6.devconf_all->hop_limit; break; @@ -1115,6 +1167,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val |= IPV6_PREFER_SRC_HOME; break; + case IPV6_MINHOPCOUNT: + val = np->min_hopcount; + break; + + case IPV6_DONTFRAG: + val = np->dontfrag; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index c483ab9fd67..006aee683a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -44,6 +44,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <net/mld.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -71,54 +72,11 @@ #define MDBG(x) #endif -/* - * These header formats should be in a separate include file, but icmpv6.h - * doesn't have in6_addr defined in all cases, there is no __u128, and no - * other files reference these. - * - * +-DLS 4/14/03 - */ - -/* Multicast Listener Discovery version 2 headers */ - -struct mld2_grec { - __u8 grec_type; - __u8 grec_auxwords; - __be16 grec_nsrcs; - struct in6_addr grec_mca; - struct in6_addr grec_src[0]; -}; - -struct mld2_report { - __u8 type; - __u8 resv1; - __sum16 csum; - __be16 resv2; - __be16 ngrec; - struct mld2_grec grec[0]; -}; - -struct mld2_query { - __u8 type; - __u8 code; - __sum16 csum; - __be16 mrc; - __be16 resv1; - struct in6_addr mca; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 qrv:3, - suppress:1, - resv2:4; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u8 resv2:4, - suppress:1, - qrv:3; -#else -#error "Please fix <asm/byteorder.h>" -#endif - __u8 qqic; - __be16 nsrcs; - struct in6_addr srcs[0]; +/* Ensure that we have struct in6_addr aligned on 32bit word. */ +static void *__mld2_query_bugs[] __attribute__((__unused__)) = { + BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4), + BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4), + BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4) }; static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; @@ -157,14 +115,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, ((idev)->mc_v1_seen && \ time_before(jiffies, (idev)->mc_v1_seen))) -#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) -#define MLDV2_EXP(thresh, nbmant, nbexp, value) \ - ((value) < (thresh) ? (value) : \ - ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \ - (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) - -#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value) - #define IPV6_MLD_MAX_MSF 64 int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; @@ -715,7 +665,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) - dev_mc_add(dev, buf, dev->addr_len, 0); + dev_mc_add(dev, buf); } spin_unlock_bh(&mc->mca_lock); @@ -741,7 +691,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) - dev_mc_delete(dev, buf, dev->addr_len, 0); + dev_mc_del(dev, buf); } if (mc->mca_flags & MAF_NOREPORT) @@ -1161,7 +1111,7 @@ int igmp6_event_query(struct sk_buff *skb) struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; - struct icmp6hdr *hdr; + struct mld_msg *mld; int group_type; int mark = 0; int len; @@ -1182,8 +1132,8 @@ int igmp6_event_query(struct sk_buff *skb) if (idev == NULL) return 0; - hdr = icmp6_hdr(skb); - group = (struct in6_addr *) (hdr + 1); + mld = (struct mld_msg *)icmp6_hdr(skb); + group = &mld->mld_mca; group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && @@ -1197,7 +1147,7 @@ int igmp6_event_query(struct sk_buff *skb) /* MLDv1 router present */ /* Translate milliseconds to jiffies */ - max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000; + max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000; switchback = (idev->mc_qrv + 1) * max_delay; idev->mc_v1_seen = jiffies + switchback; @@ -1216,14 +1166,14 @@ int igmp6_event_query(struct sk_buff *skb) return -EINVAL; } mlh2 = (struct mld2_query *)skb_transport_header(skb); - max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; + max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) max_delay = 1; idev->mc_maxdelay = max_delay; - if (mlh2->qrv) - idev->mc_qrv = mlh2->qrv; + if (mlh2->mld2q_qrv) + idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->nsrcs) { + if (mlh2->mld2q_nsrcs) { in6_dev_put(idev); return -EINVAL; /* no sources allowed */ } @@ -1232,9 +1182,9 @@ int igmp6_event_query(struct sk_buff *skb) return 0; } /* mark sources to include, if group & source-specific */ - if (mlh2->nsrcs != 0) { + if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) { + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { in6_dev_put(idev); return -EINVAL; } @@ -1270,7 +1220,7 @@ int igmp6_event_query(struct sk_buff *skb) ma->mca_flags &= ~MAF_GSQUERY; } if (!(ma->mca_flags & MAF_GSQUERY) || - mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs)) + mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs)) igmp6_group_queried(ma, max_delay); spin_unlock_bh(&ma->mca_lock); break; @@ -1286,9 +1236,8 @@ int igmp6_event_query(struct sk_buff *skb) int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; - struct in6_addr *addrp; struct inet6_dev *idev; - struct icmp6hdr *hdr; + struct mld_msg *mld; int addr_type; /* Our own report looped back. Ignore it. */ @@ -1300,10 +1249,10 @@ int igmp6_event_report(struct sk_buff *skb) skb->pkt_type != PACKET_BROADCAST) return 0; - if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr))) return -EINVAL; - hdr = icmp6_hdr(skb); + mld = (struct mld_msg *)icmp6_hdr(skb); /* Drop reports with not link local source */ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); @@ -1311,8 +1260,6 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - addrp = (struct in6_addr *) (hdr + 1); - idev = in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1323,7 +1270,7 @@ int igmp6_event_report(struct sk_buff *skb) read_lock_bh(&idev->lock); for (ma = idev->mc_list; ma; ma=ma->next) { - if (ipv6_addr_equal(&ma->mca_addr, addrp)) { + if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { spin_lock(&ma->mca_lock); if (del_timer(&ma->mca_timer)) atomic_dec(&ma->mca_refcnt); @@ -1432,11 +1379,11 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); skb_put(skb, sizeof(*pmr)); pmr = (struct mld2_report *)skb_transport_header(skb); - pmr->type = ICMPV6_MLD2_REPORT; - pmr->resv1 = 0; - pmr->csum = 0; - pmr->resv2 = 0; - pmr->ngrec = 0; + pmr->mld2r_type = ICMPV6_MLD2_REPORT; + pmr->mld2r_resv1 = 0; + pmr->mld2r_cksum = 0; + pmr->mld2r_resv2 = 0; + pmr->mld2r_ngrec = 0; return skb; } @@ -1458,9 +1405,10 @@ static void mld_sendpack(struct sk_buff *skb) mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); - pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, - IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), - mldlen, 0)); + pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, + IPPROTO_ICMPV6, + csum_partial(skb_transport_header(skb), + mldlen, 0)); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); @@ -1521,7 +1469,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = 0; pgr->grec_mca = pmc->mca_addr; /* structure copy */ pmr = (struct mld2_report *)skb_transport_header(skb); - pmr->ngrec = htons(ntohs(pmr->ngrec)+1); + pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1); *ppgr = pgr; return skb; } @@ -1557,7 +1505,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, /* EX and TO_EX get a fresh packet, if needed */ if (truncate) { - if (pmr && pmr->ngrec && + if (pmr && pmr->mld2r_ngrec && AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); @@ -1770,9 +1718,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; - struct icmp6hdr *hdr; + struct mld_msg *hdr; const struct in6_addr *snd_addr, *saddr; - struct in6_addr *addrp; struct in6_addr addr_buf; int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, @@ -1820,16 +1767,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); - hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr)); - memset(hdr, 0, sizeof(struct icmp6hdr)); - hdr->icmp6_type = type; + hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg)); + memset(hdr, 0, sizeof(struct mld_msg)); + hdr->mld_type = type; + ipv6_addr_copy(&hdr->mld_mca, addr); - addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr)); - ipv6_addr_copy(addrp, addr); - - hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, - IPPROTO_ICMPV6, - csum_partial(hdr, len, 0)); + hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len, + IPPROTO_ICMPV6, + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index cbe8dec9744..e60677519e4 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -141,11 +141,11 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) } /* Step to the next */ - pr_debug("len%04X \n", optlen); + pr_debug("len%04X\n", optlen); if ((ptr > skb->len - optlen || hdrlen < optlen) && temp < optinfo->optsnr - 1) { - pr_debug("new pointer is too large! \n"); + pr_debug("new pointer is too large!\n"); break; } ptr += optlen; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 58344c0fbd1..458eabfbe13 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -97,6 +97,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = { SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), + SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8763b1a0814..85627386cb0 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; @@ -733,6 +736,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; + int dontfrag = -1; u16 proto; int err; @@ -811,7 +815,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -880,6 +885,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -890,7 +898,7 @@ back_from_confirm: lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - msg->msg_flags); + msg->msg_flags, dontfrag); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 075f540ec19..6603511e367 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -75,6 +75,9 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); +static void __tcp_v6_send_check(struct sk_buff *skb, + struct in6_addr *saddr, + struct in6_addr *daddr); static const struct inet_connection_sock_af_ops ipv6_mapped; static const struct inet_connection_sock_af_ops ipv6_specific; @@ -350,6 +353,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; + if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && @@ -503,14 +511,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { - struct tcphdr *th = tcp_hdr(skb); - - th->check = tcp_v6_check(skb->len, - &treq->loc_addr, &treq->rmt_addr, - csum_partial(th, skb->len, skb->csum)); + __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt, 0); + err = ip6_xmit(sk, skb, &fl, opt); err = net_xmit_eval(err); } @@ -918,22 +922,29 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) +static void __tcp_v6_send_check(struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr) { - struct ipv6_pinfo *np = inet6_sk(sk); struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial(th, th->doff<<2, - skb->csum)); + th->check = tcp_v6_check(skb->len, saddr, daddr, + csum_partial(th, th->doff << 2, + skb->csum)); } } +static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + __tcp_v6_send_check(skb, &np->saddr, &np->daddr); +} + static int tcp_v6_gso_send_check(struct sk_buff *skb) { struct ipv6hdr *ipv6h; @@ -946,11 +957,8 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) th = tcp_hdr(skb); th->check = 0; - th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_TCP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); return 0; } @@ -1047,15 +1055,14 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial(t1, tot_len, 0); - memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); - t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - tot_len, IPPROTO_TCP, - buff->csum); + buff->ip_summed = CHECKSUM_PARTIAL; + buff->csum = 0; + + __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); fl.proto = IPPROTO_TCP; fl.oif = inet6_iif(skb); @@ -1070,7 +1077,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); @@ -1233,12 +1240,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; /* Secret recipe starts with IP addresses */ - d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; - d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; @@ -1676,6 +1683,7 @@ ipv6_pktoptions: static int tcp_v6_rcv(struct sk_buff *skb) { struct tcphdr *th; + struct ipv6hdr *hdr; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1702,12 +1710,13 @@ static int tcp_v6_rcv(struct sk_buff *skb) goto bad_packet; th = tcp_hdr(skb); + hdr = ipv6_hdr(skb); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); + TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); @@ -1718,6 +1727,11 @@ process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90824852f59..2850e35cee3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net, if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word(addr6->s6_addr32[3], mix); + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else - hash = jhash2(addr6->s6_addr32, 4, mix); + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } @@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &err); @@ -919,6 +922,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int ulen = len; int hlimit = -1; int tclass = -1; + int dontfrag = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; @@ -1049,7 +1053,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1120,6 +1125,9 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1143,7 +1151,7 @@ do_append_data: err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 00bf7c962b7..4a0e77e1446 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net, return 0; } -static struct dst_entry * -__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) -{ - struct dst_entry *dst; - - /* Still not clear if we should set fl->fl6_{src,dst}... */ - read_lock_bh(&policy->lock); - for (dst = policy->bundles; dst; dst = dst->next) { - struct xfrm_dst *xdst = (struct xfrm_dst*)dst; - struct in6_addr fl_dst_prefix, fl_src_prefix; - - ipv6_addr_prefix(&fl_dst_prefix, - &fl->fl6_dst, - xdst->u.rt6.rt6i_dst.plen); - ipv6_addr_prefix(&fl_src_prefix, - &fl->fl6_src, - xdst->u.rt6.rt6i_src.plen); - if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && - ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(policy, xdst, fl, AF_INET6, - (xdst->u.rt6.rt6i_dst.plen != 128 || - xdst->u.rt6.rt6i_src.plen != 128))) { - dst_clone(dst); - break; - } - } - read_unlock_bh(&policy->lock); - return dst; -} - static int xfrm6_get_tos(struct flowi *fl) { return 0; @@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, - .find_bundle = __xfrm6_find_bundle, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, .init_path = xfrm6_init_path, |