From 0bb53a66fe1258b1cb5eb1ea70768386f0c2a1ca Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 2 May 2008 02:46:55 -0700 Subject: ipv6: assign PDE->data before gluing PDE into /proc tree Simply replace proc_create and further data assigned with proc_create_data. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/proc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index ca8b82f96fe..df0736a4caf 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -247,13 +247,11 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!proc_net_devsnmp6) return -ENOENT; - p = proc_create(idev->dev->name, S_IRUGO, - proc_net_devsnmp6, &snmp6_seq_fops); + p = proc_create_data(idev->dev->name, S_IRUGO, + proc_net_devsnmp6, &snmp6_seq_fops, idev); if (!p) return -ENOMEM; - p->data = idev; - idev->stats.proc_dir_entry = p; return 0; } -- cgit v1.2.3-70-g09d2 From 4ac2ccd01646e08d7176185c94e5b19404a25998 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 2 May 2008 17:02:03 -0700 Subject: netns: Fix reassembly timer to use the right namespace This trivial fix retrieves the network namespace from frag queue and use it to get the network device in the right namespace. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7b247e3a16f..798cabc7535 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -197,6 +197,7 @@ static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq; struct net_device *dev = NULL; + struct net *net; fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); @@ -207,7 +208,8 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); - dev = dev_get_by_index(&init_net, fq->iif); + net = container_of(fq->q.net, struct net, ipv6.frags); + dev = dev_get_by_index(net, fq->iif); if (!dev) goto out; -- cgit v1.2.3-70-g09d2 From 5ffc02a158997b1eb91ade8d02bcf521ff79a218 Mon Sep 17 00:00:00 2001 From: Satoru SATOH Date: Sun, 4 May 2008 22:14:42 -0700 Subject: ip: Use inline function dst_metric() instead of direct access to dst->metric[] There are functions to refer to the value of dst->metric[THE_METRIC-1] directly without use of a inline function "dst_metric" defined in net/dst.h. The following patch changes them to use the inline function consistently. Signed-off-by: Satoru SATOH Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 12 ++++++------ net/ipv4/route.c | 16 ++++++++-------- net/ipv4/tcp_input.c | 15 ++++++++------- net/ipv6/route.c | 6 +++--- 4 files changed, 25 insertions(+), 24 deletions(-) (limited to 'net/ipv6') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2f665a51647..f50e88bf266 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -235,14 +235,14 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) else min_mtu -= 21; - if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) { + if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { if (!(dst_metric_locked(dst, RTAX_MTU))) { dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, dn_rt_mtu_expires); } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - if (dst->metrics[RTAX_ADVMSS-1] > mss) + if (dst_metric(dst, RTAX_ADVMSS) > mss) dst->metrics[RTAX_ADVMSS-1] = mss; } } @@ -805,12 +805,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) rt->u.dst.neighbour = n; } - if (rt->u.dst.metrics[RTAX_MTU-1] == 0 || - rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu) + if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || + dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 || - rt->u.dst.metrics[RTAX_ADVMSS-1] > mss) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9084055a81a..92f90ae46f4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1468,14 +1468,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] && + old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) { - if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { + if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) { + if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1497,7 +1497,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 && + if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1624,14 +1624,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) } else rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) + if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, ip_rt_min_advmss); - if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eda4f4a233f..8ac15a604e0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -605,7 +606,7 @@ static u32 tcp_rto_min(struct sock *sk) u32 rto_min = TCP_RTO_MIN; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst->metrics[RTAX_RTO_MIN - 1]; + rto_min = dst_metric(dst, RTAX_RTO_MIN); return rto_min; } @@ -769,7 +770,7 @@ void tcp_update_metrics(struct sock *sk) dst->metrics[RTAX_RTTVAR - 1] = m; else dst->metrics[RTAX_RTTVAR-1] -= - (dst->metrics[RTAX_RTTVAR-1] - m)>>2; + (dst_metric(dst, RTAX_RTTVAR) - m)>>2; } if (tp->snd_ssthresh >= 0xFFFF) { @@ -788,21 +789,21 @@ void tcp_update_metrics(struct sock *sk) dst->metrics[RTAX_SSTHRESH-1] = max(tp->snd_cwnd >> 1, tp->snd_ssthresh); if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1; + dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; } else { /* Else slow start did not finish, cwnd is non-sense, ssthresh may be also invalid. */ if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1; - if (dst->metrics[RTAX_SSTHRESH-1] && + dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; + if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1]) + tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; } if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst->metrics[RTAX_REORDERING-1] < tp->reordering && + if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && tp->reordering != sysctl_tcp_reordering) dst->metrics[RTAX_REORDERING-1] = tp->reordering; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a493ad9b891..12bba088034 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1243,11 +1243,11 @@ install_route: } } - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!rt->u.dst.metrics[RTAX_MTU-1]) + if (!dst_metric(&rt->u.dst, RTAX_MTU)) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) + if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; -- cgit v1.2.3-70-g09d2 From 36ca34cc3b8335eb1fe8bd9a1d0a2592980c3f02 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 May 2008 23:40:26 -0700 Subject: sit: Add missing kfree_skb() on pskb_may_pull() failure. Noticed by Paul Marks . Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4b2f1033994..5a6fab95569 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -596,9 +596,9 @@ static int ipip6_rcv(struct sk_buff *skb) } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - kfree_skb(skb); read_unlock(&ipip6_lock); out: + kfree_skb(skb); return 0; } -- cgit v1.2.3-70-g09d2 From f5184d267c1aedb9b7a8cc44e08ff6b8d382c3b5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 May 2008 20:48:31 -0700 Subject: net: Allow netdevices to specify needed head/tailroom This patch adds needed_headroom/needed_tailroom members to struct net_device and updates many places that allocate sbks to use them. Not all of them can be converted though, and I'm sure I missed some (I mostly grepped for LL_RESERVED_SPACE) Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 ++++++++++++++-- net/core/netpoll.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/igmp.c | 4 ++-- net/ipv4/ipconfig.c | 6 +++--- net/ipv4/raw.c | 10 ++++------ net/ipv6/ip6_output.c | 2 +- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/raw.c | 10 ++++------ net/packet/af_packet.c | 2 +- net/xfrm/xfrm_output.c | 6 +++--- 13 files changed, 39 insertions(+), 31 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3fb57fde62..b11e6e19e96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -246,11 +246,16 @@ struct hh_cache * * We could use other alignment values, but we must maintain the * relationship HH alignment <= LL alignment. + * + * LL_ALLOCATED_SPACE also takes into account the tailroom the device + * may need. */ #define LL_RESERVED_SPACE(dev) \ - (((dev)->hard_header_len&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ - ((((dev)->hard_header_len+extra)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) +#define LL_ALLOCATED_SPACE(dev) \ + ((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, @@ -569,6 +574,13 @@ struct net_device unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ + /* extra head- and tailroom the hardware may need, but not in all cases + * can this be guaranteed, especially tailroom. Some cases also use + * LL_MAX_HEADER instead to allocate the skb. + */ + unsigned short needed_headroom; + unsigned short needed_tailroom; + struct net_device *master; /* Pointer to master device of a group, * which this device is member of. */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b04d643fc3c..8fb134da034 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -419,7 +419,7 @@ static void arp_reply(struct sk_buff *skb) return; size = arp_hdr_len(skb->dev); - send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), + send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), LL_RESERVED_SPACE(np->dev)); if (!send_skb) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 68d15448004..7c9bb13b153 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -340,7 +340,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_hold(dev); - skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 68b72a7a180..418862f1bf2 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, * Allocate a buffer */ - skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6250f4239b6..2769dc4a4c8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct iphdr *pip; struct igmpv3_report *pig; - skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; @@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 89dee4346f6..ed45037ce9b 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct net_device *dev = d->dev; struct sk_buff *skb; struct bootp_pkt *b; - int hh_len = LL_RESERVED_SPACE(dev); struct iphdr *h; /* Allocate packet */ - skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL); + skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, + GFP_KERNEL); if (!skb) return; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(dev)); b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); memset(b, 0, sizeof(struct bootp_pkt)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 11d7f753a82..fead049daf4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); - int hh_len; struct iphdr *iph; struct sk_buff *skb; unsigned int iphlen; @@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, if (flags&MSG_PROBE) goto out; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - skb = sock_alloc_send_skb(sk, length+hh_len+15, - flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, + length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0af2e055f88..48cdce9c696 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -780,7 +780,7 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54f91efdae5..fd632dd7f98 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err); + skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); if (!skb) return NULL; @@ -1790,7 +1790,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err); + skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err); if (skb == NULL) { rcu_read_lock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2c74885f835..a55fc05b812 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -479,7 +479,7 @@ static void __ndisc_send(struct net_device *dev, skb = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_RESERVED_SPACE(dev)), + len + LL_ALLOCATED_SPACE(dev)), 1, &err); if (!skb) { ND_PRINTK0(KERN_ERR @@ -1521,7 +1521,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_RESERVED_SPACE(dev)), + len + LL_ALLOCATED_SPACE(dev)), 1, &err); if (buff == NULL) { ND_PRINTK0(KERN_ERR diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 396f0ea1109..232e0dc45bf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -609,7 +609,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; - unsigned int hh_len; int err; if (length > rt->u.dst.dev->mtu) { @@ -619,13 +618,12 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (flags&MSG_PROBE) goto out; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - skb = sock_alloc_send_skb(sk, length+hh_len+15, - flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, + length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 25070240d4a..2cee87da444 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -743,7 +743,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (len > dev->mtu+reserve) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 09cd9c0c2d8..3f964db908a 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -25,11 +25,11 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) struct dst_entry *dst = skb->dst; int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) - skb_headroom(skb); + int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (nhead > 0 || ntail > 0) + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - /* Check tail too... */ return 0; } -- cgit v1.2.3-70-g09d2 From 0686caa35ed17cf5b9043f453957e702a7eb588d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 19 May 2008 16:25:42 -0700 Subject: ndisc: Add missing strategies for per-device retrans timer/reachable time settings. Noticed from Al Viro via David Miller . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ndisc.h | 4 ++++ net/ipv6/addrconf.c | 2 +- net/ipv6/ndisc.c | 8 ++++---- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 9c451ff2f4f..a01b7c4dc76 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -129,6 +129,10 @@ extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, void __user *buffer, size_t *lenp, loff_t *ppos); +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen); #endif extern void inet6_ifinfo_notify(int event, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e591e09e5e4..8dd9155502b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4242,7 +4242,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, - NULL); + ndisc_ifinfo_sysctl_strategy); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a55fc05b812..282fdb31f8e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; -- cgit v1.2.3-70-g09d2 From a3264435b4ca1ccee54cbef2970f2ba4bef39e2d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 19 May 2008 16:54:29 -0700 Subject: ipv6 addrconf: Fix route lifetime setting in corner case. Because of arithmetic overflow avoidance, the actual lifetime setting (vs the value given by RA) did not increase monotonically around 0x7fffffff/HZ. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12bba088034..98aa50c11dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -475,7 +475,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, lifetime = ntohl(rinfo->lifetime); if (lifetime == 0xffffffff) { /* infinity */ - } else if (lifetime > 0x7fffffff/HZ) { + } else if (lifetime > 0x7fffffff/HZ - 1) { /* Avoid arithmetic overflow */ lifetime = 0x7fffffff/HZ - 1; } -- cgit v1.2.3-70-g09d2 From 69cdf8f92a8dd191eee0e834c631d84a140b1121 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 19 May 2008 16:55:13 -0700 Subject: ipv6 route: Fix lifetime in netlink. We could not see appropriate lifetime if the route had been scheduled to expired at 0 (in jiffies). We should check rt6i_flags instead of rt6i_expires to determine whether lifetime is valid or not. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 98aa50c11dd..b45a7c0268c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2200,7 +2200,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; + expires = (rt->rt6i_flags & RTF_EXPIRES) ? + rt->rt6i_expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) goto nla_put_failure; -- cgit v1.2.3-70-g09d2 From 6f704992d3658aadff9e506c7fd80957fce33c5f Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 19 May 2008 16:56:11 -0700 Subject: ipv6 addrconf: Allow infinite prefix lifetime. We need to handle infinite prefix lifetime specially. With help from original reporter "Bonitch, Joseph" . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 73 +++++++++++++++++++++++++++++++++++------------------ net/ipv6/route.c | 4 ++- 2 files changed, 52 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8dd9155502b..3a835578fd1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1764,14 +1764,16 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - /* Avoid arithmetic overflow. Really, we could - save rt_expires in seconds, likely valid_lft, - but it would require division in fib gc, that it - not good. - */ - if (valid_lft >= 0x7FFFFFFF/HZ) + if (valid_lft == INFINITY_LIFE_TIME) + rt_expires = ~0UL; + else if (valid_lft >= 0x7FFFFFFF/HZ) { + /* Avoid arithmetic overflow. Really, we could + * save rt_expires in seconds, likely valid_lft, + * but it would require division in fib gc, that it + * not good. + */ rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - else + } else rt_expires = valid_lft * HZ; /* @@ -1779,7 +1781,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * Avoid arithmetic overflow there as well. * Overflow can happen only if HZ < USER_HZ. */ - if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) + if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ) rt_expires = 0x7FFFFFFF / USER_HZ; if (pinfo->onlink) { @@ -1788,17 +1790,28 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { - if (rt->rt6i_flags&RTF_EXPIRES) { - if (valid_lft == 0) { - ip6_del_rt(rt); - rt = NULL; - } else { - rt->rt6i_expires = jiffies + rt_expires; - } + /* Autoconf prefix route */ + if (valid_lft == 0) { + ip6_del_rt(rt); + rt = NULL; + } else if (~rt_expires) { + /* not infinity */ + rt->rt6i_expires = jiffies + rt_expires; + rt->rt6i_flags |= RTF_EXPIRES; + } else { + rt->rt6i_flags &= ~RTF_EXPIRES; + rt->rt6i_expires = 0; } } else if (valid_lft) { + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + clock_t expires = 0; + if (~rt_expires) { + /* not infinity */ + flags |= RTF_EXPIRES; + expires = jiffies_to_clock_t(rt_expires); + } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); + dev, expires, flags); } if (rt) dst_release(&rt->u.dst); @@ -2021,7 +2034,8 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, struct inet6_dev *idev; struct net_device *dev; int scope; - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; ASSERT_RTNL(); @@ -2041,8 +2055,13 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, if (valid_lft == INFINITY_LIFE_TIME) { ifa_flags |= IFA_F_PERMANENT; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + expires = 0; + } else { + if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + flags = RTF_EXPIRES; + expires = jiffies_to_clock_t(valid_lft * HZ); + } if (prefered_lft == 0) ifa_flags |= IFA_F_DEPRECATED; @@ -2060,7 +2079,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for @@ -3148,7 +3167,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, u32 prefered_lft, u32 valid_lft) { - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3156,8 +3176,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, if (valid_lft == INFINITY_LIFE_TIME) { ifa_flags |= IFA_F_PERMANENT; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + expires = 0; + } else { + if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + flags = RTF_EXPIRES; + expires = jiffies_to_clock_t(valid_lft * HZ); + } if (prefered_lft == 0) ifa_flags |= IFA_F_DEPRECATED; @@ -3176,7 +3201,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, ipv6_ifa_notify(0, ifp); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); addrconf_verify(0); return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b45a7c0268c..b7a4a875a26 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1106,7 +1106,9 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + jiffies + clock_t_to_jiffies(cfg->fc_expires) : + 0; if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; -- cgit v1.2.3-70-g09d2 From 1ac06e0306d0192a7a4d9ea1c9e06d355ce7e7d3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 20 May 2008 14:32:14 -0700 Subject: ipsec: Use the correct ip_local_out function Because the IPsec output function xfrm_output_resume does its own dst_output call it should always call __ip_local_output instead of ip_local_output as the latter may invoke dst_output directly. Otherwise the return values from nf_hook and dst_output may clash as they both use the value 1 but for different purposes. When that clash occurs this can cause a packet to be used after it has been freed which usually leads to a crash. Because the offending value is only returned from dst_output with qdiscs such as HTB, this bug is normally not visible. Thanks to Marco Berizzi for his perseverance in tracking this down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- net/ipv6/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92f90ae46f4..df41026b60d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = { .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, - .local_out = ip_local_out, + .local_out = __ip_local_out, .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b7a4a875a26..48534c6c073 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, - .local_out = ip6_local_out, + .local_out = __ip6_local_out, .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; -- cgit v1.2.3-70-g09d2 From 071f92d05967a0c8422f1c8587ce0b4d90a8b447 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 21 May 2008 17:47:54 -0700 Subject: net: The world is not perfect patch. Unless there will be any objection here, I suggest consider the following patch which simply removes the code for the -DI_WISH_WORLD_WERE_PERFECT in the three methods which use it. The compilation errors we get when using -DI_WISH_WORLD_WERE_PERFECT show that this code was not built and not used for really a long time. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 146 +----------------------------------------------------- net/ipv4/ipip.c | 130 +----------------------------------------------- net/ipv6/sit.c | 89 +-------------------------------- 3 files changed, 3 insertions(+), 362 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ada033406d..4342cba4ff8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev) static void ipgre_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. @@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipgre_lock); return; -#else - struct iphdr *iph = (struct iphdr*)dp; - struct iphdr *eiph; - __be16 *p = (__be16*)(dp+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - __be16 flags; - int grehlen = (iph->ihl<<2) + 4; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (p[1] != htons(ETH_P_IP)) - return; - - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_CSUM) - grehlen += 4; - if (flags&GRE_KEY) - grehlen += 4; - if (flags&GRE_SEQ) - grehlen += 4; - } - if (len < grehlen + sizeof(struct iphdr)) - return; - eiph = (struct iphdr*)(dp + grehlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < (iph->ihl<<2)) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - n -= grehlen; - rel_info = htonl(n << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < grehlen+68) - return; - n -= grehlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_GRE; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) { - kfree_skb(skb2); - return; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_IPGRE) { - ip_rt_put(rt); - kfree_skb(skb2); - return; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_IPGRE) { - kfree_skb(skb2); - return; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); -#endif } static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 149111f08e8..af5cb53da5c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev) static int ipip_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct iphdr *eiph; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (len < hlen + sizeof(struct iphdr)) - return 0; - eiph = (struct iphdr*)(dp + hlen); - - switch (type) { - default: - return 0; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < hlen) - return 0; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - rel_info = htonl((n - hlen) << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < hlen+68) - return 0; - n -= hlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return 0; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_daddr = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) { - kfree_skb(skb2); - return 0; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_daddr = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); - kfree_skb(skb2); - return 0; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_TUNNEL) { - kfree_skb(skb2); - return 0; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return 0; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); - return 0; -#endif } static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5a6fab95569..3de6ffdaedf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -403,9 +403,8 @@ static void ipip6_tunnel_uninit(struct net_device *dev) static int ipip6_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -462,92 +461,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip6_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct ipv6hdr *iph6; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - int rel_info = 0; - struct sk_buff *skb2; - struct rt6_info *rt6i; - - if (len < hlen + sizeof(struct ipv6hdr)) - return; - iph6 = (struct ipv6hdr*)(dp + hlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - if (icmp_hdr(skb)->un.gateway < hlen) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMPV6_PARAMPROB; - rel_info = icmp_hdr(skb)->un.gateway - hlen; - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* Too complicated case ... */ - return; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - rel_type = ICMPV6_TIME_EXCEED; - rel_code = ICMPV6_EXC_HOPLIMIT; - break; - } - - /* Prepare fake skb to feed it to icmpv6_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)iph6); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0); - if (rt6i && rt6i->rt6i_dev) { - skb2->dev = rt6i->rt6i_dev; - - rt6i = rt6_lookup(dev_net(skb->dev), - &iph6->daddr, &iph6->saddr, NULL, 0); - - if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { - struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); - if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - } - icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); - } - } - kfree_skb(skb2); - return 0; -#endif } static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From d430a227d272fa514bade388bf511dba4ec2962a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Jun 2008 10:59:02 +0100 Subject: bogus format in ip6mr ptrdiff_t is %t..., not %Z... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2de3c464fe7..14796181e8b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -197,7 +197,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, - "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", + "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", vif - vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, -- cgit v1.2.3-70-g09d2 From b9c698964614f71b9c8afeca163a945b4c2e2d20 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 4 Jun 2008 09:58:27 -0700 Subject: netfilter: nf_conntrack_ipv6: fix inconsistent lock state in nf_ct_frag6_gather() [ 63.531438] ================================= [ 63.531520] [ INFO: inconsistent lock state ] [ 63.531520] 2.6.26-rc4 #7 [ 63.531520] --------------------------------- [ 63.531520] inconsistent {softirq-on-W} -> {in-softirq-W} usage. [ 63.531520] tcpsic6/3864 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 63.531520] (&q->lock#2){-+..}, at: [] ipv6_frag_rcv+0xd0/0xbd0 [ 63.531520] {softirq-on-W} state was registered at: [ 63.531520] [] __lock_acquire+0x3aa/0x1080 [ 63.531520] [] lock_acquire+0x76/0xa0 [ 63.531520] [] _spin_lock+0x2b/0x40 [ 63.531520] [] nf_ct_frag6_gather+0x3f6/0x910 ... According to this and another similar lockdep report inet_fragment locks are taken from nf_ct_frag6_gather() with softirqs enabled, but these locks are mainly used in softirq context, so disabling BHs is necessary. Reported-and-tested-by: Eric Sesterhenn Signed-off-by: Jarek Poplawski Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2dccad48058..e65e26e210e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -209,7 +209,9 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; hash = ip6qhashfn(id, src, dst); + local_bh_disable(); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + local_bh_enable(); if (q == NULL) goto oom; @@ -638,10 +640,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->q.lock); + spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); fq_put(fq); goto ret_orig; @@ -653,7 +655,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); fq_put(fq); return ret_skb; -- cgit v1.2.3-70-g09d2 From a3c960899e042bc1c2b730a2115fa32da7802039 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 01:30:25 +0900 Subject: [IPV6] UDP: Possible dst leak in udpv6_sendmsg. ip6_sk_dst_lookup returns held dst entry. It should be released on all paths beyond this point. Add missed release when up->pending is set. Bug report and initial patch by Denis V. Lunev . Signed-off-by: YOSHIFUJI Hideaki Acked-by: Denis V. Lunev --- net/ipv6/udp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1fd784f3e2e..47123bf5eb0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -848,12 +848,14 @@ do_append_data: } else { dst_release(dst); } + dst = NULL; } if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); out: + dst_release(dst); fl6_sock_release(flowlabel); if (!err) return len; -- cgit v1.2.3-70-g09d2 From 24ef0da7b864435f221f668bc8a324160d063e78 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 28 May 2008 16:54:22 +0200 Subject: [IPV6] ADDRCONF: Check range of prefix length As of now, the prefix length is not vaildated when adding or deleting addresses. The value is passed directly into the inet6_ifaddr structure and later passed on to memcmp() as length indicator which relies on the value never to exceed 128 (bits). Due to the missing check, the currently code allows for any 8 bit value to be passed on as prefix length while using the netlink interface, and any 32 bit value while using the ioctl interface. [Use unsigned int instead to generate better code - yoshfuji] Signed-off-by: Thomas Graf Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a835578fd1..c3b20c5afa3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2027,7 +2027,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, - int plen, __u8 ifa_flags, __u32 prefered_lft, + unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; @@ -2039,6 +2039,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, ASSERT_RTNL(); + if (plen > 128) + return -EINVAL; + /* check the lifetime */ if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; @@ -2095,12 +2098,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, } static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, - int plen) + unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + if (plen > 128) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; -- cgit v1.2.3-70-g09d2 From 82836372311a5cbf9cc5f4f47f9b56cb9edfe90d Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 27 May 2008 00:04:43 +0800 Subject: [IPV6] TUNNEL6: Fix incoming packet length check for inter-protocol tunnel. I discover a strange behavior in [ipv4 in ipv6] tunnel. When IPv6 tunnel payload is less than 40(0x28), packet can be sent to network, received in physical interface, but not seen in IP tunnel interface. No counter increase in tunnel interface. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/tunnel6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 6323921b40b..669f280989c 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; for (handler = tunnel46_handlers; handler; handler = handler->next) -- cgit v1.2.3-70-g09d2 From 4bed72e4f5502ea3322f0a00794815fa58951abe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 27 May 2008 17:37:49 +0900 Subject: [IPV6] ADDRCONF: Allow longer lifetime on 64bit archs. - Allow longer lifetimes (>= 0x7fffffff/HZ) on 64bit archs by using unsigned long. - Shadow this arithmetic overflow workaround by introducing helper functions: addrconf_timeout_fixup() and addrconf_finite_timeout(). Signed-off-by: YOSHIFUJI Hideaki --- include/net/addrconf.h | 22 ++++++++++++ net/ipv6/addrconf.c | 97 ++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 12 ++----- 3 files changed, 75 insertions(+), 56 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 0a2f0372df3..bbd3d583c6e 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,6 +94,28 @@ extern void addrconf_join_solict(struct net_device *dev, extern void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr); +static inline unsigned long addrconf_timeout_fixup(u32 timeout, + unsigned unit) +{ + if (timeout == 0xffffffff) + return ~0UL; + + /* + * Avoid arithmetic overflow. + * Assuming unit is constant and non-zero, this "if" statement + * will go away on 64bit archs. + */ + if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit) + return LONG_MAX / unit; + + return timeout; +} + +static inline int addrconf_finite_timeout(unsigned long timeout) +{ + return ~timeout; +} + /* * IPv6 Address Label subsystem (addrlabel.c) */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3b20c5afa3..147588f4c7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) onlink = -1; spin_lock(&ifa->lock); - lifetime = min_t(unsigned long, - ifa->valid_lft, 0x7fffffffUL/HZ); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. + */ if (time_before(expires, ifa->tstamp + lifetime * HZ)) expires = ifa->tstamp + lifetime * HZ; @@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) __u32 valid_lft; __u32 prefered_lft; int addr_type; - unsigned long rt_expires; struct inet6_dev *in6_dev; pinfo = (struct prefix_info *) opt; @@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - if (valid_lft == INFINITY_LIFE_TIME) - rt_expires = ~0UL; - else if (valid_lft >= 0x7FFFFFFF/HZ) { + if (pinfo->onlink) { + struct rt6_info *rt; + unsigned long rt_expires; + /* Avoid arithmetic overflow. Really, we could * save rt_expires in seconds, likely valid_lft, * but it would require division in fib gc, that it * not good. */ - rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - } else - rt_expires = valid_lft * HZ; + if (HZ > USER_HZ) + rt_expires = addrconf_timeout_fixup(valid_lft, HZ); + else + rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ); - /* - * We convert this (in jiffies) to clock_t later. - * Avoid arithmetic overflow there as well. - * Overflow can happen only if HZ < USER_HZ. - */ - if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ) - rt_expires = 0x7FFFFFFF / USER_HZ; + if (addrconf_finite_timeout(rt_expires)) + rt_expires *= HZ; - if (pinfo->onlink) { - struct rt6_info *rt; rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); @@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (valid_lft == 0) { ip6_del_rt(rt); rt = NULL; - } else if (~rt_expires) { + } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ rt->rt6i_expires = jiffies + rt_expires; rt->rt6i_flags |= RTF_EXPIRES; @@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) rt->rt6i_expires = 0; } } else if (valid_lft) { - int flags = RTF_ADDRCONF | RTF_PREFIX_RT; clock_t expires = 0; - if (~rt_expires) { + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ flags |= RTF_EXPIRES; expires = jiffies_to_clock_t(rt_expires); @@ -2036,6 +2035,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, int scope; u32 flags; clock_t expires; + unsigned long timeout; ASSERT_RTNL(); @@ -2055,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, scope = ipv6_addr_scope(pfx); - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - expires = 0; - } else { - if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; flags = RTF_EXPIRES; - expires = jiffies_to_clock_t(valid_lft * HZ); + } else { + expires = 0; + flags = 0; + ifa_flags |= IFA_F_PERMANENT; } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); @@ -3175,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, { u32 flags; clock_t expires; + unsigned long timeout; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - expires = 0; - } else { - if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; flags = RTF_EXPIRES; - expires = jiffies_to_clock_t(valid_lft * HZ); + } else { + expires = 0; + flags = 0; + ifa_flags |= IFA_F_PERMANENT; } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } spin_lock_bh(&ifp->lock); ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 48534c6c073..220cffe9e63 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (pref == ICMPV6_ROUTER_PREF_INVALID) pref = ICMPV6_ROUTER_PREF_MEDIUM; - lifetime = ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ - 1) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; -- cgit v1.2.3-70-g09d2 From 05335c2220c4911b69cb1bdd79e603ab08088372 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Wed, 28 May 2008 16:23:47 +0800 Subject: [IPV6]: Fix the return value of get destination options with NULL data pointer If we pass NULL data buffer to getsockopt(), it will return 0, and the option length is set to -EFAULT: getsockopt(sk, IPPROTO_IPV6, IPV6_DSTOPTS, NULL, &len); This is because ipv6_getsockopt_sticky() will return -EFAULT or -EINVAL if some error occur. This patch fix this problem. Signed-off-by: Yang Hongyang Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56d55fecf8e..aa7bedf780e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -975,6 +975,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, len = ipv6_getsockopt_sticky(sk, np->opt, optname, optval, len); release_sock(sk); + /* check if ipv6_getsockopt_sticky() returns err code */ + if (len < 0) + return len; return put_user(len, optlen); } -- cgit v1.2.3-70-g09d2 From 95b496b66615d8c43f77702049b1bd01e2f06595 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Wed, 28 May 2008 16:27:28 +0800 Subject: [IPV6]: Fix the data length of get destination options with short length If get destination options with length which is not enough for that option,getsockopt() will still return the real length of the option, which is larger then the buffer space. This is because ipv6_getsockopt_sticky() returns the real length of the option. This patch fix this problem. Signed-off-by: Yang Hongyang Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index aa7bedf780e..9293b9f0ac2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -832,7 +832,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_user(optval, hdr, len)) return -EFAULT; - return ipv6_optlen(hdr); + return len; } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, -- cgit v1.2.3-70-g09d2 From 187e38384c4abfbbb1b880fab234d16c2df23a25 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 13:01:37 +0900 Subject: [IPV6]: Check outgoing interface even if source address is unspecified. The outgoing interface index (ipi6_ifindex) in IPV6_PKTINFO ancillary data, is not checked if the source address (ipi6_addr) is unspecified. If the ipi6_ifindex is the not-exist interface, it should be fail. Based on patch from Shan Wei and Brian Haley . Signed-off-by: Shan Wei Signed-off-by: Brian Haley Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/datagram.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 94fa6ae77cf..53e3883f766 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -509,7 +509,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { int addr_type; - struct net_device *dev = NULL; if (!CMSG_OK(msg, cmsg)) { err = -EINVAL; @@ -522,6 +521,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: case IPV6_2292PKTINFO: + { + struct net_device *dev = NULL; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -535,32 +537,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->oif = src_info->ipi6_ifindex; } - addr_type = ipv6_addr_type(&src_info->ipi6_addr); + addr_type = __ipv6_addr_type(&src_info->ipi6_addr); - if (addr_type == IPV6_ADDR_ANY) - break; + if (fl->oif) { + dev = dev_get_by_index(&init_net, fl->oif); + if (!dev) + return -ENODEV; + } else if (addr_type & IPV6_ADDR_LINKLOCAL) + return -EINVAL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (!src_info->ipi6_ifindex) - return -EINVAL; - else { - dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); - if (!dev) - return -ENODEV; - } - } - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, - dev, 0)) { - if (dev) - dev_put(dev); - err = -EINVAL; - goto exit_f; + if (addr_type != IPV6_ADDR_ANY) { + int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; + if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + strict ? dev : NULL, 0)) + err = -EINVAL; + else + ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } + if (dev) dev_put(dev); - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + if (err) + goto exit_f; + break; + } case IPV6_FLOWINFO: if (cmsg->cmsg_len < CMSG_LEN(4)) { -- cgit v1.2.3-70-g09d2 From 91e1908f569dd96a25a3947de8771e6cc93999dd Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 13:02:49 +0900 Subject: [IPV6] NETNS: Handle ancillary data in appropriate namespace. Signed-off-by: YOSHIFUJI Hideaki --- include/net/transp_v6.h | 3 ++- net/ipv6/datagram.c | 7 ++++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 27394e0447d..112934a3288 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -40,7 +40,8 @@ extern int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); -extern int datagram_send_ctl(struct msghdr *msg, +extern int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 53e3883f766..b9c2de84a8a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, +int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass) { @@ -540,7 +541,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, addr_type = __ipv6_addr_type(&src_info->ipi6_addr); if (fl->oif) { - dev = dev_get_by_index(&init_net, fl->oif); + dev = dev_get_by_index(net, fl->oif); if (!dev) return -ENODEV; } else if (addr_type & IPV6_ADDR_LINKLOCAL) @@ -548,7 +549,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + if (!ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index eb7a940310f..37a4e777e34 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9293b9f0ac2..3eef8e5b363 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -416,7 +416,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); if (retv) goto done; update: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 232e0dc45bf..603df76e052 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 47123bf5eb0..1b35c472200 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -731,7 +731,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; -- cgit v1.2.3-70-g09d2 From 49d074f4009a7b5ce9c17b040f978abcb4d7f6f6 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:06 +0400 Subject: [IPV6]: Do not change protocol for raw IPv6 sockets. It is not allowed to change underlying protocol for int fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3eef8e5b363..1afe210d628 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -161,6 +161,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; struct sk_buff *pktopt; + if (sk->sk_type == SOCK_RAW) + break; + if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) -- cgit v1.2.3-70-g09d2 From 36d926b94a9908937593e5669162305a071b9cc3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:07 +0400 Subject: [IPV6]: inet_sk(sk)->cork.opt leak IPv6 UDP sockets wth IPv4 mapped address use udp_sendmsg to send the data actually. In this case ip_flush_pending_frames should be called instead of ip6_flush_pending_frames. Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/udp.h b/include/net/udp.h index 3e55a99b0ba..ccce8370704 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -135,6 +135,7 @@ extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); +extern void udp_flush_pending_frames(struct sock *sk); extern int udp_rcv(struct sk_buff *skb); extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db1cb7c96d6..56fcda3694b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info) /* * Throw away all pending data and cancel the corking. Socket is locked. */ -static void udp_flush_pending_frames(struct sock *sk) +void udp_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); @@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } +EXPORT_SYMBOL(udp_flush_pending_frames); /** * udp4_hwcsum_outgoing - handle outgoing HW checksumming diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1b35c472200..dd309626ae9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); - if (up->pending) { + if (up->pending == AF_INET) + udp_flush_pending_frames(sk); + else if (up->pending) { up->len = 0; up->pending = 0; ip6_flush_pending_frames(sk); -- cgit v1.2.3-70-g09d2 From 9596cc826e2e52bfc318ca37a6c52fe3d72990a3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:08 +0400 Subject: [IPV6]: Do not change protocol for UDPv6 sockets with pending sent data. Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1afe210d628..26b83e512a0 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -164,9 +164,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_type == SOCK_RAW) break; - if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && - sk->sk_protocol != IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_UDPLITE) { + struct udp_sock *up = udp_sk(sk); + if (up->pending == AF_INET6) { + retv = -EBUSY; + break; + } + } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { -- cgit v1.2.3-70-g09d2 From 22dd485022f3d0b162ceb5e67d85de7c3806aa20 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:16:12 -0700 Subject: raw: Raw socket leak. The program below just leaks the raw kernel socket int main() { int fd = socket(PF_INET, SOCK_RAW, IPPROTO_UDP); struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); inet_aton("127.0.0.1", &addr.sin_addr); addr.sin_family = AF_INET; addr.sin_port = htons(2048); sendto(fd, "a", 1, MSG_MORE, &addr, sizeof(addr)); return 0; } Corked packet is allocated via sock_wmalloc which holds the owner socket, so one should uncork it and flush all pending data on close. Do this in the same way as in UDP. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv4/raw.c | 9 +++++++++ net/ipv6/raw.c | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index fead049daf4..e7e091d365f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw_destroy(struct sock *sk) +{ + lock_sock(sk); + ip_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* This gets rid of all the nasties in af_inet. -DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -820,6 +828,7 @@ struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, .close = raw_close, + .destroy = raw_destroy, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = raw_ioctl, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 603df76e052..8fee9a15b2d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1164,6 +1164,14 @@ static void rawv6_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw6_destroy(struct sock *sk) +{ + lock_sock(sk); + ip6_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); @@ -1187,6 +1195,7 @@ struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, + .destroy = raw6_destroy, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, -- cgit v1.2.3-70-g09d2 From 2e761e0532a784816e7e822dbaaece8c5d4be14d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 9 Jun 2008 15:53:30 -0700 Subject: ipv6 netns: init net is used to set bindv6only for new sock The bindv6only is tuned via sysctl. It is already on a struct net and per-net sysctls allow for its modification (ipv6_sysctl_net_init). Despite this the value configured in the init net is used for the rest of them. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3c6aafb0218..e84b3fd17fb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -191,7 +191,7 @@ lookup_protocol: np->mcast_hops = -1; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = init_net.ipv6.sysctl.bindv6only; + np->ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. -- cgit v1.2.3-70-g09d2 From ce4a7d0d48bbaed78ccbb0bafb9229651a40303a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Jun 2008 12:39:35 -0700 Subject: inet{6}_request_sock: Init ->opt and ->pktopts in the constructor Wei Yongjun noticed that we may call reqsk_free on request sock objects where the opt fields may not be initialized, fix it by introducing inet_reqsk_alloc where we initialize ->opt to NULL and set ->pktopts to NULL in inet6_reqsk_alloc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 +++- include/net/inet_sock.h | 10 ++++++++++ net/dccp/ipv4.c | 3 +-- net/dccp/ipv6.c | 1 - net/ipv4/syncookies.c | 3 +-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/syncookies.c | 1 - net/ipv6/tcp_ipv6.c | 1 - 8 files changed, 16 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 10b666b61ad..cde056e0818 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -396,8 +396,10 @@ static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *op { struct request_sock *req = reqsk_alloc(ops); - if (req != NULL) + if (req != NULL) { inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req); + inet6_rsk(req)->pktopts = NULL; + } return req; } diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a42cd63d241..9fabe5b3891 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -197,4 +197,14 @@ static inline int inet_iif(const struct sk_buff *skb) return skb->rtable->rt_iif; } +static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) +{ + struct request_sock *req = reqsk_alloc(ops); + + if (req != NULL) + inet_rsk(req)->opt = NULL; + + return req; +} + #endif /* _INET_SOCK_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index c22a3780c14..37d27bcb361 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops); if (req == NULL) goto drop; @@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9b1129bb7ec..f7fe2a572d7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); - ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 73ba98921d6..d182a2a2629 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, cookie_check_timestamp(&tcp_opt); ret = NULL; - req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) goto out; @@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd601a866c2..4f8485c67d1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp_request_sock_ops); + req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) goto drop; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 938ce4ecde5..3ecc1157994 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); - ireq6->pktopts = NULL; if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 715965f0fac..cb46749d4c3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - treq->pktopts = NULL; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); -- cgit v1.2.3-70-g09d2 From 20c61fbd8deb2ada0ac3acecf6156a986dbfff2d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 28 Apr 2008 14:40:55 +0900 Subject: ipv6 mcast: Check address family of gf_group in getsockopt(MS_FILTER). Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 26b83e512a0..ce794d6acb7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -874,6 +874,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) return -EFAULT; + if (gsf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, (struct group_filter __user *)optval, optlen); -- cgit v1.2.3-70-g09d2 From 36e3deae8ba84865fd9eb3f2f21bbc00d49b7544 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 13 May 2008 02:52:55 +0900 Subject: ipv6 route: Fix route lifetime in netlink message. 1) We may have route lifetime larger than INT_MAX. In that case we had wired value in lifetime. Use INT_MAX if lifetime does not fit in s32. 2) Lifetime is valid iif RTF_EXPIRES is set. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 220cffe9e63..d1f3e19b06c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2196,8 +2196,12 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = (rt->rt6i_flags & RTF_EXPIRES) ? - rt->rt6i_expires - jiffies : 0; + if (!(rt->rt6i_flags & RTF_EXPIRES)) + expires = 0; + else if (rt->rt6i_expires - jiffies < INT_MAX) + expires = rt->rt6i_expires - jiffies; + else + expires = INT_MAX; if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) -- cgit v1.2.3-70-g09d2 From e8766fc86b34d44a8c55a2f9d71da69e091b1ca4 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 10 Jun 2008 15:50:55 +0800 Subject: ipv6: Check the hop limit setting in ancillary data. When specifing the outgoing hop limit as ancillary data for sendmsg(), the kernel doesn't check the integer hop limit value as specified in [RFC-3542] section 6.3. Signed-off-by: Shan Wei Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/datagram.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b9c2de84a8a..0f0f94a4033 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -705,6 +705,11 @@ int datagram_send_ctl(struct net *net, } *hlimit = *(int *)CMSG_DATA(cmsg); + if (*hlimit < -1 || *hlimit > 0xff) { + err = -EINVAL; + goto exit_f; + } + break; case IPV6_TCLASS: -- cgit v1.2.3-70-g09d2 From 28d4488216645cd71402925cffde9528b0cfdb7e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 12 Jun 2008 03:14:51 +0900 Subject: ipv6: Check IPV6_MULTICAST_LOOP option value. Only 0 and 1 are valid for IPV6_MULTICAST_LOOP socket option, and we should return an error of EINVAL otherwise, per RFC3493. Based on patch from Shan Wei . Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ce794d6acb7..9a3697172d5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -458,6 +458,8 @@ done: case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) goto e_inval; + if (val != valbool) + goto e_inval; np->mc_loop = valbool; retv = 0; break; -- cgit v1.2.3-70-g09d2 From 1717699cd5130009b7cd6756e883d8582c1fe706 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 12 Jun 2008 03:27:26 +0900 Subject: ipv6: Fail with appropriate error code when setting not-applicable sockopt. IPV6_MULTICAST_HOPS, for example, is not valid for stream sockets. Since they are virtually unavailable for stream sockets, we should return ENOPROTOOPT instead of EINVAL. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9a3697172d5..c042ce19bd1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -67,7 +67,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) - return -EINVAL; + return -ENOPROTOOPT; new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -446,7 +446,7 @@ done: case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) @@ -466,7 +466,7 @@ done: case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; @@ -862,7 +862,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) - return -EINVAL; + return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; -- cgit v1.2.3-70-g09d2 From f23d60de719e639690b2dc5c2d0e4243ff614b7a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jun 2008 14:47:58 -0700 Subject: ipv6: Fix duplicate initialization of rawv6_prot.destroy In changeset 22dd485022f3d0b162ceb5e67d85de7c3806aa20 ("raw: Raw socket leak.") code was added so that we flush pending frames on raw sockets to avoid leaks. The ipv4 part was fine, but the ipv6 part was not done correctly. Unlike the ipv4 side, the ipv6 code already has a .destroy method for rawv6_prot. So now there were two assignments to this member, and what the compiler does is use the last one, effectively making the ipv6 parts of that changeset a NOP. Fix this by removing the: .destroy = inet6_destroy_sock, line, and adding an inet6_destroy_sock() call to the end of raw6_destroy(). Noticed by Al Viro. Signed-off-by: David S. Miller Acked-by: YOSHIFUJI Hideaki --- net/ipv6/raw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8fee9a15b2d..3aee12310d9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1169,7 +1169,8 @@ static int raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - return 0; + + return inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) @@ -1200,7 +1201,6 @@ struct proto rawv6_prot = { .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, - .destroy = inet6_destroy_sock, .setsockopt = rawv6_setsockopt, .getsockopt = rawv6_getsockopt, .sendmsg = rawv6_sendmsg, -- cgit v1.2.3-70-g09d2 From 2b4743bd6be9fedaa560f8c6dc3997e9ec21b99b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 16 Jun 2008 16:48:20 -0700 Subject: ipv6 sit: Avoid extra need for compat layer in PRL management. We've introduced extra need of compat layer for ip_tunnel_prl{} for PRL (Potential Router List) management. Though compat_ioctl is still missing in ipv4/ipv6, let's make the interface more straight-forward and eliminate extra need for nasty compat layer anyway since the interface is new for 2.6.26. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 2 +- net/ipv6/sit.c | 44 ++++++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 21 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index f1fbe9c930d..d4efe401470 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -41,7 +41,7 @@ struct ip_tunnel_prl { __u16 __reserved; __u32 datalen; __u32 __reserved2; - void __user *data; + /* data follows */ }; /* PRL flags */ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3de6ffdaedf..32e871a6c25 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -222,15 +222,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) +static int ipip6_tunnel_get_prl(struct ip_tunnel *t, + struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl *kp; + struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; - cmax = a->datalen / sizeof(*a); - if (cmax > 1 && a->addr != htonl(INADDR_ANY)) + if (copy_from_user(&kprl, a, sizeof(kprl))) + return -EFAULT; + cmax = kprl.datalen / sizeof(kprl); + if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) cmax = 1; /* For simple GET or for root users, @@ -261,26 +264,25 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) for (prl = t->prl; prl; prl = prl->next) { if (c > cmax) break; - if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr) + if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) continue; kp[c].addr = prl->addr; kp[c].flags = prl->flags; c++; - if (a->addr != htonl(INADDR_ANY)) + if (kprl.addr != htonl(INADDR_ANY)) break; } out: read_unlock(&ipip6_lock); len = sizeof(*kp) * c; - ret = len ? copy_to_user(a->data, kp, len) : 0; + ret = 0; + if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) + ret = -EFAULT; kfree(kp); - if (ret) - return -EFAULT; - a->datalen = len; - return 0; + return ret; } static int @@ -873,11 +875,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCGETPRL: + err = -EINVAL; + if (dev == sitn->fb_tunnel_dev) + goto done; + err = -ENOENT; + if (!(t = netdev_priv(dev))) + goto done; + err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); + break; + case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: err = -EPERM; - if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) goto done; err = -EINVAL; if (dev == sitn->fb_tunnel_dev) @@ -890,12 +901,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; switch (cmd) { - case SIOCGETPRL: - err = ipip6_tunnel_get_prl(t, &prl); - if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, - &prl, sizeof(prl))) - err = -EFAULT; - break; case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); break; @@ -904,8 +909,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } - if (cmd != SIOCGETPRL) - netdev_state_change(dev); + netdev_state_change(dev); break; default: -- cgit v1.2.3-70-g09d2 From aea7427f70cce5fa8f99ce447b213e9e3b49f24c Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 19 Jun 2008 16:29:39 -0700 Subject: ipv6: Remove options header when setsockopt's optlen is 0 Remove the sticky Hop-by-Hop options header by calling setsockopt() for IPV6_HOPOPTS with a zero option length, per RFC3542. Routing header and Destination options header does the same as Hop-by-Hop options header. Signed-off-by: Shan Wei Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c042ce19bd1..86e28a75267 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -345,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + + /* remove any sticky options header with a zero option + * length, per RFC3542. + */ if (optlen == 0) optval = NULL; + else if (optlen < sizeof(struct ipv6_opt_hdr) || + optlen & 0x7 || optlen > 8 * 255) + goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - if (optlen < sizeof(struct ipv6_opt_hdr) || - optlen & 0x7 || optlen > 8 * 255) - goto e_inval; - opt = ipv6_renew_options(sk, np->opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); -- cgit v1.2.3-70-g09d2 From f630e43a215a3129d0c1173cae0bce6ea4855cf7 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 19 Jun 2008 16:33:57 -0700 Subject: ipv6: Drop packets for loopback address from outside of the box. [ Based upon original report and patch by Karsten Keil. Karsten has verified that this fixes the TAHI test case "ICMPv6 test v6LC.5.1.2 Part F". -DaveM ] Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 6 ++++++ net/ipv6/ip6_input.c | 9 +++++++++ 2 files changed, 15 insertions(+) (limited to 'net/ipv6') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e0a612bc9c4..f422f7218e1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -367,6 +367,12 @@ static inline int ipv6_addr_any(const struct in6_addr *a) a->s6_addr32[2] | a->s6_addr32[3] ) == 0); } +static inline int ipv6_addr_loopback(const struct in6_addr *a) +{ + return ((a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0); +} + static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 4e5c8615832..17eb48b8e32 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + /* + * RFC4291 2.5.3 + * A packet received on an interface with a destination address + * of loopback must be dropped. + */ + if (!(dev->flags & IFF_LOOPBACK) && + ipv6_addr_loopback(&hdr->daddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); -- cgit v1.2.3-70-g09d2 From 88a6f4ad76be425f47df7f892baf913bcd466fb3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 24 Jun 2008 13:30:45 -0700 Subject: netfilter: ip6table_mangle: don't reroute in LOCAL_IN Rerouting should only happen in LOCAL_OUT, in INPUT its useless since the packet has already chosen its final destination. Noticed by Alexey Dobriyan . Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6table_mangle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 27a5e8b48d9..f405cea21a8 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -129,7 +129,7 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, -- cgit v1.2.3-70-g09d2 From 7be87351a1f6430426e88b4fcde353ab3330caff Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 27 Jun 2008 20:00:19 -0700 Subject: tcp: /proc/net/tcp rto,ato values not scaled properly (v2) I found another case where we are sending information to userspace in the wrong HZ scale. This should have been fixed back in 2.5 :-( This means an ABI change but as it stands there is no way for an application like ss to get the right value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 12695be2c25..ffe869ac1bc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2291,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) } seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", + "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : @@ -2303,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb46749d4c3..40ea9c36d24 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2036,7 +2036,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -2052,8 +2052,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); -- cgit v1.2.3-70-g09d2 From 9a375803feaadb6c34e0807bd9325885dcca5c00 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 27 Jun 2008 20:06:08 -0700 Subject: inet fragments: fix race between inet_frag_find and inet_frag_secret_rebuild The problem is that while we work w/o the inet_frags.lock even read-locked the secret rebuild timer may occur (on another CPU, since BHs are still disabled in the inet_frag_find) and change the rnd seed for ipv4/6 fragments. It was caused by my patch fd9e63544cac30a34c951f0ec958038f0529e244 ([INET]: Omit double hash calculations in xxx_frag_intern) late in the 2.6.24 kernel, so this should probably be queued to -stable. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 16 +++++++++++----- net/ipv4/ip_fragment.c | 2 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 3 ++- net/ipv6/reassembly.c | 2 ++ 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4ed429bd595..0546a0bc97e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, - unsigned int hash, void *arg) + void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif + unsigned int hash; write_lock(&f->lock); + /* + * While we stayed w/o the lock other CPU could update + * the rnd seed, so we need to re-calculate the hash + * chain. Fortunatelly the qp_in can be used to get one. + */ + hash = f->hashfn(qp_in); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg, unsigned int hash) + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, if (q == NULL) return NULL; - return inet_frag_intern(nf, q, f, hash, arg); + return inet_frag_intern(nf, q, f, arg); } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, @@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; struct hlist_node *n; - read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); @@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, } read_unlock(&f->lock); - return inet_frag_create(nf, f, key, hash); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6ac635..37221f65915 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e65e26e210e..cf20bc4fd60 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -207,9 +207,10 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.id = id; arg.src = src; arg.dst = dst; + + read_lock_bh(&nf_frags.lock); hash = ip6qhashfn(id, src, dst); - local_bh_disable(); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 798cabc7535..a60d7d12971 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -247,6 +247,8 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.id = id; arg.src = src; arg.dst = dst; + + read_lock(&ip6_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); -- cgit v1.2.3-70-g09d2 From d420895efb259a78dda50f95289571faa6e10e41 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 27 Jun 2008 20:14:54 -0700 Subject: ipv6 route: Convert rt6_device_match() to use RT6_LOOKUP_F_xxx flags. The commit 77d16f450ae0452d7d4b009f78debb1294fb435c ("[IPV6] ROUTE: Unify RT6_F_xxx and RT6_SELECT_F_xxx flags") intended to pass various routing lookup hints around RT6_LOOKUP_F_xxx flags, but conversion was missing for rt6_device_match(). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1f3e19b06c..7ff687020fa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -240,7 +240,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; @@ -253,7 +253,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -266,7 +266,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } return rt; -- cgit v1.2.3-70-g09d2 From b2238566401f01eb796e75750213c7b0fce396b2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 8 Jul 2008 15:13:31 -0700 Subject: ipv6: fix race between ipv6_del_addr and DAD timer Consider the following scenario: ipv6_del_addr(ifp) ipv6_ifa_notify(RTM_DELADDR, ifp) ip6_del_rt(ifp->rt) after returning from the ipv6_ifa_notify and enabling BH-s back, but *before* calling the addrconf_del_timer the ifp->timer fires and: addrconf_dad_timer(ifp) addrconf_dad_completed(ifp) ipv6_ifa_notify(RTM_NEWADDR, ifp) ip6_ins_rt(ifp->rt) then return back to the ipv6_del_addr and: in6_ifa_put(ifp) inet6_ifa_finish_destroy(ifp) dst_release(&ifp->rt->u.dst) After this we have an ifp->rt inserted into fib6 lists, but queued for gc, which in turn can result in oopses in the fib6_run_gc. Maybe some other nasty things, but we caught only the oops in gc so far. The solution is to disarm the ifp->timer before flushing the rt from it. Signed-off-by: Andrey Vagin Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 147588f4c7c..ff61a5cdb0b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -749,12 +749,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); + addrconf_del_timer(ifp); + ipv6_ifa_notify(RTM_DELADDR, ifp); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); - addrconf_del_timer(ifp); - /* * Purge or update corresponding prefix * -- cgit v1.2.3-70-g09d2 From 0ce28553cc018be5022f51e67c87997f7271534e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 10 Jul 2008 16:54:50 -0700 Subject: ipv6: missed namespace context in ipv6_rthdr_rcv Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cd1c993d52..dcf94fdfb86 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -445,7 +445,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(&init_net, addr)) { + if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); -- cgit v1.2.3-70-g09d2