From bc3ed28caaef55e7e3a9316464256353c5f9b1df Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 Jun 2008 16:36:54 -0700
Subject: netlink: Improve returned error codes

Make nlmsg_trim(), nlmsg_cancel(), genlmsg_cancel(), and
nla_nest_cancel() void functions.

Return -EMSGSIZE instead of -1 if the provided message buffer is not
big enough.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h |  4 ++--
 include/net/netlink.h   | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index decdda54682..747c255d1df 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -162,9 +162,9 @@ static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
  * @skb: socket buffer the message is stored in
  * @hdr: generic netlink message header
  */
-static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
+static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
 {
-	return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+	nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
 }
 
 /**
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 112dcdf7e34..dfc3701dfcc 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -556,14 +556,12 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb)
  * @skb: socket buffer the message is stored in
  * @mark: mark to trim to
  *
- * Trims the message to the provided mark. Returns -1.
+ * Trims the message to the provided mark.
  */
-static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
+static inline void nlmsg_trim(struct sk_buff *skb, const void *mark)
 {
 	if (mark)
 		skb_trim(skb, (unsigned char *) mark - skb->data);
-
-	return -1;
 }
 
 /**
@@ -572,11 +570,11 @@ static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
  * @nlh: netlink message header
  *
  * Removes the complete netlink message including all
- * attributes from the socket buffer again. Returns -1.
+ * attributes from the socket buffer again.
  */
-static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return nlmsg_trim(skb, nlh);
+	nlmsg_trim(skb, nlh);
 }
 
 /**
@@ -775,7 +773,7 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
 
 	if (nested_len < 0)
-		return -1;
+		return -EINVAL;
 	if (nested_len >= nla_attr_size(0))
 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
 				 nested_len, policy);
@@ -1080,11 +1078,11 @@ static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
  * @start: container attribute
  *
  * Removes the container attribute and including all nested
- * attributes. Returns -1.
+ * attributes. Returns -EMSGSIZE
  */
-static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
+static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 {
-	return nlmsg_trim(skb, start);
+	nlmsg_trim(skb, start);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From e51171019bb0e1f9fb57c25bd2e38ce652eaea27 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 29 May 2008 19:55:05 +0900
Subject: [SCTP]: Fix NULL dereference of asoc.

Commit 7cbca67c073263c179f605bdbbdc565ab29d801d ("[IPV6]: Support
Source Address Selection API (RFC5014)") introduced NULL dereference
of asoc to sctp_v6_get_saddr in net/sctp/ipv6.c.
Pointed out by Johann Felix Soden <johfel@users.sourceforge.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/sctp/structs.h | 3 ++-
 net/sctp/ipv6.c            | 5 +++--
 net/sctp/protocol.c        | 3 ++-
 net/sctp/transport.c       | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0ce0443c5b7..917d425f054 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -548,7 +548,8 @@ struct sctp_af {
 	struct dst_entry *(*get_dst)	(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr);
-	void		(*get_saddr)	(struct sctp_association *asoc,
+	void		(*get_saddr)	(struct sctp_sock *sk,
+					 struct sctp_association *asoc,
 					 struct dst_entry *dst,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e45e44c6063..e4aac3266fc 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
 /* Fills in the source address(saddr) based on the destination address(daddr)
  * and asoc's bind address list.
  */
-static void sctp_v6_get_saddr(struct sctp_association *asoc,
+static void sctp_v6_get_saddr(struct sctp_sock *sk,
+			      struct sctp_association *asoc,
 			      struct dst_entry *dst,
 			      union sctp_addr *daddr,
 			      union sctp_addr *saddr)
@@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 	if (!asoc) {
 		ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
 				   &daddr->v6.sin6_addr,
-				   inet6_sk(asoc->base.sk)->srcprefs,
+				   inet6_sk(&sk->inet.sk)->srcprefs,
 				   &saddr->v6.sin6_addr);
 		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
 				  NIP6(saddr->v6.sin6_addr));
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0ec234b762c..13ee7fa92e0 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -519,7 +519,8 @@ out:
 /* For v4, the source address is cached in the route entry(dst). So no need
  * to cache it separately and hence this is an empty routine.
  */
-static void sctp_v4_get_saddr(struct sctp_association *asoc,
+static void sctp_v4_get_saddr(struct sctp_sock *sk,
+			      struct sctp_association *asoc,
 			      struct dst_entry *dst,
 			      union sctp_addr *daddr,
 			      union sctp_addr *saddr)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f4938f6c5ab..62082e7b797 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -291,7 +291,7 @@ void sctp_transport_route(struct sctp_transport *transport,
 	if (saddr)
 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
 	else
-		af->get_saddr(asoc, dst, daddr, &transport->saddr);
+		af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
 
 	transport->dst = dst;
 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
-- 
cgit v1.2.3-70-g09d2


From 4bed72e4f5502ea3322f0a00794815fa58951abe Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 27 May 2008 17:37:49 +0900
Subject: [IPV6] ADDRCONF: Allow longer lifetime on 64bit archs.

- Allow longer lifetimes (>= 0x7fffffff/HZ) on 64bit archs
  by using unsigned long.
- Shadow this arithmetic overflow workaround by introducing
  helper functions: addrconf_timeout_fixup() and
  addrconf_finite_timeout().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/addrconf.h | 22 ++++++++++++
 net/ipv6/addrconf.c    | 97 ++++++++++++++++++++++++++------------------------
 net/ipv6/route.c       | 12 ++-----
 3 files changed, 75 insertions(+), 56 deletions(-)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0a2f0372df3..bbd3d583c6e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -94,6 +94,28 @@ extern void			addrconf_join_solict(struct net_device *dev,
 extern void			addrconf_leave_solict(struct inet6_dev *idev,
 					struct in6_addr *addr);
 
+static inline unsigned long addrconf_timeout_fixup(u32 timeout,
+						    unsigned unit)
+{
+	if (timeout == 0xffffffff)
+		return ~0UL;
+
+	/*
+	 * Avoid arithmetic overflow.
+	 * Assuming unit is constant and non-zero, this "if" statement
+	 * will go away on 64bit archs.
+	 */
+	if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit)
+		return LONG_MAX / unit;
+
+	return timeout;
+}
+
+static inline int addrconf_finite_timeout(unsigned long timeout)
+{
+	return ~timeout;
+}
+
 /*
  *	IPv6 Address Label subsystem (addrlabel.c)
  */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c3b20c5afa3..147588f4c7c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 						onlink = -1;
 
 					spin_lock(&ifa->lock);
-					lifetime = min_t(unsigned long,
-							 ifa->valid_lft, 0x7fffffffUL/HZ);
+
+					lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+					/*
+					 * Note: Because this address is
+					 * not permanent, lifetime <
+					 * LONG_MAX / HZ here.
+					 */
 					if (time_before(expires,
 							ifa->tstamp + lifetime * HZ))
 						expires = ifa->tstamp + lifetime * HZ;
@@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
-	unsigned long rt_expires;
 	struct inet6_dev *in6_dev;
 
 	pinfo = (struct prefix_info *) opt;
@@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 *	2) Configure prefixes with the auto flag set
 	 */
 
-	if (valid_lft == INFINITY_LIFE_TIME)
-		rt_expires = ~0UL;
-	else if (valid_lft >= 0x7FFFFFFF/HZ) {
+	if (pinfo->onlink) {
+		struct rt6_info *rt;
+		unsigned long rt_expires;
+
 		/* Avoid arithmetic overflow. Really, we could
 		 * save rt_expires in seconds, likely valid_lft,
 		 * but it would require division in fib gc, that it
 		 * not good.
 		 */
-		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-	} else
-		rt_expires = valid_lft * HZ;
+		if (HZ > USER_HZ)
+			rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
+		else
+			rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
 
-	/*
-	 * We convert this (in jiffies) to clock_t later.
-	 * Avoid arithmetic overflow there as well.
-	 * Overflow can happen only if HZ < USER_HZ.
-	 */
-	if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
-		rt_expires = 0x7FFFFFFF / USER_HZ;
+		if (addrconf_finite_timeout(rt_expires))
+			rt_expires *= HZ;
 
-	if (pinfo->onlink) {
-		struct rt6_info *rt;
 		rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
 				dev->ifindex, 1);
 
@@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 			if (valid_lft == 0) {
 				ip6_del_rt(rt);
 				rt = NULL;
-			} else if (~rt_expires) {
+			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				rt->rt6i_expires = jiffies + rt_expires;
 				rt->rt6i_flags |= RTF_EXPIRES;
@@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 				rt->rt6i_expires = 0;
 			}
 		} else if (valid_lft) {
-			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
 			clock_t expires = 0;
-			if (~rt_expires) {
+			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+			if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				flags |= RTF_EXPIRES;
 				expires = jiffies_to_clock_t(rt_expires);
@@ -2036,6 +2035,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	int scope;
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	ASSERT_RTNL();
 
@@ -2055,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 
 	scope = ipv6_addr_scope(pfx);
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
 
@@ -3175,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 {
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	spin_lock_bh(&ifp->lock);
 	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48534c6c073..220cffe9e63 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct route_info *rinfo = (struct route_info *) opt;
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
-	u32 lifetime;
+	unsigned long lifetime;
 	struct rt6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
@@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 
-	lifetime = ntohl(rinfo->lifetime);
-	if (lifetime == 0xffffffff) {
-		/* infinity */
-	} else if (lifetime > 0x7fffffff/HZ - 1) {
-		/* Avoid arithmetic overflow */
-		lifetime = 0x7fffffff/HZ - 1;
-	}
+	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 
 	if (rinfo->length == 3)
 		prefix = (struct in6_addr *)rinfo->prefix;
@@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
 	if (rt) {
-		if (lifetime == 0xffffffff) {
+		if (!addrconf_finite_timeout(lifetime)) {
 			rt->rt6i_flags &= ~RTF_EXPIRES;
 		} else {
 			rt->rt6i_expires = jiffies + HZ * lifetime;
-- 
cgit v1.2.3-70-g09d2


From 91e1908f569dd96a25a3947de8771e6cc93999dd Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 4 Jun 2008 13:02:49 +0900
Subject: [IPV6] NETNS: Handle ancillary data in appropriate namespace.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/transp_v6.h  | 3 ++-
 net/ipv6/datagram.c      | 7 ++++---
 net/ipv6/ip6_flowlabel.c | 2 +-
 net/ipv6/ipv6_sockglue.c | 2 +-
 net/ipv6/raw.c           | 2 +-
 net/ipv6/udp.c           | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 27394e0447d..112934a3288 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -40,7 +40,8 @@ extern int			datagram_recv_ctl(struct sock *sk,
 						  struct msghdr *msg,
 						  struct sk_buff *skb);
 
-extern int			datagram_send_ctl(struct msghdr *msg,
+extern int			datagram_send_ctl(struct net *net,
+						  struct msghdr *msg,
 						  struct flowi *fl,
 						  struct ipv6_txoptions *opt,
 						  int *hlimit, int *tclass);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 53e3883f766..b9c2de84a8a 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	return 0;
 }
 
-int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
+int datagram_send_ctl(struct net *net,
+		      struct msghdr *msg, struct flowi *fl,
 		      struct ipv6_txoptions *opt,
 		      int *hlimit, int *tclass)
 {
@@ -540,7 +541,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
 			if (fl->oif) {
-				dev = dev_get_by_index(&init_net, fl->oif);
+				dev = dev_get_by_index(net, fl->oif);
 				if (!dev)
 					return -ENODEV;
 			} else if (addr_type & IPV6_ADDR_LINKLOCAL)
@@ -548,7 +549,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
-				if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
+				if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
 						   strict ? dev : NULL, 0))
 					err = -EINVAL;
 				else
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index eb7a940310f..37a4e777e34 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		msg.msg_control = (void*)(fl->opt+1);
 		flowi.oif = 0;
 
-		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9293b9f0ac2..3eef8e5b363 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -416,7 +416,7 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
+		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 232e0dc45bf..603df76e052 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 47123bf5eb0..1b35c472200 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -731,7 +731,7 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
-- 
cgit v1.2.3-70-g09d2


From 36d926b94a9908937593e5669162305a071b9cc3 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 4 Jun 2008 15:49:07 +0400
Subject: [IPV6]: inet_sk(sk)->cork.opt leak

IPv6 UDP sockets wth IPv4 mapped address use udp_sendmsg to send the data
actually. In this case ip_flush_pending_frames should be called instead
of ip6_flush_pending_frames.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/udp.h | 1 +
 net/ipv4/udp.c    | 3 ++-
 net/ipv6/udp.c    | 4 +++-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 3e55a99b0ba..ccce8370704 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -135,6 +135,7 @@ extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len);
+extern void	udp_flush_pending_frames(struct sock *sk);
 
 extern int	udp_rcv(struct sk_buff *skb);
 extern int	udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d6..56fcda3694b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
 /*
  * Throw away all pending data and cancel the corking. Socket is locked.
  */
-static void udp_flush_pending_frames(struct sock *sk)
+void udp_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
 		ip_flush_pending_frames(sk);
 	}
 }
+EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
  * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1b35c472200..dd309626ae9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
-	if (up->pending) {
+	if (up->pending == AF_INET)
+		udp_flush_pending_frames(sk);
+	else if (up->pending) {
 		up->len = 0;
 		up->pending = 0;
 		ip6_flush_pending_frames(sk);
-- 
cgit v1.2.3-70-g09d2


From a6465234814efda9ed1dccdba852953f7508e827 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:38:43 -0700
Subject: sctp: Correctly implement Fast Recovery cwnd manipulations.

Correctly keep track of Fast Recovery state and do not reduce
congestion window multiple times during sucht state.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  8 +++++++-
 net/sctp/transport.c       | 44 ++++++++++++++++++++++++++++++++------------
 2 files changed, 39 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 917d425f054..67592072a32 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -902,7 +902,10 @@ struct sctp_transport {
 	 *		calculation completes (i.e. the DATA chunk
 	 *		is SACK'd) clear this flag.
 	 */
-	int rto_pending;
+	__u8 rto_pending;
+
+	/* Flag to track the current fast recovery state */
+	__u8 fast_recovery;
 
 	/*
 	 * These are the congestion stats.
@@ -921,6 +924,9 @@ struct sctp_transport {
 	/* Data that has been sent, but not acknowledged. */
 	__u32 flight_size;
 
+	/* TSN marking the fast recovery exit point */
+	__u32 fast_recovery_exit;
+
 	/* Destination */
 	struct dst_entry *dst;
 	/* Source address. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 62082e7b797..9647fb27722 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	peer->rttvar = 0;
 	peer->srtt = 0;
 	peer->rto_pending = 0;
+	peer->fast_recovery = 0;
 
 	peer->last_time_heard = jiffies;
 	peer->last_time_used = jiffies;
@@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 	cwnd = transport->cwnd;
 	flight_size = transport->flight_size;
 
+	/* See if we need to exit Fast Recovery first */
+	if (transport->fast_recovery &&
+	    TSN_lte(transport->fast_recovery_exit, sack_ctsn))
+		transport->fast_recovery = 0;
+
 	/* The appropriate cwnd increase algorithm is performed if, and only
-	 * if the cumulative TSN has advanced and the congestion window is
+	 * if the cumulative TSN whould advanced and the congestion window is
 	 * being fully utilized.
 	 */
-	if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
+	if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
 	    (flight_size < cwnd))
 		return;
 
@@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 	pmtu = transport->asoc->pathmtu;
 
 	if (cwnd <= ssthresh) {
-		/* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
-		 * than or equal to ssthresh an SCTP endpoint MUST use the
-		 * slow start algorithm to increase cwnd only if the current
-		 * congestion window is being fully utilized and an incoming
-		 * SACK advances the Cumulative TSN Ack Point. Only when these
-		 * two conditions are met can the cwnd be increased otherwise
-		 * the cwnd MUST not be increased. If these conditions are met
-		 * then cwnd MUST be increased by at most the lesser of
-		 * 1) the total size of the previously outstanding DATA
-		 * chunk(s) acknowledged, and 2) the destination's path MTU.
+		/* RFC 4960 7.2.1
+		 * o  When cwnd is less than or equal to ssthresh, an SCTP
+		 *    endpoint MUST use the slow-start algorithm to increase
+		 *    cwnd only if the current congestion window is being fully
+		 *    utilized, an incoming SACK advances the Cumulative TSN
+		 *    Ack Point, and the data sender is not in Fast Recovery.
+		 *    Only when these three conditions are met can the cwnd be
+		 *    increased; otherwise, the cwnd MUST not be increased.
+		 *    If these conditions are met, then cwnd MUST be increased
+		 *    by, at most, the lesser of 1) the total size of the
+		 *    previously outstanding DATA chunk(s) acknowledged, and
+		 *    2) the destination's path MTU.  This upper bound protects
+		 *    against the ACK-Splitting attack outlined in [SAVAGE99].
 		 */
+		if (transport->fast_recovery)
+			return;
+
 		if (bytes_acked > pmtu)
 			cwnd += pmtu;
 		else
@@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      cwnd = ssthresh
 		 *      partial_bytes_acked = 0
 		 */
+		if (transport->fast_recovery)
+			return;
+
+		/* Mark Fast recovery */
+		transport->fast_recovery = 1;
+		transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+
 		transport->ssthresh = max(transport->cwnd/2,
 					  4*transport->asoc->pathmtu);
 		transport->cwnd = transport->ssthresh;
@@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t)
 	t->flight_size = 0;
 	t->error_count = 0;
 	t->rto_pending = 0;
+	t->fast_recovery = 0;
 
 	/* Initialize the state information for SFR-CACC */
 	t->cacc.changeover_active = 0;
-- 
cgit v1.2.3-70-g09d2


From 62aeaff5ccd96462b7077046357a6d7886175a57 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:39:11 -0700
Subject: sctp: Start T3-RTX timer when fast retransmitting lowest TSN

When we are trying to fast retransmit the lowest outstanding TSN, we
need to restart the T3-RTX timer, so that subsequent timeouts will
correctly tag all the packets necessary for retransmissions.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  5 ++++-
 net/sctp/outqueue.c        | 42 +++++++++++++++++++++++++++++++-----------
 net/sctp/transport.c       |  4 ++--
 3 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 67592072a32..714dc43c034 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1051,7 +1051,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 			  struct sctp_sock *);
 void sctp_transport_pmtu(struct sctp_transport *);
 void sctp_transport_free(struct sctp_transport *);
-void sctp_transport_reset_timers(struct sctp_transport *);
+void sctp_transport_reset_timers(struct sctp_transport *, int);
 void sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1141,6 +1141,9 @@ struct sctp_outq {
 	/* How many unackd bytes do we have in-flight?	*/
 	__u32 outstanding_bytes;
 
+	/* Are we doing fast-rtx on this queue */
+	char fast_rtx;
+
 	/* Corked? */
 	char cork;
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 59edfd25a19..5d3c441e84d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
 
+	q->fast_rtx = 0;
 	q->outstanding_bytes = 0;
 	q->empty = 1;
 	q->cork  = 0;
@@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 	case SCTP_RTXR_FAST_RTX:
 		SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
+		q->fast_rtx = 1;
 		break;
 	case SCTP_RTXR_PMTUD:
 		SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
@@ -543,10 +545,13 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
 	struct sctp_association *asoc;
+	int fast_rtx;
 	int error = 0;
+	int timer = 0;
 
 	asoc = q->asoc;
 	lqueue = &q->retransmit;
+	fast_rtx = q->fast_rtx;
 
 	/* RFC 2960 6.3.3 Handle T3-rtx Expiration
 	 *
@@ -587,13 +592,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		switch (status) {
 		case SCTP_XMIT_PMTU_FULL:
 			/* Send this packet.  */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* If we are retransmitting, we should only
 			 * send a single packet.
 			 */
-			if (rtx_timeout) {
+			if (rtx_timeout || fast_rtx) {
 				list_add(lchunk, lqueue);
 				lchunk = NULL;
 			}
@@ -603,8 +607,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
 		case SCTP_XMIT_RWND_FULL:
 			/* Send this packet. */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* Stop sending DATA as there is no more room
 			 * at the receiver.
@@ -615,8 +618,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
 		case SCTP_XMIT_NAGLE_DELAY:
 			/* Send this packet. */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* Stop sending DATA because of nagle delay. */
 			list_add(lchunk, lqueue);
@@ -635,7 +637,14 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			if (chunk->fast_retransmit > 0)
 				chunk->fast_retransmit = -1;
 
-			*start_timer = 1;
+			/* Force start T3-rtx timer when fast retransmitting
+			 * the earliest outstanding TSN
+			 */
+			if (!timer && fast_rtx &&
+			    ntohl(chunk->subh.data_hdr->tsn) ==
+					     asoc->ctsn_ack_point + 1)
+				timer = 2;
+
 			q->empty = 0;
 
 			/* Retrieve a new chunk to bundle. */
@@ -643,12 +652,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			break;
 		}
 
+		/* Set the timer if there were no errors */
+		if (!error && !timer)
+			timer = 1;
+
 		/* If we are here due to a retransmit timeout or a fast
 		 * retransmit and if there are any chunks left in the retransmit
 		 * queue that could not fit in the PMTU sized packet, they need
 		 * to be marked as ineligible for a subsequent fast retransmit.
 		 */
-		if (rtx_timeout && !lchunk) {
+		if (rtx_timeout && fast_rtx) {
 			list_for_each_entry(chunk1, lqueue, transmitted_list) {
 				if (chunk1->fast_retransmit > 0)
 					chunk1->fast_retransmit = -1;
@@ -656,6 +669,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		}
 	}
 
+	*start_timer = timer;
+
+	/* Clear fast retransmit hint */
+	if (fast_rtx)
+		q->fast_rtx = 0;
+
 	return error;
 }
 
@@ -862,7 +881,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 						    rtx_timeout, &start_timer);
 
 			if (start_timer)
-				sctp_transport_reset_timers(transport);
+				sctp_transport_reset_timers(transport,
+							    start_timer-1);
 
 			/* This can happen on COOKIE-ECHO resend.  Only
 			 * one chunk can get bundled with a COOKIE-ECHO.
@@ -977,7 +997,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			list_add_tail(&chunk->transmitted_list,
 				      &transport->transmitted);
 
-			sctp_transport_reset_timers(transport);
+			sctp_transport_reset_timers(transport, start_timer-1);
 
 			q->empty = 0;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 9647fb27722..3f34f61221e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -191,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
 /* Start T3_rtx timer if it is not already running and update the heartbeat
  * timer.  This routine is called every time a DATA chunk is sent.
  */
-void sctp_transport_reset_timers(struct sctp_transport *transport)
+void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
 {
 	/* RFC 2960 6.3.2 Retransmission Timer Rules
 	 *
@@ -201,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
 	 * address.
 	 */
 
-	if (!timer_pending(&transport->T3_rtx_timer))
+	if (force || !timer_pending(&transport->T3_rtx_timer))
 		if (!mod_timer(&transport->T3_rtx_timer,
 			       jiffies + transport->rto))
 			sctp_transport_hold(transport);
-- 
cgit v1.2.3-70-g09d2


From b9031d9d87b24e24cd32ea15b5f4220a1e8da909 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:40:15 -0700
Subject: sctp: Fix ECN markings for IPv6

Commit e9df2e8fd8fbc95c57dbd1d33dada66c4627b44c ("[IPV6]: Use
appropriate sock tclass setting for routing lookup.") also changed the
way that ECN capable transports mark this capability in IPv6.  As a
result, SCTP was not marking ECN capablity because the traffic class
was never set.  This patch brings back the markings for IPv6 traffic.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 +
 net/sctp/ipv6.c            | 6 ++++++
 net/sctp/output.c          | 2 +-
 net/sctp/protocol.c        | 6 ++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 714dc43c034..7f25195f985 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -588,6 +588,7 @@ struct sctp_af {
 	int		(*is_ce)	(const struct sk_buff *sk);
 	void		(*seq_dump_addr)(struct seq_file *seq,
 					 union sctp_addr *addr);
+	void		(*ecn_capable)(struct sock *sk);
 	__u16		net_header_len;
 	int		sockaddr_len;
 	sa_family_t	sa_family;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e4aac3266fc..a2f4d4d5159 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -727,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 	seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
 }
 
+static void sctp_v6_ecn_capable(struct sock *sk)
+{
+	inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+}
+
 /* Initialize a PF_INET6 socket msg_name. */
 static void sctp_inet6_msgname(char *msgname, int *addr_len)
 {
@@ -997,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = {
 	.skb_iif	   = sctp_v6_skb_iif,
 	.is_ce		   = sctp_v6_is_ce,
 	.seq_dump_addr	   = sctp_v6_seq_dump_addr,
+	.ecn_capable	   = sctp_v6_ecn_capable,
 	.net_header_len	   = sizeof(struct ipv6hdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
 #ifdef CONFIG_COMPAT
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cf4f9fb6819..6d45bae93b4 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 * Note: The works for IPv6 layer checks this bit too later
 	 * in transmission.  See IP6_ECN_flow_xmit().
 	 */
-	INET_ECN_xmit(nskb->sk);
+	(*tp->af_specific->ecn_capable)(nskb->sk);
 
 	/* Set up the IP options.  */
 	/* BUG: not implemented
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 56bdaf7fc42..b435a193c5d 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -617,6 +617,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 	seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
 }
 
+static void sctp_v4_ecn_capable(struct sock *sk)
+{
+	INET_ECN_xmit(sk);
+}
+
 /* Event handler for inet address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
  * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -935,6 +940,7 @@ static struct sctp_af sctp_af_inet = {
 	.skb_iif	   = sctp_v4_skb_iif,
 	.is_ce		   = sctp_v4_is_ce,
 	.seq_dump_addr	   = sctp_v4_seq_dump_addr,
+	.ecn_capable	   = sctp_v4_ecn_capable,
 	.net_header_len	   = sizeof(struct iphdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in),
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3-70-g09d2