From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seemless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3da67ca2c3c..79405daadc5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -597,7 +597,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; @@ -613,7 +613,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, match = xfrm_selector_match(sel, fl, family); if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { xfrm_pol_hold(pol); break; } @@ -641,7 +641,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -652,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); if (match && !err) xfrm_pol_hold(pol); @@ -862,19 +862,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); + + fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, + policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); } @@ -1032,13 +1033,15 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); + err = security_xfrm_decode_session(skb, fl); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } EXPORT_SYMBOL(xfrm_decode_session); @@ -1058,14 +1061,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); - sk_sid = security_sk_sid(sk, &fl, fl_dir); - /* First, check used SA against their selectors. 
*/ if (skb->sp) { int i; @@ -1079,10 +1079,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, + pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); if (!pol) @@ -1298,6 +1298,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; -- cgit v1.2.3-70-g09d2 From beb8d13bed80f8388f1a9a107d07ddd342e627e8 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:12:42 -0700 Subject: [MLSXFRM]: Add flow labeling This labels the flows that could utilize IPSec xfrms at the points the flows are defined so that IPSec policy and SAs at the right label can be used. The following protos are currently not handled, but they should continue to be able to use single-labeled IPSec like they currently do. ipmr ip_gre ipip igmp sit sctp ip6_tunnel (IPv6 over IPv6 tunnel device) decnet Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 38 +++++++++++++++++++++++++------------- include/net/route.h | 3 +++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 6 ++++++ net/ipv4/af_inet.c | 1 + net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 2 ++ net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/syncookies.c | 1 + net/ipv4/udp.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/datagram.c | 2 ++ net/ipv6/icmp.c | 2 ++ net/ipv6/inet6_connection_sock.c | 1 + net/ipv6/ndisc.c | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 2 ++ net/xfrm/xfrm_policy.c | 3 +-- security/dummy.c | 7 +++---- security/selinux/hooks.c | 8 ++++---- security/selinux/include/xfrm.h | 14 +------------- security/selinux/xfrm.c | 11 +++++++---- 26 files changed, 79 insertions(+), 40 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/linux/security.h b/include/linux/security.h index 2c4921d79d1..f3909d189fe 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include #include #include +#include struct ctl_table; @@ -815,8 +816,8 @@ struct swap_info_struct; * Deallocate security structure. * @sk_clone_security: * Clone/copy security structure. - * @sk_getsid: - * Retrieve the LSM-specific sid for the sock to enable caching of network + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * * Security hooks for XFRM operations. @@ -882,8 +883,9 @@ struct swap_info_struct; * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. - * @fl points to the flow key to set. - * Return 0 if successful decoding. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. 
* * Security hooks affecting all Key Management operations * @@ -1353,7 +1355,7 @@ struct security_operations { int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); + void (*sk_getsecid) (struct sock *sk, u32 *secid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1370,7 +1372,7 @@ struct security_operations { int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); - int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -2917,9 +2919,9 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) return security_ops->sk_clone_security(sk, newsk); } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return security_ops->sk_getsid(sk, fl, dir); + security_ops->sk_getsecid(sk, &fl->secid); } #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, @@ -3047,9 +3049,8 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -3114,9 +3115,16 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_s return security_ops->xfrm_flow_state_match(fl, xfrm); } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) +{ + return security_ops->xfrm_decode_session(skb, secid, 1); +} + +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_decode_session(skb, fl); + int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); + + BUG_ON(rc); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3176,11 +3184,15 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, return 1; } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return 0; } +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +{ +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/route.h b/include/net/route.h index c4a068692dc..7f93ac0e089 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -32,6 +32,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. 
@@ -166,6 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, ip_rt_put(*rp); *rp = NULL; } + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } @@ -182,6 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.proto = protocol; ip_rt_put(*rp); *rp = NULL; + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7f56f7e8f57..386498053b1 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -678,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, } }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 610c722ac27..53d255c0143 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. 
*/ if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { @@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -842,6 +847,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c84a32070f8..fc40da3b6d3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1074,6 +1074,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4c86ac3d882..6ad797c1416 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e50a1bfd7cc..772b4eac78b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2ede167e04..308bdeac345 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. 
*/ + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -1366,6 +1367,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 269bc2067cb..7f905bf2bde 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; + security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 62b2762a242..fe44cb50a1c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -484,6 +484,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f6..307dc3c0d63 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -259,6 +259,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f136cec96d9..a4d005eccc7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -603,6 +603,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ac85e9c532c..82a1b1a328d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,6 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 3b55b4c8e2d..c73508e090a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -156,6 +156,8 @@ ipv4_connected: if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); + if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 356a8a7ef22..dbfce089e91 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -358,6 +358,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -472,6 +473,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; diff --git 
a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bf491077b82..7a51a258615 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b50055b9278..67cfc3813c3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -419,6 +419,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; + security_sk_classify_flow(ndisc_socket->sk, fl); } static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8629ba195d2..c4eba1aeb32 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb) ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; + security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 15b862d8aca..d5040e17229 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -759,6 +759,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 802a1a6b103..46922e57e31 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = &final; } + security_sk_classify_flow(sk, &fl); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; @@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; @@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); /* sk = NULL, but it is safe for now. RST socket required. 
*/ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { @@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { @@ -923,6 +929,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d54f246411..82c7c9cde2a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,6 +782,8 @@ do_udp_sendmsg: connected = 0; } + security_sk_classify_flow(sk, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 79405daadc5..32c963c9057 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -863,7 +863,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; @@ -1039,7 +1038,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); - err = security_xfrm_decode_session(skb, fl); + err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); return err; } diff --git a/security/dummy.c b/security/dummy.c index c1f10654871..c0ff6b9bfd7 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -809,9 +809,8 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * { } -static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -866,7 +865,7 @@ static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm return 1; } -static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall) { return 0; } @@ -1083,7 +1082,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); - set_to_dummy_if_null(ops, sk_getsid); + set_to_dummy_if_null(ops, sk_getsecid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5c189da07bc..4e5989d584c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3561,14 +3561,14 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) newssec->peer_sid = ssec->peer_sid; } -static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +static void selinux_sk_getsecid(struct sock *sk, u32 *secid) { if (!sk) - return selinux_no_sk_sid(fl); + *secid = SECINITSID_ANY_SOCKET; else { struct sk_security_struct *sksec = sk->sk_security; - return sksec->sid; + *secid = sksec->sid; } } @@ -4622,7 +4622,7 @@ static struct security_operations selinux_ops = { .sk_alloc_security = selinux_sk_alloc_security, 
.sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, - .sk_getsid = selinux_sk_getsid_security, + .sk_getsecid = selinux_sk_getsecid, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index f51a3e84bd9..8e45c1d588a 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -19,7 +19,7 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall); /* @@ -33,18 +33,6 @@ static inline struct inode_security_struct *get_sock_isec(struct sock *sk) return SOCK_INODE(sk->sk_socket)->i_security; } - -static inline u32 selinux_no_sk_sid(struct flowi *fl) -{ - /* NOTE: no sock occurs on ICMP reply, forwards, ... */ - /* icmp_reply: authorize as kernel packet */ - if (fl && fl->proto == IPPROTO_ICMP) { - return SECINITSID_KERNEL; - } - - return SECINITSID_ANY_SOCKET; -} - #ifdef CONFIG_SECURITY_NETWORK_XFRM int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, struct avc_audit_data *ad); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index a502b0540e3..c750ef7af66 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -158,11 +158,11 @@ int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) * LSM hook implementation that determines the sid for the session. */ -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) { struct sec_path *sp; - fl->secid = SECSID_NULL; + *sid = SECSID_NULL; if (skb == NULL) return 0; @@ -177,10 +177,13 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) struct xfrm_sec_ctx *ctx = x->security; if (!sid_set) { - fl->secid = ctx->ctx_sid; + *sid = ctx->ctx_sid; sid_set = 1; + + if (!ckall) + break; } - else if (fl->secid != ctx->ctx_sid) + else if (*sid != ctx->ctx_sid) return -EINVAL; } } -- cgit v1.2.3-70-g09d2 From 7e49e6de30efa716614e280d97963c570f3acf29 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:05:15 -0700 Subject: [XFRM]: Add XFRM_MODE_xxx for future use. Transformation mode is used as either IPsec transport or tunnel. It is required to add two more items, route optimization and inbound trigger for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/xfrm.h | 6 ++++-- include/net/xfrm.h | 2 +- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 6 +++--- net/ipv4/ipcomp.c | 8 ++++---- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_output.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv4/xfrm4_state.c | 2 +- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 4 ++-- net/ipv6/ipcomp6.c | 6 +++--- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- net/ipv6/xfrm6_state.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_policy.c | 11 ++++++----- net/xfrm/xfrm_user.c | 4 ++-- 21 files changed, 42 insertions(+), 39 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 46a15c7a1a1..5154064b6d9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -120,7 +120,9 @@ enum #define XFRM_MODE_TRANSPORT 0 #define XFRM_MODE_TUNNEL 1 -#define XFRM_MODE_MAX 2 +#define XFRM_MODE_ROUTEOPTIMIZATION 2 +#define XFRM_MODE_IN_TRIGGER 3 +#define XFRM_MODE_MAX 4 /* Netlink configuration messages. */ enum { @@ -247,7 +249,7 @@ struct xfrm_usersa_info { __u32 seq; __u32 reqid; __u16 family; - __u8 mode; /* 0=transport,1=tunnel */ + __u8 mode; /* XFRM_MODE_xxx */ __u8 replay_window; __u8 flags; #define XFRM_STATE_NOECN 1 diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 00bf86e6e82..762795624b1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,7 +298,7 @@ struct xfrm_tmpl __u32 reqid; -/* Mode: transport/tunnel */ +/* Mode: transport, tunnel etc. */ __u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 008e69d2e42..99542977e47 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b428489f6cc..e87377e1d6b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -248,7 +248,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (!x->props.mode) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -267,7 +267,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. 
*/ @@ -383,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 5bb9c9f03fb..17342430a84 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) ip_send_check(iph); return 0; } @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a..040e8475f29 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 4a96a9e3ef3..5fd115f0c54 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -54,7 +54,7 @@ static int xfrm4_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8f50eae47d0..a5bed741de2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -96,7 +96,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966..97b0c758971 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,7 +42,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode && x->props.saddr.a4 == 0) { + if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { struct rtable *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 
f8ceaa127c8..f110af5b131 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 00ffa7bc6c9..60954fc7eb3 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -398,7 +398,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2ebfd281e72..2b8e52e1d0a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -237,7 +237,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -358,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a81e9e9d93b..19eba8d9f85 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); mutex_lock(&ipcomp6_resource_mutex); @@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0405d74ff91..ee2f6b3908b 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { /* XXX */ + if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6d111743e50..26f18869f77 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -47,7 +47,7 @@ static int xfrm6_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -80,7 +80,7 @@ static int xfrm6_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; diff --git 
a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 73cd250aecb..81355bb5032 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -114,7 +114,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = (struct in6_addr*)&xfrm[i]->id.daddr; local = (struct in6_addr*)&xfrm[i]->props.saddr; tunnel = 1; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b33296b3f6d..a1a1f547644 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,7 +42,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { + if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { struct rt6_info *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c8f9369c2a8..59685ee8f70 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static int xfrm6_tunnel_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/key/af_key.c b/net/key/af_key.c index 797c744a843..19e047b0e67 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1765,7 +1765,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) } /* addresses present only in tunnel mode */ - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -1997,7 +1997,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i int req_size; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode) + if (t->mode == XFRM_MODE_TUNNEL) req_size += 2*socklen; else size -= 2*socklen; @@ -2013,7 +2013,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 32c963c9057..a0d58971391 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -779,7 +779,7 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; } @@ -1005,7 +1005,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && (tmpl->aalgos & (1<props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } static inline int @@ -1015,14 +1016,14 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if 
(xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) break; } return start; @@ -1047,7 +1048,7 @@ EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) return 1; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f70e158874d..0d580ac1977 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -174,8 +174,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->mode) { - case 0: - case 1: + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_TUNNEL: break; default: -- cgit v1.2.3-70-g09d2 From f3bd484021d9486b826b422a017d75dd0bd258ad Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:00:48 -0700 Subject: [XFRM]: Restrict authentication algorithm only when inbound transformation protocol is IPsec. For Mobile IPv6 usage, routing header or destination options header is used and it doesn't require this comparison. It is checked only for IPsec template. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a0d58971391..f1cdcfb9095 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1004,7 +1004,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - (tmpl->aalgos & (1<props.aalgo)) && + ((tmpl->aalgos & (1<props.aalgo)) || + !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && xfrm_state_addr_cmp(tmpl, x, family)); } -- cgit v1.2.3-70-g09d2 From 9e51fd371a022318c5b64b831c43026e89bc4f75 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:09:09 -0700 Subject: [XFRM]: Rename secpath_has_tunnel to secpath_has_nontransport. On current kernel inbound transformation state is allowed transport and disallowed tunnel mode when mismatch is occurred between tempates and states. As the result of adding two more modes by Mobile IPv6, this function name is misleading. Inbound transformation can allow only transport mode when mismatch is occurred between template and secpath. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1cdcfb9095..56abb5c057d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1046,7 +1046,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_tunnel(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) @@ -1087,7 +1087,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, xfrm_policy_lookup); if (!pol) - return !skb->sp || !secpath_has_tunnel(skb->sp, 0); + return !skb->sp || !secpath_has_nontransport(skb->sp, 0); pol->curlft.use_time = (unsigned long)xtime.tv_sec; @@ -1111,7 +1111,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, goto reject; } - if (secpath_has_tunnel(sp, k)) + if (secpath_has_nontransport(sp, k)) goto reject; xfrm_pol_put(pol); -- cgit v1.2.3-70-g09d2 From e53820de0f81da1429048634cadc6ef5f50c2f8b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:12:01 -0700 Subject: [XFRM] IPV6: Restrict bundle reusing For outbound transformation, bundle is checked whether it is suitable for current flow to be reused or not. In such IPv6 case as below, transformation may apply incorrect bundle for the flow instead of creating another bundle: - The policy selector has destination prefix length < 128 (Two or more addresses can be matched it) - Its bundle holds dst entry of default route whose prefix length < 128 (Previous traffic was used such route as next hop) - The policy and the bundle were used a transport mode state and this time flow address is not matched the bundled state. This issue is found by Mobile IPv6 usage to protect mobility signaling by IPsec, but it is not a Mobile IPv6 specific. This patch adds strict check to xfrm_bundle_ok() for each state mode and address when prefix length is less than 128. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 19 ++++++++++++++++++- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 4 +++- net/xfrm/xfrm_policy.c | 8 ++++++-- 4 files changed, 28 insertions(+), 5 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 248874ecf8d..7f1630630dc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -869,6 +869,23 @@ xfrm_state_addr_check(struct xfrm_state *x, return 0; } +static __inline__ int +xfrm_state_addr_flow_check(struct xfrm_state *x, struct flowi *fl, + unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_state_addr_check(x, + (xfrm_address_t *)&fl->fl4_dst, + (xfrm_address_t *)&fl->fl4_src); + case AF_INET6: + return __xfrm6_state_addr_check(x, + (xfrm_address_t *)&fl->fl6_dst, + (xfrm_address_t *)&fl->fl6_src); + } + return 0; +} + static inline int xfrm_state_kern(struct xfrm_state *x) { return atomic_read(&x->tunnel_users); @@ -1014,7 +1031,7 @@ extern void xfrm_policy_flush(void); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); -extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family); +extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e517981cead..42d8ded0f96 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -33,7 +33,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET)) { + xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index a3f68c8b737..729b4748d6d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -50,7 +50,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6)) { + xfrm_bundle_ok(xdst, fl, AF_INET6, + (xdst->u.rt6.rt6i_dst.plen != 128 || + xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); break; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 56abb5c057d..ad2a5cba1f5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1167,7 +1167,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. 
*/ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1304,6 +1304,10 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) + return 0; + mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { last = xdst; -- cgit v1.2.3-70-g09d2 From df0ba92a99ca757039dfa84a929281ea3f7a50e8 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:41:00 -0700 Subject: [XFRM]: Trace which secpath state is reject factor. For Mobile IPv6 usage, it is required to trace which secpath state is reject factor in order to notify it to user space (to know the address which cannot be used route optimized communication). Based on MIPL2 kernel patch. This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 55 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 7 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 13488e7ba68..9ebbdc1dd47 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -273,6 +273,7 @@ struct xfrm_type void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); + int (*reject)(struct xfrm_state *, struct sk_buff *, struct flowi *); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ad2a5cba1f5..d125a264903 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -988,6 +988,23 @@ error: } EXPORT_SYMBOL(xfrm_lookup); +static inline int +xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +{ + struct xfrm_state *x; + int err; + + if (!skb->sp || idx < 0 || idx >= skb->sp->len) + return 0; + x = skb->sp->xvec[idx]; + if (!x->type->reject) + return 0; + xfrm_state_hold(x); + err = x->type->reject(x, skb, fl); + xfrm_state_put(x); + return err; +} + /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must @@ -1010,6 +1027,13 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, xfrm_state_addr_cmp(tmpl, x, family)); } +/* + * 0 or more than 0 is returned when validation is succeeded (either bypass + * because of optional transport mode, or next index of the mathced secpath + * state with the template. + * -1 is returned when no matching template is found. + * Otherwise "-2 - errored_index" is returned. 
+ */ static inline int xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, unsigned short family) @@ -1024,8 +1048,11 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (start == -1) + start = -2-idx; break; + } } return start; } @@ -1046,11 +1073,14 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { + if (idxp) + *idxp = k; return 1; + } } return 0; @@ -1062,6 +1092,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); + int xerr_idx = -1; + int *xerr_idxp = &xerr_idx; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; @@ -1086,8 +1118,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); - if (!pol) - return !skb->sp || !secpath_has_nontransport(skb->sp, 0); + if (!pol) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) { + xfrm_secpath_reject(xerr_idx, skb, &fl); + return 0; + } + return 1; + } pol->curlft.use_time = (unsigned long)xtime.tv_sec; @@ -1107,11 +1144,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, */ for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) + if (k < 0) { + if (k < -1 && xerr_idxp) + *xerr_idxp = -(2+k); goto reject; + } } - if (secpath_has_nontransport(sp, k)) + if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; xfrm_pol_put(pol); @@ -1119,6 +1159,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } reject: + xfrm_secpath_reject(xerr_idx, skb, &fl); xfrm_pol_put(pol); return 0; } -- cgit v1.2.3-70-g09d2 From 4e81bb8336a0ac50289d4d4c7a55e559b994ee8f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:43:30 -0700 Subject: [XFRM] POLICY: sub policy support. Sub policy is introduced. Main and sub policy are applied the same flow. (Policy that current kernel uses is named as main.) It is required another transformation policy management to keep IPsec and Mobile IPv6 lives separate. Policy which lives shorter time in kernel should be a sub i.e. normally main is for IPsec and sub is for Mobile IPv6. (Such usage as two IPsec policies on different database can be used, too.) Limitation or TODOs: - Sub policy is not supported for per socket one (it is always inserted as main). - Current kernel makes cached outbound with flowi to skip searching database. However this patch makes it disabled only when "two policies are used and the first matched one is bypass case" because neither flowi nor bundle information knows about transformation template size. 
Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 7 ++ include/net/xfrm.h | 45 +++++++-- net/xfrm/xfrm_policy.c | 252 ++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 260 insertions(+), 44 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4009f4445fa..492fb981874 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -102,6 +102,13 @@ struct xfrm_stats { __u32 integrity_failed; }; +enum +{ + XFRM_POLICY_TYPE_MAIN = 0, + XFRM_POLICY_TYPE_SUB = 1, + XFRM_POLICY_TYPE_MAX = 2 +}; + enum { XFRM_POLICY_IN = 0, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0b223eed4c9..4655ca25f80 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,6 +341,7 @@ struct xfrm_policy atomic_t refcnt; struct timer_list timer; + u8 type; u32 priority; u32 index; struct xfrm_selector selector; @@ -389,6 +390,19 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km); extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; +#ifdef CONFIG_XFRM_SUB_POLICY +extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; + +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); +} +#else +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir]); +} +#endif static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -404,6 +418,20 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy) __xfrm_policy_destroy(policy); } +#ifdef CONFIG_XFRM_SUB_POLICY +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + int i; + for (i = npols - 1; i >= 0; --i) + xfrm_pol_put(pols[i]); +} +#else +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + xfrm_pol_put(pols[0]); +} +#endif + #define XFRM_DST_HSIZE 1024 static __inline__ @@ -737,8 +765,8 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - - return (!xfrm_policy_list[dir] && !skb->sp) || + + return (xfrm_policy_lists_empty(dir) && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -758,7 +786,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_list[XFRM_POLICY_OUT] || + return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } @@ -1023,18 +1051,19 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); -void xfrm_policy_flush(void); +struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); +void xfrm_policy_flush(u8 type); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct 
xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(void); +extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d125a264903..96de6c76ed5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -32,6 +32,24 @@ static DEFINE_RWLOCK(xfrm_policy_lock); struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_list); +#ifdef CONFIG_XFRM_SUB_POLICY +struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_list_sub); + +#define XFRM_POLICY_LISTS(type) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ + xfrm_policy_list) +#define XFRM_POLICY_LISTHEAD(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ + xfrm_policy_list[dir]) +#define XFRM_POLICY_LISTHEADP(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \ + &xfrm_policy_list[dir]) +#else +#define XFRM_POLICY_LISTS(type) xfrm_policy_list +#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] +#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] +#endif static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -397,7 +415,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(u8 type, int dir) { u32 idx; struct xfrm_policy *p; @@ -408,7 +426,7 @@ static u32 xfrm_gen_index(int dir) idx_generator += 8; if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { + for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { if (p->index == idx) break; } @@ -425,7 +443,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { + for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { @@ -452,7 +470,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->next = *p; *p = policy; atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + policy->index = delpol ? 
delpol->index : xfrm_gen_index(policy->type, dir); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -493,13 +511,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); @@ -518,12 +537,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if (pol->index == id) { xfrm_pol_hold(pol); if (delete) @@ -541,15 +560,16 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +void xfrm_policy_flush(u8 type) { struct xfrm_policy *xp; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; + while ((xp = p_list[dir]) != NULL) { + p_list[dir] = xp->next; write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(xp); @@ -562,7 +582,7 @@ void xfrm_policy_flush(void) } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { struct xfrm_policy *xp; @@ -572,7 +592,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), read_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) count++; } @@ -582,7 +602,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { error = func(xp, dir%XFRM_POLICY_MAX, --count, data); if (error) goto out; @@ -597,13 +617,13 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. 
*/ -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) { struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { struct xfrm_selector *sel = &pol->selector; int match; @@ -620,6 +640,25 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, } } read_unlock_bh(&xfrm_policy_lock); + + return pol; +} + +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + +#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol) + goto end; +#endif + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + +#ifdef CONFIG_XFRM_SUB_POLICY + end: +#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; } @@ -665,8 +704,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + + pol->next = p_list[dir]; + p_list[dir] = pol; xfrm_pol_hold(pol); } @@ -675,7 +716,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, { struct xfrm_policy **polp; - for (polp = &xfrm_policy_list[dir]; + for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); *polp != NULL; polp = &(*polp)->next) { if (*polp == pol) { *polp = pol->next; @@ -704,12 +745,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -738,6 +784,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -764,9 +811,9 @@ int __xfrm_sk_clone_policy(struct sock *sk) /* Resolve list of templates for the flow, given policy. 
*/ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; @@ -809,6 +856,38 @@ fail: return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + return cnx; + + fail: + for (cnx--; cnx>=0; cnx--) + xfrm_state_put(xfrm[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ @@ -855,6 +934,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols; + int pol_dead; + int xfrm_nr; + int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -866,12 +950,18 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, restart: genid = atomic_read(&flow_cache_genid); policy = NULL; + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; + pol_dead = 0; + xfrm_nr = 0; + if (sk && sk->sk_policy[1]) policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -883,6 +973,9 @@ restart: family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -891,11 +984,13 @@ restart: goto error; case XFRM_POLICY_ALLOW: +#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } +#endif /* Try to find matching bundle. * @@ -911,7 +1006,36 @@ restart: if (dst) break; - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (pols[1]->action == XFRM_POLICY_BLOCK) { + err = -EPERM; + goto error; + } + npols ++; + xfrm_nr += pols[1]->xfrm_nr; + } + } + + /* + * Because neither flowi nor bundle information knows about + * transformation template size. On more than one policy usage + * we can realize whether all of them is bypass or not after + * they are searched. See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ + if (xfrm_nr == 0) { + /* Flow passes not transformed. 
*/ + xfrm_pols_put(pols, npols); + return 0; + } + +#endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -924,7 +1048,7 @@ restart: set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -932,7 +1056,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); goto restart; } err = nx; @@ -942,7 +1066,7 @@ restart: } if (nx == 0) { /* Flow passes not transformed. */ - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; } @@ -956,8 +1080,14 @@ restart: goto error; } + for (pi = 0; pi < npols; pi++) { + read_lock_bh(&pols[pi]->lock); + pol_dead |= pols[pi]->dead; + read_unlock_bh(&pols[pi]->lock); + } + write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { + if (unlikely(pol_dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -977,12 +1107,12 @@ restart: } *dst_p = dst; dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; error: dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); *dst_p = NULL; return err; } @@ -1090,6 +1220,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; @@ -1128,22 +1262,50 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = pol; + npols ++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + npols ++; + } + } +#endif + if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) + goto reject; + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) + goto reject_error; + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. 
*/ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { if (k < -1 && xerr_idxp) *xerr_idxp = -(2+k); @@ -1154,13 +1316,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } reject: xfrm_secpath_reject(xerr_idx, skb, &fl); - xfrm_pol_put(pol); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1246,6 +1409,23 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) read_lock_bh(&xfrm_policy_lock); for (i=0; i<2*XFRM_POLICY_MAX; i++) { +#ifdef CONFIG_XFRM_SUB_POLICY + for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + +#endif for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { write_lock(&pol->lock); dstp = &pol->bundles; -- cgit v1.2.3-70-g09d2 From 41a49cc3c02ace59d4dddae91ea211c330970ee3 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:48:31 -0700 Subject: [XFRM]: Add sorting interface for state and template. Under two transformation policies it is required to merge them. This is a platform to sort state for outbound and templates for inbound respectively. It will be used when Mobile IPv6 and IPsec are used at the same time. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 20 ++++++++++++++++++++ net/xfrm/xfrm_policy.c | 16 ++++++++++++++-- net/xfrm/xfrm_state.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4655ca25f80..d341603e4ba 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -254,6 +254,8 @@ struct xfrm_state_afinfo { struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); + int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); + int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1002,6 +1004,24 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +#ifdef CONFIG_XFRM_SUB_POLICY +extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family); +extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family); +#else +static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family) +{ + return -ENOSYS; +} + +static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family) +{ + return -ENOSYS; +} +#endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int 
xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 96de6c76ed5..1732159ffd0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -861,6 +861,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; int cnx = 0; int error; int ret; @@ -871,7 +873,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, error = -ENOBUFS; goto fail; } - ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); if (ret < 0) { error = ret; goto fail; @@ -879,11 +882,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, cnx += ret; } + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + return cnx; fail: for (cnx--; cnx>=0; cnx--) - xfrm_state_put(xfrm[cnx]); + xfrm_state_put(tpp[cnx]); return error; } @@ -1280,6 +1287,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct sec_path *sp; static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; @@ -1297,6 +1305,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + tpp = stp; + } /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26ef6952c3..622e92a08d0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -728,6 +728,44 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, } EXPORT_SYMBOL(xfrm_find_acq); +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_tmpl_sort); + +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_state_sort); +#endif + /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) -- cgit v1.2.3-70-g09d2 From 9d4a706d852411154d0c91b9ffb3bec68b94b25c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:18:09 -0700 Subject: [XFRM]: Add generation count to xfrm_state and xfrm_dst. Each xfrm_state inserted gets a new generation counter value. When a bundle is created, the xfrm_dst objects get the current generation counter of the xfrm_state they will attach to at dst->xfrm. 
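A small user-space model of that counter scheme, with invented names (the real fields are x->genid and xdst->genid in the patch below): inserting a state bumps a global generation number, a bundle snapshots the number of the state it attaches to, and the bundle is considered stale as soon as the two values differ.

#include <stdio.h>

/* Illustrative stand-ins only; not the kernel structures. */
static unsigned int toy_state_genid;

struct toy_state  { unsigned int genid; };
struct toy_bundle { struct toy_state *state; unsigned int genid; };

static void toy_state_insert(struct toy_state *x)
{
	x->genid = ++toy_state_genid;       /* every inserted state gets a fresh value */
}

static void toy_bundle_create(struct toy_bundle *b, struct toy_state *x)
{
	b->state = x;
	b->genid = x->genid;                /* snapshot taken when the bundle is built */
}

static int toy_bundle_ok(const struct toy_bundle *b)
{
	return b->genid == b->state->genid; /* stale once the counters diverge */
}

int main(void)
{
	struct toy_state x;
	struct toy_bundle b;

	toy_state_insert(&x);
	toy_bundle_create(&b, &x);
	printf("fresh bundle ok: %d\n", toy_bundle_ok(&b));

	toy_state_insert(&x);               /* an SA database change bumps the counter */
	printf("after SA change ok: %d\n", toy_bundle_ok(&b));
	return 0;
}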
xfrm_bundle_ok() will return false if it sees an xfrm_dst with a generation count different from the generation count of the xfrm_state that dst points to. This provides a facility by which to passively and cheaply invalidate cached IPSEC routes during SA database changes. Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/xfrm6_policy.c | 1 + net/xfrm/xfrm_policy.c | 2 ++ net/xfrm/xfrm_state.c | 3 +++ 5 files changed, 10 insertions(+) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3405e5d9d51..fd4a300b5ba 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -104,6 +104,8 @@ struct xfrm_state struct xfrm_id id; struct xfrm_selector sel; + u32 genid; + /* Key manger bits */ struct { u8 state; @@ -590,6 +592,7 @@ struct xfrm_dst struct rt6_info rt6; } u; struct dst_entry *route; + u32 genid; u32 route_mtu_cached; u32 child_mtu_cached; u32 route_cookie; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 42d8ded0f96..479598566f1 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -93,6 +93,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 98c2fe449b3..9391c4c94fe 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -149,6 +149,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; if (rt->rt6i_node) xdst->route_cookie = rt->rt6i_node->fn_sernum; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1732159ffd0..7fc6944ee36 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1536,6 +1536,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->genid != dst->xfrm->genid) + return 0; if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 445263c54c9..535d43c1472 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,6 +53,7 @@ static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; +static unsigned int xfrm_state_genid; static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { @@ -745,6 +746,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + x->genid = ++xfrm_state_genid; + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); -- cgit v1.2.3-70-g09d2 From c7f5ea3a4d1ae6b3b426e113358fdc57494bc754 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:29:04 -0700 Subject: [XFRM]: Do not flush all bundles on SA insert. Instead, simply set all potentially aliasing existing xfrm_state objects to have the current generation counter value. This will make routes get relooked up the next time an existing route mentioning these aliased xfrm_state objects gets used, via xfrm_dst_check(). Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 10 ---------- net/xfrm/xfrm_state.c | 25 ++++++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fd4a300b5ba..a620a43c9ee 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -996,7 +996,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); -extern void xfrm_flush_all_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7fc6944ee36..cfa5c692f2e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1478,16 +1478,6 @@ int xfrm_flush_bundles(void) return 0; } -static int always_true(struct dst_entry *dst) -{ - return 1; -} - -void xfrm_flush_all_bundles(void) -{ - xfrm_prune_bundles(always_true); -} - void xfrm_init_pmtu(struct dst_entry *dst) { do { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98200397e09..77ef796c9d0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -761,13 +761,30 @@ static void __xfrm_state_insert(struct xfrm_state *x) schedule_work(&xfrm_hash_work); } +/* xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + struct hlist_node *entry; + unsigned int h; + + h = xfrm_dst_hash(&xnew->id.daddr, reqid, family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + x->genid = xfrm_state_genid; + } +} + void xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); - - xfrm_flush_all_bundles(); } EXPORT_SYMBOL(xfrm_state_insert); @@ -889,15 +906,13 @@ int xfrm_state_add(struct xfrm_state *x) x->id.proto, &x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); - if (!err) - xfrm_flush_all_bundles(); - if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); -- cgit v1.2.3-70-g09d2 From 1c0953997567b22e32fdf85d3b4bc0f2461fd161 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:30:28 -0700 Subject: [XFRM]: Purge dst references to deleted SAs passively. Just let GC and other normal mechanisms take care of getting rid of DST cache references to deleted xfrm_state objects instead of walking all the policy bundles. Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 17 ----------------- 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a620a43c9ee..c7870b6eae0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -995,7 +995,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_flush_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cfa5c692f2e..1bcaae4adf3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1472,7 +1472,7 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77ef796c9d0..9ff00b7d6ad 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -256,8 +256,6 @@ static struct work_struct xfrm_state_gc_work; static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; - int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -293,11 +291,6 @@ static void xfrm_state_gc_task(void *data) struct hlist_node *entry, *tmp; struct hlist_head gc_list; - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } - spin_lock_bh(&xfrm_state_gc_lock); gc_list.first = xfrm_state_gc_list.first; INIT_HLIST_HEAD(&xfrm_state_gc_list); @@ -454,16 +447,6 @@ int __xfrm_state_delete(struct xfrm_state *x) if (del_timer(&x->rtimer)) __xfrm_state_put(x); - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. - */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } - /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. -- cgit v1.2.3-70-g09d2 From 2518c7c2b3d7f0a6b302b4efe17c911f8dd4049f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:45:07 -0700 Subject: [XFRM]: Hash policies when non-prefixed. This idea is from Alexey Kuznetsov. It is common for policies to be non-prefixed. And for that case we can optimize lookups, insert, etc. quite a bit. For each direction, we have a dynamically sized policy hash table for non-prefixed policies. We also have a hash table on policy->index. For prefixed policies, we have a list per-direction which we will consult on lookups when a non-prefix hashtable lookup fails. This still isn't as efficient as I would like it. There are four immediate problems: 1) Lots of excessive refcounting, which can be fixed just like xfrm_state was 2) We do 2 hash probes on insert, one to look for dups and one to allocate a unique policy->index. Althought I wonder how much this matters since xfrm_state inserts do up to 3 hash probes and that seems to perform fine. 
3) xfrm_policy_insert() is very complex because of the priority ordering and entry replacement logic. 4) Lots of counter bumping, in addition to policy refcounts, in the form of xfrm_policy_count[]. This is merely used to let code path(s) know that some IPSEC rules exist. So this count is indexed per-direction, maybe that is overkill. Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 +- net/xfrm/xfrm_policy.c | 681 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 546 insertions(+), 158 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c7870b6eae0..0acabf2a0a8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -331,7 +331,8 @@ struct xfrm_tmpl struct xfrm_policy { struct xfrm_policy *next; - struct list_head list; + struct hlist_node bydst; + struct hlist_node byidx; /* This lock only affects elements except for entry. */ rwlock_t lock; @@ -385,21 +386,7 @@ struct xfrm_mgr extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); - -extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -#ifdef CONFIG_XFRM_SUB_POLICY -extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; - -static inline int xfrm_policy_lists_empty(int dir) -{ - return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); -} -#else -static inline int xfrm_policy_lists_empty(int dir) -{ - return (!xfrm_policy_list[dir]); -} -#endif +extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -678,7 +665,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - return (xfrm_policy_lists_empty(dir) && !skb->sp) || + return (!xfrm_policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -698,7 +685,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || + return !xfrm_policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1bcaae4adf3..087a5443b05 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include @@ -30,26 +33,8 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list); -#ifdef CONFIG_XFRM_SUB_POLICY -struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list_sub); - -#define XFRM_POLICY_LISTS(type) \ - ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ - xfrm_policy_list) -#define XFRM_POLICY_LISTHEAD(type, dir) \ - ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ - xfrm_policy_list[dir]) -#define XFRM_POLICY_LISTHEADP(type, dir) \ - ((type == XFRM_POLICY_TYPE_SUB) ? 
&xfrm_policy_list_sub[dir] : \ - &xfrm_policy_list[dir]) -#else -#define XFRM_POLICY_LISTS(type) xfrm_policy_list -#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] -#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] -#endif +unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -57,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; -static struct list_head xfrm_policy_gc_list = - LIST_HEAD_INIT(xfrm_policy_gc_list); +static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); @@ -328,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - atomic_set(&policy->refcnt, 1); + INIT_HLIST_NODE(&policy->bydst); + INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); + atomic_set(&policy->refcnt, 1); init_timer(&policy->timer); policy->timer.data = (unsigned long)policy; policy->timer.function = xfrm_policy_timer; @@ -375,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) static void xfrm_policy_gc_task(void *data) { struct xfrm_policy *policy; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_policy_gc_lock); - list_splice_init(&xfrm_policy_gc_list, &gc_list); + gc_list.first = xfrm_policy_gc_list.first; + INIT_HLIST_HEAD(&xfrm_policy_gc_list); spin_unlock_bh(&xfrm_policy_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - policy = list_entry(entry, struct xfrm_policy, list); + hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); - } } /* Rule must be locked. 
Release descentant resources, announce @@ -407,70 +392,354 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) } spin_lock(&xfrm_policy_gc_lock); - list_add(&policy->list, &xfrm_policy_gc_list); + hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + +static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; +static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; +static struct hlist_head *xfrm_policy_byidx __read_mostly; +static unsigned int xfrm_idx_hmask __read_mostly; +static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int idx_hash(u32 index) +{ + return __idx_hash(index, xfrm_idx_hmask); +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = ntohl(daddr->a4 ^ saddr->a4); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = ntohl(daddr->a4 ^ saddr->a4); + break; + + case AF_INET6: + h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __sel_hash(sel, family, hmask); + + return (hash == hmask + 1 ? 
+ &xfrm_policy_inexact[dir] : + xfrm_policy_bydst[dir].table + hash); +} + +static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __addr_hash(daddr, saddr, family, hmask); + + return xfrm_policy_bydst[dir].table + hash; +} + +static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_dst_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + unsigned int h; + + h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, + pol->family, nhashmask); + hlist_add_head(&pol->bydst, ndsttable+h); + } +} + +static void xfrm_idx_hash_transfer(struct hlist_head *list, + struct hlist_head *nidxtable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + unsigned int h; + + h = __idx_hash(pol->index, nhashmask); + hlist_add_head(&pol->byidx, nidxtable+h); + } +} + +static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) +{ + return ((old_hmask + 1) << 1) - 1; +} + +static void xfrm_bydst_resize(int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize); + int i; + + if (!ndst) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + + xfrm_policy_bydst[dir].table = ndst; + xfrm_policy_bydst[dir].hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); +} + +static void xfrm_byidx_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize); + int i; + + if (!nidx) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); + + xfrm_policy_byidx = nidx; + xfrm_idx_hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); +} + +static inline int xfrm_bydst_should_resize(int dir, int *total) +{ + unsigned int cnt = xfrm_policy_count[dir]; + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + + if (total) + *total += cnt; + + if ((hmask + 1) < xfrm_policy_hashmax && + cnt > hmask) + return 1; + + return 0; +} + +static inline int xfrm_byidx_should_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + + if ((hmask + 1) < 
xfrm_policy_hashmax && + total > hmask) + return 1; + + return 0; +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + int dir, total; + + mutex_lock(&hash_resize_mutex); + + total = 0; + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + if (xfrm_bydst_should_resize(dir, &total)) + xfrm_bydst_resize(dir); + } + if (xfrm_byidx_should_resize(total)) + xfrm_byidx_resize(total); + + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(u8 type, int dir) { - u32 idx; - struct xfrm_policy *p; static u32 idx_generator; for (;;) { + struct hlist_node *entry; + struct hlist_head *list; + struct xfrm_policy *p; + u32 idx; + int found; + idx = (idx_generator | dir); idx_generator += 8; if (idx == 0) idx = 8; - for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { - if (p->index == idx) + list = xfrm_policy_byidx + idx_hash(idx); + found = 0; + hlist_for_each_entry(p, entry, list, byidx) { + if (p->index == idx) { + found = 1; break; + } } - if (!p) + if (!found) return idx; } } +static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) +{ + u32 *p1 = (u32 *) s1; + u32 *p2 = (u32 *) s2; + int len = sizeof(struct xfrm_selector) / sizeof(u32); + int i; + + for (i = 0; i < len; i++) { + if (p1[i] != p2[i]) + return 1; + } + + return 0; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { - struct xfrm_policy *pol, **p; - struct xfrm_policy *delpol = NULL; - struct xfrm_policy **newpos = NULL; + struct xfrm_policy *pol; + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *entry, *newpos, *last; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && + chain = policy_hash_bysel(&policy->selector, policy->family, dir); + delpol = NULL; + newpos = NULL; + last = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (!delpol && + pol->type == policy->type && + !selector_cmp(&pol->selector, &policy->selector) && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; } - *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - p = &pol->next; + last = &pol->bydst; continue; } if (!newpos) - newpos = p; + newpos = &pol->bydst; if (delpol) break; - p = &pol->next; + last = &pol->bydst; } + if (!newpos) + newpos = last; if (newpos) - p = newpos; + hlist_add_after(newpos, &policy->bydst); + else + hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - policy->next = *p; - *p = policy; + xfrm_policy_count[dir]++; atomic_inc(&flow_cache_genid); + if (delpol) { + hlist_del(&delpol->bydst); + hlist_del(&delpol->byidx); + xfrm_policy_count[dir]--; + } policy->index = delpol ? 
delpol->index : xfrm_gen_index(policy->type, dir); + hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -479,10 +748,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); + else if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; - for (policy = policy->next; policy; policy = policy->next) { + entry = &policy->bydst; + hlist_for_each_entry_continue(policy, entry, bydst) { struct dst_entry *dst; write_lock(&policy->lock); @@ -515,67 +787,112 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { - if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && - (xfrm_sec_ctx_match(ctx, pol->security))) { + chain = policy_hash_bysel(sel, sel->family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (pol->type == type && + !selector_cmp(sel, &pol->selector) && + xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { + chain = xfrm_policy_byidx + idx_hash(id); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, byidx) { + if (pol->type == type && pol->index == id) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_byid); void xfrm_policy_flush(u8 type) { - struct xfrm_policy *xp; - struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = p_list[dir]) != NULL) { - p_list[dir] = xp->next; + struct xfrm_policy *pol; + struct hlist_node *entry; + int i; + + again1: + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_kill(xp); + xfrm_policy_kill(pol); write_lock_bh(&xfrm_policy_lock); + goto again1; + } + + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + again2: + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != 
type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(pol); + + write_lock_bh(&xfrm_policy_lock); + goto again2; + } } + + xfrm_policy_count[dir] = 0; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); @@ -585,15 +902,27 @@ EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *xp; - int dir; - int count = 0; - int error = 0; + struct xfrm_policy *pol; + struct hlist_node *entry; + int dir, count, error; read_lock_bh(&xfrm_policy_lock); + count = 0; for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) - count++; + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type == type) + count++; + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type == type) + count++; + } + } } if (count == 0) { @@ -602,13 +931,28 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); if (error) goto out; } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); + if (error) + goto out; + } + } } - + error = 0; out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -617,31 +961,61 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. 
*/ -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, - u16 family, u8 dir) +static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, + u8 type, u16 family, int dir) { - struct xfrm_policy *pol; + struct xfrm_selector *sel = &pol->selector; + int match; - read_lock_bh(&xfrm_policy_lock); - for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; + if (pol->family != family || + pol->type != type) + return 0; - if (pol->family != family) - continue; + match = xfrm_selector_match(sel, fl, family); + if (match) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) + return 1; + } + + return 0; +} - match = xfrm_selector_match(sel, fl, family); +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) +{ + struct xfrm_policy *pol, *ret; + xfrm_address_t *daddr, *saddr; + struct hlist_node *entry; + struct hlist_head *chain; - if (match) { - if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { + daddr = xfrm_flowi_daddr(fl, family); + saddr = xfrm_flowi_saddr(fl, family); + if (unlikely(!daddr || !saddr)) + return NULL; + + read_lock_bh(&xfrm_policy_lock); + chain = policy_hash_direct(daddr, saddr, family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { + xfrm_pol_hold(pol); + ret = pol; + break; + } + } + if (!ret) { + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { xfrm_pol_hold(pol); + ret = pol; break; } } } read_unlock_bh(&xfrm_policy_lock); - return pol; + return ret; } static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, @@ -657,7 +1031,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); #ifdef CONFIG_XFRM_SUB_POLICY - end: +end: #endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; @@ -704,26 +1078,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + struct hlist_head *chain = policy_hash_bysel(&pol->selector, + pol->family, dir); - pol->next = p_list[dir]; - p_list[dir] = pol; + hlist_add_head(&pol->bydst, chain); + hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); + xfrm_policy_count[dir]++; xfrm_pol_hold(pol); + + if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **polp; + if (hlist_unhashed(&pol->bydst)) + return NULL; - for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - return pol; - } - } - return NULL; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + + return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) @@ -968,7 +1345,8 @@ restart: if (!policy) { /* To accelerate a bit... 
*/ - if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) + if ((dst_orig->flags & DST_NOXFRM) || + !xfrm_policy_count[XFRM_POLICY_OUT]) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -1413,50 +1791,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } +static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +{ + struct dst_entry *dst, **dstp; + + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = *gc_list_p; + *gc_list_p = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); +} + static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) { - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; + struct dst_entry *gc_list = NULL; + int dir; read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { -#ifdef CONFIG_XFRM_SUB_POLICY - for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); - } + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + struct hlist_head *table; + int i; -#endif - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) + prune_one_bundle(pol, func, &gc_list); + + table = xfrm_policy_bydst[dir].table; + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) + prune_one_bundle(pol, func, &gc_list); } } read_unlock_bh(&xfrm_policy_lock); while (gc_list) { - dst = gc_list; + struct dst_entry *dst = gc_list; gc_list = dst->next; dst_free(dst); } @@ -1680,6 +2058,9 @@ static struct notifier_block xfrm_dev_notifier = { static void __init xfrm_policy_init(void) { + unsigned int hmask, sz; + int dir; + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN, @@ -1687,6 +2068,26 @@ static void __init xfrm_policy_init(void) if (!xfrm_dst_cache) panic("XFRM: failed to allocate xfrm_dst_cache\n"); + hmask = 8 - 1; + sz = (hmask+1) * sizeof(struct hlist_head); + + xfrm_policy_byidx = xfrm_policy_hash_alloc(sz); + xfrm_idx_hmask = hmask; + if (!xfrm_policy_byidx) + panic("XFRM: failed to allocate byidx hash\n"); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + + htab = &xfrm_policy_bydst[dir]; + htab->table = xfrm_policy_hash_alloc(sz); + htab->hmask = hmask; + if (!htab->table) + panic("XFRM: failed to allocate bydst hash\n"); + } + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); register_netdevice_notifier(&xfrm_dev_notifier); } -- cgit v1.2.3-70-g09d2 From 44e36b42a8378be1dcf7e6f8a1cb2710a8903387 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:50:50 -0700 Subject: [XFRM]: Extract common hashing code into xfrm_hash.[ch] Signed-off-by: David S. 
Miller --- net/xfrm/Makefile | 3 +- net/xfrm/xfrm_hash.c | 41 ++++++++++++++++ net/xfrm/xfrm_hash.h | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 95 ++++-------------------------------- net/xfrm/xfrm_state.c | 128 +++++++------------------------------------------ 5 files changed, 195 insertions(+), 200 deletions(-) create mode 100644 net/xfrm/xfrm_hash.c create mode 100644 net/xfrm/xfrm_hash.h (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 693aac1aa83..de3c1a625a4 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -2,6 +2,7 @@ # Makefile for the XFRM subsystem. # -obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ + xfrm_input.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c new file mode 100644 index 00000000000..37643bb8768 --- /dev/null +++ b/net/xfrm/xfrm_hash.c @@ -0,0 +1,41 @@ +/* xfrm_hash.c: Common hash table code. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include +#include +#include +#include +#include +#include + +#include "xfrm_hash.h" + +struct hlist_head *xfrm_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +void xfrm_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h new file mode 100644 index 00000000000..d3abb0b7dc6 --- /dev/null +++ b/net/xfrm/xfrm_hash.h @@ -0,0 +1,128 @@ +#ifndef _XFRM_HASH_H +#define _XFRM_HASH_H + +#include +#include + +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a4); +} + +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a6[2] ^ addr->a6[3]); +} + +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, + u32 reqid, unsigned short family, + unsigned int hmask) +{ + unsigned int h = family ^ reqid; + switch (family) { + case AF_INET: + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + case AF_INET6: + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, + unsigned short family, + unsigned int hmask) +{ + unsigned int h = family; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(saddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(saddr); + break; + }; + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) +{ + unsigned int h = spi ^ proto; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(daddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(daddr); + 
break; + } + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); +extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); + +#endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 087a5443b05..b446ca31fec 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,12 +22,12 @@ #include #include #include -#include -#include #include #include #include +#include "xfrm_hash.h" + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -409,62 +409,11 @@ static struct hlist_head *xfrm_policy_byidx __read_mostly; static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int __idx_hash(u32 index, unsigned int hmask) -{ - return (index ^ (index >> 8)) & hmask; -} - static inline unsigned int idx_hash(u32 index) { return __idx_hash(index, xfrm_idx_hmask); } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) -{ - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; - unsigned int h = 0; - - switch (family) { - case AF_INET: - if (sel->prefixlen_d != 32 || - sel->prefixlen_s != 32) - return hmask + 1; - - h = ntohl(daddr->a4 ^ saddr->a4); - break; - - case AF_INET6: - if (sel->prefixlen_d != 128 || - sel->prefixlen_s != 128) - return hmask + 1; - - h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) -{ - unsigned int h = 0; - - switch (family) { - case AF_INET: - h = ntohl(daddr->a4 ^ saddr->a4); - break; - - case AF_INET6: - h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) { unsigned int hmask = xfrm_policy_bydst[dir].hmask; @@ -483,34 +432,6 @@ static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address return xfrm_policy_bydst[dir].table + hash; } -static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, 
PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) @@ -553,7 +474,7 @@ static void xfrm_bydst_resize(int dir) unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *odst = xfrm_policy_bydst[dir].table; - struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize); + struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; if (!ndst) @@ -569,7 +490,7 @@ static void xfrm_bydst_resize(int dir) write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } static void xfrm_byidx_resize(int total) @@ -578,7 +499,7 @@ static void xfrm_byidx_resize(int total) unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = xfrm_policy_byidx; - struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize); + struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx) @@ -594,7 +515,7 @@ static void xfrm_byidx_resize(int total) write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); + xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } static inline int xfrm_bydst_should_resize(int dir, int *total) @@ -2071,7 +1992,7 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_policy_hash_alloc(sz); + xfrm_policy_byidx = xfrm_hash_alloc(sz); xfrm_idx_hmask = hmask; if (!xfrm_policy_byidx) panic("XFRM: failed to allocate byidx hash\n"); @@ -2082,7 +2003,7 @@ static void __init xfrm_policy_init(void) INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); htab = &xfrm_policy_bydst[dir]; - htab->table = xfrm_policy_hash_alloc(sz); + htab->table = xfrm_hash_alloc(sz); htab->hmask = hmask; if (!htab->table) panic("XFRM: failed to allocate bydst hash\n"); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 37213f9f6a0..4341795eb24 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,11 +18,11 @@ #include #include #include -#include -#include #include #include +#include "xfrm_hash.h" + struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); @@ -55,44 +55,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a4); -} - -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a6[2]^addr->a6[3]); -} - -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a4 ^ saddr->a4); -} - -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); -} - -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, - u32 reqid, unsigned short family, - unsigned int hmask) -{ - 
unsigned int h = family ^ reqid; - switch (family) { - case AF_INET: - h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - case AF_INET6: - h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -101,76 +63,18 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(addr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(addr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - -static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, + unsigned short family) { return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, - unsigned short family, unsigned int hmask) -{ - unsigned int h = spi ^ proto; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(daddr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(daddr); - break; - } - return (h ^ (h >> 10) ^ (h >> 20)) & hmask; -} - static inline unsigned int xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } -static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, struct hlist_head *nsrctable, @@ -216,18 +120,18 @@ static void xfrm_hash_resize(void *__unused) mutex_lock(&hash_resize_mutex); nsize = xfrm_hash_new_size(); - ndst = xfrm_state_hash_alloc(nsize); + ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; - nsrc = xfrm_state_hash_alloc(nsize); + nsrc = xfrm_hash_alloc(nsize); if (!nsrc) { - xfrm_state_hash_free(ndst, nsize); + xfrm_hash_free(ndst, nsize); goto out_unlock; } - nspi = xfrm_state_hash_alloc(nsize); + nspi = xfrm_hash_alloc(nsize); if (!nspi) { - xfrm_state_hash_free(ndst, nsize); - xfrm_state_hash_free(nsrc, nsize); + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); goto out_unlock; } @@ -251,9 +155,9 @@ static void xfrm_hash_resize(void *__unused) spin_unlock_bh(&xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); - xfrm_state_hash_free(odst, osize); - xfrm_state_hash_free(osrc, osize); - xfrm_state_hash_free(ospi, osize); + xfrm_hash_free(odst, osize); + xfrm_hash_free(osrc, osize); + xfrm_hash_free(ospi, osize); out_unlock: mutex_unlock(&hash_resize_mutex); @@ -1643,9 +1547,9 @@ void __init xfrm_state_init(void) sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_state_hash_alloc(sz); - xfrm_state_bysrc = xfrm_state_hash_alloc(sz); - xfrm_state_byspi = xfrm_state_hash_alloc(sz); + xfrm_state_bydst = 
xfrm_hash_alloc(sz); + xfrm_state_bysrc = xfrm_hash_alloc(sz); + xfrm_state_byspi = xfrm_hash_alloc(sz); if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); -- cgit v1.2.3-70-g09d2 From acba48e1a3c95082af1e12c5efaaca3506103a92 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Aug 2006 15:46:46 -0700 Subject: [XFRM]: Respect priority in policy lookups. Even if we find an exact match in the hash table, we must inspect the inexact list to look for a match with a better priority. Noticed by Masahide NAKAMURA . Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b446ca31fec..1cf3209cdf4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -908,6 +908,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, xfrm_address_t *daddr, *saddr; struct hlist_node *entry; struct hlist_head *chain; + u32 priority = ~0U; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); @@ -919,21 +920,21 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_policy_match(pol, fl, type, family, dir)) { - xfrm_pol_hold(pol); ret = pol; + priority = ret->priority; break; } } - if (!ret) { - chain = &xfrm_policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir)) { - xfrm_pol_hold(pol); - ret = pol; - break; - } + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir) && + pol->priority < priority) { + ret = pol; + break; } } + if (ret) + xfrm_pol_hold(ret); read_unlock_bh(&xfrm_policy_lock); return ret; -- cgit v1.2.3-70-g09d2 From e5d679f33900c71d1a76ba07c5b04055abd34480 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Aug 2006 19:25:52 -0700 Subject: [NET]: Use SLAB_PANIC Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/core/flow.c | 6 +----- net/core/neighbour.c | 12 ++++-------- net/core/skbuff.c | 9 ++------- net/decnet/dn_route.c | 11 +++-------- net/ipv4/inetpeer.c | 5 +---- net/ipv4/ipmr.c | 5 +---- net/ipv4/route.c | 10 +++------- net/ipv4/tcp.c | 4 +--- net/ipv6/ip6_fib.c | 4 +--- net/ipv6/route.c | 10 +++------- net/xfrm/xfrm_input.c | 4 +--- net/xfrm/xfrm_policy.c | 4 +--- 12 files changed, 22 insertions(+), 62 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/core/flow.c b/net/core/flow.c index 645241165e6..f23e7e38654 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -343,12 +343,8 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - - if (!flow_cachep) - panic("NET: failed to allocate flow cache slab\n"); - flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c0a27407f44..a45bd2124d6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1339,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) neigh_rand_reach_time(tbl->parms.base_reachable_time); if (!tbl->kmem_cachep) - tbl->kmem_cachep = kmem_cache_create(tbl->id, - tbl->entry_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!tbl->kmem_cachep) - panic("cannot create neighbour cache"); - + tbl->kmem_cachep = + kmem_cache_create(tbl->id, tbl->entry_size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL, NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a476f1956e..c448c7f6fde 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2046,19 +2046,14 @@ void __init skb_init(void) skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!skbuff_fclone_cache) - panic("cannot create skbuff cache"); } EXPORT_SYMBOL(___pskb_trim); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c5daf3557c1..dd0761e3d28 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1781,14 +1781,9 @@ void __init dn_route_init(void) { int i, goal, order; - dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", - sizeof(struct dn_route), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!dn_dst_ops.kmem_cachep) - panic("DECnet: Failed to allocate dn_dst_cache\n"); - + dn_dst_ops.kmem_cachep = + kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); init_timer(&dn_route_timer); dn_route_timer.function = dn_dst_check_expire; dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 03ff62ebcfe..a675602ef29 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -126,12 +126,9 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!peer_cachep) - panic("cannot create inet_peer_cache"); - /* All the timers, 
started at system startup tend to synchronize. Perturb it a bit. */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 98f0aa0d421..ba49588da24 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1900,11 +1900,8 @@ void __init ip_mr_init(void) { mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!mrt_cachep) - panic("cannot allocate ip_mrt_cache"); - init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4d4cb85a16..20ffe8e88c0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3147,13 +3147,9 @@ int __init ip_rt_init(void) } #endif - ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", - sizeof(struct rtable), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!ipv4_dst_ops.kmem_cachep) - panic("IP: failed to allocate ip_dst_cache\n"); + ipv4_dst_ops.kmem_cachep = + kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e570db4d33c..29e3d606db7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2254,9 +2254,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!tcp_hashinfo.bind_bucket_cachep) - panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fbca60950b1..8fcae7a6510 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1472,10 +1472,8 @@ void __init fib6_init(void) { fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!fib6_node_kmem) - panic("cannot create fib6_nodes cache"); fib6_tables_init(); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d83844d9499..ba1b3d11865 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2419,13 +2419,9 @@ void __init ip6_route_init(void) { struct proc_dir_entry *p; - ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", - sizeof(struct rt6_info), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!ip6_dst_ops.kmem_cachep) - panic("cannot create ip6_dst_cache"); - + ip6_dst_ops.kmem_cachep = + kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); fib6_init(); #ifdef CONFIG_PROC_FS p = proc_net_create("ipv6_route", 0, rt6_proc_info); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 891a6090cc0..dfc90bb1cf1 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -82,8 +82,6 @@ void __init xfrm_input_init(void) { secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!secpath_cachep) - panic("XFRM: failed to allocate secpath_cache\n"); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1cf3209cdf4..7db1c48537f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1985,10 +1985,8 @@ static void __init xfrm_policy_init(void) xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct 
xfrm_dst), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!xfrm_dst_cache) - panic("XFRM: failed to allocate xfrm_dst_cache\n"); hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); -- cgit v1.2.3-70-g09d2 From d1d9facfd1b326e0df587c96f0ee55de2ae9f946 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 1 Sep 2006 00:32:12 -0700 Subject: [XFRM]: remove xerr_idxp from __xfrm_policy_check() It seems that during the MIPv6 respin, some code which was originally conditionally compiled around CONFIG_XFRM_ADVANCED was accidently left in after the config option was removed. This patch removes an extraneous pointer (xerr_idxp) which is no longer needed. Signed-off-by: James Morris Acked-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7db1c48537f..537854fe47c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1514,8 +1514,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { - if (idxp) - *idxp = k; + *idxp = k; return 1; } } @@ -1534,7 +1533,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; - int *xerr_idxp = &xerr_idx; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; @@ -1560,7 +1558,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, xfrm_policy_lookup); if (!pol) { - if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); return 0; } @@ -1619,13 +1617,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { - if (k < -1 && xerr_idxp) - *xerr_idxp = -(2+k); + if (k < -1) + /* "-2 - errored_index" returned */ + xerr_idx = -(2+k); goto reject; } } - if (secpath_has_nontransport(sp, k, xerr_idxp)) + if (secpath_has_nontransport(sp, k, &xerr_idx)) goto reject; xfrm_pols_put(pols, npols); -- cgit v1.2.3-70-g09d2 From a1e59abf824969554b90facd44a4ab16e265afa4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 19 Sep 2006 12:57:34 -0700 Subject: [XFRM]: Fix wildcard as tunnel source Hashing SAs by source address breaks templates with wildcards as tunnel source since the source address used for hashing/lookup is still 0/0. Move source address lookup to xfrm_tmpl_resolve_one() so we can use the real address in the lookup. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 13 +++++++++++++ net/ipv4/xfrm4_policy.c | 20 ++++++++++++++++++++ net/ipv4/xfrm4_state.c | 15 --------------- net/ipv6/xfrm6_policy.c | 21 +++++++++++++++++++++ net/ipv6/xfrm6_state.c | 16 ---------------- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++++ 6 files changed, 75 insertions(+), 31 deletions(-) (limited to 'net/xfrm/xfrm_policy.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4d6dc627df9..11e0b1d6bd4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -222,6 +222,7 @@ struct xfrm_policy_afinfo { struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); + int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); int (*bundle_create)(struct xfrm_policy *policy, struct xfrm_state **xfrm, @@ -630,6 +631,18 @@ secpath_reset(struct sk_buff *skb) #endif } +static inline int +xfrm_addr_any(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return addr->a4 == 0; + case AF_INET6: + return ipv6_addr_any((struct in6_addr *)&addr->a6); + } + return 0; +} + static inline int __xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 479598566f1..eabcd27b176 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return __ip_route_output_key((struct rtable**)dst, fl); } +static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rtable *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip4_u = { + .daddr = daddr->a4, + }, + }, + }; + + if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + saddr->a4 = rt->rt_src; + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -298,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, + .get_saddr = xfrm4_get_saddr, .find_bundle = __xfrm4_find_bundle, .bundle_create = __xfrm4_bundle_create, .decode_session = _decode_session4, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 6a2a4ab4277..fe2034494d0 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,21 +42,6 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { - struct rtable *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip4_u = { - .daddr = x->id.daddr.a4, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET)) { - x->props.saddr.a4 = rt->rt_src; - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 9391c4c94fe..6a252e2134d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -34,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return err; } +static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rt6_info *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)&daddr->a6, + }, + }, + }; 
+ + if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -362,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, + .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index d88cd92c864..711bfafb247 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,22 +42,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)daddr, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET6)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, - (struct in6_addr *)&x->props.saddr); - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET6; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 537854fe47c..b6e2e79d726 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1107,6 +1107,20 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } +static int +xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->get_saddr(local, remote); + xfrm_policy_put_afinfo(afinfo); + return err; +} + /* Resolve list of templates for the flow, given policy. */ static int @@ -1118,6 +1132,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + xfrm_address_t tmp; for (nx=0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; @@ -1128,6 +1143,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + if (xfrm_addr_any(local, family)) { + error = xfrm_get_saddr(&tmp, remote, family); + if (error) + goto fail; + local = &tmp; + } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); -- cgit v1.2.3-70-g09d2
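The hashing helpers factored into xfrm_hash.h earlier in this series are small enough to exercise outside the kernel. The following user-space sketch reproduces the AF_INET bucket selection (__xfrm4_daddr_saddr_hash() plus __xfrm_dst_hash()) to show how a destination/source/reqid triple picks a chain; the sample addresses, the reqid of 1, the main() driver and the 8-bucket hmask (matching the initial table size in xfrm_policy_init()/xfrm_state_init()) are illustrative assumptions, not part of the patches.

/* Minimal user-space sketch of the bucket selection added in xfrm_hash.h.
 * Flows are hashed by destination and source address plus reqid, and the
 * result is masked with the per-table hmask.  Sample addresses, reqid and
 * hmask below are assumptions made purely for illustration.
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h>

/* Mirrors __xfrm4_daddr_saddr_hash(): XOR the two network-order addresses
 * and convert to host order so the host-specific low bits end up in the
 * bits that survive the final mask. */
static unsigned int xfrm4_daddr_saddr_hash(uint32_t daddr, uint32_t saddr)
{
        return ntohl(daddr ^ saddr);
}

/* Mirrors __xfrm_dst_hash() for the AF_INET case: fold in family and
 * reqid, then fold the high half into the low half before masking. */
static unsigned int xfrm_dst_hash(uint32_t daddr, uint32_t saddr,
                                  uint32_t reqid, unsigned int hmask)
{
        unsigned int h = AF_INET ^ reqid;

        h ^= xfrm4_daddr_saddr_hash(daddr, saddr);
        return (h ^ (h >> 16)) & hmask;
}

int main(void)
{
        uint32_t daddr = inet_addr("192.0.2.1");    /* network byte order */
        uint32_t saddr = inet_addr("198.51.100.7");
        unsigned int hmask = 8 - 1;                 /* initial table size */

        printf("flow hashes to bucket %u of %u\n",
               xfrm_dst_hash(daddr, saddr, 1, hmask), hmask + 1);
        return 0;
}

Because the mask is taken from the live table, growing a table in xfrm_bydst_resize() or xfrm_hash_resize() only means rehashing the chains with a larger hmask; the hash computation itself never changes.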
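Similarly, the "Respect priority in policy lookups" fix above can be read as a two-pass scan: take the first match from the exact hash chain, then still walk the inexact chain for a policy with a numerically smaller (better) priority. A minimal stand-alone sketch follows, with flat arrays and a matches flag standing in for the hlist chains and xfrm_policy_match(); the sample policies are purely illustrative.

/* Sketch of the lookup order after "[XFRM]: Respect priority in policy
 * lookups": even when the exact hash chain yields a match, the inexact
 * chain is still scanned for a policy with a better (smaller) priority. */
#include <stdio.h>

struct policy {
        const char *name;
        unsigned int priority;
        int matches;        /* stands in for xfrm_policy_match() */
};

static const struct policy *policy_lookup(const struct policy *exact, int n_exact,
                                          const struct policy *inexact, int n_inexact)
{
        const struct policy *ret = NULL;
        unsigned int priority = ~0U;
        int i;

        /* Best candidate from the fully specified (hashed) chain. */
        for (i = 0; i < n_exact; i++) {
                if (exact[i].matches) {
                        ret = &exact[i];
                        priority = ret->priority;
                        break;
                }
        }
        /* A prefixed/inexact policy still wins if its priority is better. */
        for (i = 0; i < n_inexact; i++) {
                if (inexact[i].matches && inexact[i].priority < priority) {
                        ret = &inexact[i];
                        break;
                }
        }
        return ret;
}

int main(void)
{
        const struct policy exact[]   = { { "host-to-host", 200, 1 } };
        const struct policy inexact[] = { { "subnet-wide",  100, 1 } };
        const struct policy *pol = policy_lookup(exact, 1, inexact, 1);

        printf("selected: %s\n", pol ? pol->name : "(none)");
        return 0;
}

Run as-is this selects the subnet-wide policy, because its priority value (100) beats the exact match (200); that is precisely the case the patch addresses.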