7 files changed, 4494 insertions, 0 deletions
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
new file mode 100644
index 00000000000..58ca6a972c4
--- /dev/null
+++ b/net/xfrm/Kconfig
@@ -0,0 +1,12 @@
+#
+# XFRM configuration
+#
+config XFRM_USER
+	tristate "IPsec user configuration interface"
+	depends on INET && XFRM
+	---help---
+	  Support for IPsec user configuration interface used
+	  by native Linux tools.
+
+	  If unsure, say Y.
+
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
new file mode 100644
index 00000000000..693aac1aa83
--- /dev/null
+++ b/net/xfrm/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the XFRM subsystem.
+#
+
+obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o
+obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
new file mode 100644
index 00000000000..080aae243ce
--- /dev/null
+++ b/net/xfrm/xfrm_algo.c
@@ -0,0 +1,729 @@
+/* 
+ * xfrm algorithm interface
+ *
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pfkeyv2.h>
+#include <linux/crypto.h>
+#include <net/xfrm.h>
+#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
+#include <net/ah.h>
+#endif
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#include <net/esp.h>
+#endif
+#include <asm/scatterlist.h>
+
+/*
+ * Algorithms supported by IPsec.  These entries contain properties which
+ * are used in key negotiation and xfrm processing, and are used to verify
+ * that instantiated crypto transforms have correct parameters for IPsec
+ * purposes.
+ */
+static struct xfrm_algo_desc aalg_list[] = {
+{
+	.name = "digest_null",
+	
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 0,
+			.icv_fullbits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "md5",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 128,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_AALG_MD5HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "sha1",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_AALG_SHA1HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+{
+	.name = "sha256",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 256,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 256,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+	.name = "ripemd160",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+};
+
+static struct xfrm_algo_desc ealg_list[] = {
+{
+	.name = "cipher_null",
+	
+	.uinfo = {
+		.encr = {
+			.blockbits = 8,
+			.defkeybits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id =	SADB_EALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "des",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 64,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 64,
+		.sadb_alg_maxbits = 64
+	}
+},
+{
+	.name = "des3_ede",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 192,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_3DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 192,
+		.sadb_alg_maxbits = 192
+	}
+},
+{
+	.name = "cast128",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_CASTCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "blowfish",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 448
+	}
+},
+{
+	.name = "aes",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 128,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_AESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+        .name = "serpent",
+
+        .uinfo = {
+                .encr = {
+                        .blockbits = 128,
+                        .defkeybits = 128,
+                }
+        },
+
+        .desc = {
+                .sadb_alg_id = SADB_X_EALG_SERPENTCBC,
+                .sadb_alg_ivlen = 8,
+                .sadb_alg_minbits = 128,
+                .sadb_alg_maxbits = 256,
+        }
+},
+{
+        .name = "twofish",
+                 
+        .uinfo = {
+                .encr = {
+                        .blockbits = 128,
+                        .defkeybits = 128,
+                }
+        },
+
+        .desc = {
+                .sadb_alg_id = SADB_X_EALG_TWOFISHCBC,
+                .sadb_alg_ivlen = 8,
+                .sadb_alg_minbits = 128,
+                .sadb_alg_maxbits = 256
+        }
+},
+};
+
+static struct xfrm_algo_desc calg_list[] = {
+{
+	.name = "deflate",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
+},
+{
+	.name = "lzs",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
+},
+{
+	.name = "lzjh",
+	.uinfo = {
+		.comp = {
+			.threshold = 50,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
+},
+};
+
+static inline int aalg_entries(void)
+{
+	return ARRAY_SIZE(aalg_list);
+}
+
+static inline int ealg_entries(void)
+{
+	return ARRAY_SIZE(ealg_list);
+}
+
+static inline int calg_entries(void)
+{
+	return ARRAY_SIZE(calg_list);
+}
+
+/* Todo: generic iterators */
+struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < aalg_entries(); i++) {
+		if (aalg_list[i].desc.sadb_alg_id == alg_id) {
+			if (aalg_list[i].available)
+				return &aalg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid);
+
+struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < ealg_entries(); i++) {
+		if (ealg_list[i].desc.sadb_alg_id == alg_id) {
+			if (ealg_list[i].available)
+				return &ealg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid);
+
+struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < calg_entries(); i++) {
+		if (calg_list[i].desc.sadb_alg_id == alg_id) {
+			if (calg_list[i].available)
+				return &calg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
+
+static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
+					      int entries, char *name,
+					      int probe)
+{
+	int i, status;
+
+	if (!name)
+		return NULL;
+
+	for (i = 0; i < entries; i++) {
+		if (strcmp(name, list[i].name))
+			continue;
+
+		if (list[i].available)
+			return &list[i];
+
+		if (!probe)
+			break;
+
+		status = crypto_alg_available(name, 0);
+		if (!status)
+			break;
+
+		list[i].available = status;
+		return &list[i];
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe)
+{
+	return xfrm_get_byname(aalg_list, aalg_entries(), name, probe);
+}
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
+
+struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe)
+{
+	return xfrm_get_byname(ealg_list, ealg_entries(), name, probe);
+}
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
+
+struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
+{
+	return xfrm_get_byname(calg_list, calg_entries(), name, probe);
+}
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
+
+struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
+{
+	if (idx >= aalg_entries())
+		return NULL;
+
+	return &aalg_list[idx];
+}
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx);
+
+struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx)
+{
+	if (idx >= ealg_entries())
+		return NULL;
+
+	return &ealg_list[idx];
+}
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
+
+/*
+ * Probe for the availability of crypto algorithms, and set the available
+ * flag for any algorithms found on the system.  This is typically called by
+ * pfkey during userspace SA add, update or register.
+ */
+void xfrm_probe_algs(void)
+{
+#ifdef CONFIG_CRYPTO
+	int i, status;
+	
+	BUG_ON(in_softirq());
+
+	for (i = 0; i < aalg_entries(); i++) {
+		status = crypto_alg_available(aalg_list[i].name, 0);
+		if (aalg_list[i].available != status)
+			aalg_list[i].available = status;
+	}
+	
+	for (i = 0; i < ealg_entries(); i++) {
+		status = crypto_alg_available(ealg_list[i].name, 0);
+		if (ealg_list[i].available != status)
+			ealg_list[i].available = status;
+	}
+	
+	for (i = 0; i < calg_entries(); i++) {
+		status = crypto_alg_available(calg_list[i].name, 0);
+		if (calg_list[i].available != status)
+			calg_list[i].available = status;
+	}
+#endif
+}
+EXPORT_SYMBOL_GPL(xfrm_probe_algs);
+
+int xfrm_count_auth_supported(void)
+{
+	int i, n;
+
+	for (i = 0, n = 0; i < aalg_entries(); i++)
+		if (aalg_list[i].available)
+			n++;
+	return n;
+}
+EXPORT_SYMBOL_GPL(xfrm_count_auth_supported);
+
+int xfrm_count_enc_supported(void)
+{
+	int i, n;
+
+	for (i = 0, n = 0; i < ealg_entries(); i++)
+		if (ealg_list[i].available)
+			n++;
+	return n;
+}
+EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
+
+/* Move to common area: it is shared with AH. */
+
+void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
+		  int offset, int len, icv_update_fn_t icv_update)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct scatterlist sg;
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		
+		sg.page = virt_to_page(skb->data + offset);
+		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg.length = copy;
+		
+		icv_update(tfm, &sg, 1);
+		
+		if ((len -= copy) == 0)
+			return;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			
+			sg.page = frag->page;
+			sg.offset = frag->page_offset + offset-start;
+			sg.length = copy;
+			
+			icv_update(tfm, &sg, 1);
+
+			if (!(len -= copy))
+				return;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
+				if ((len -= copy) == 0)
+					return;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+}
+EXPORT_SYMBOL_GPL(skb_icv_walk);
+
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+
+/* Looking generic it is not used in another places. */
+
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int elt = 0;
+
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		sg[elt].page = virt_to_page(skb->data + offset);
+		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg[elt].length = copy;
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg[elt].page = frag->page;
+			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].length = copy;
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+				if ((len -= copy) == 0)
+					return elt;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+	return elt;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
+/* Check that skb data bits are writable. If they are not, copy data
+ * to newly created private area. If "tailbits" is given, make sure that
+ * tailbits bytes beyond current end of skb are writable.
+ *
+ * Returns amount of elements of scatterlist to load for subsequent
+ * transformations and pointer to writable trailer skb.
+ */
+
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most of packets will go this way. */
+	if (!skb_shinfo(skb)->frag_list) {
+		/* A little of trouble, not enough of space for trailer.
+		 * This should not happen, when stack is tuned to generate
+		 * good frames. OK, on miss we reallocate and reserve even more
+		 * space, 128 bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Voila! */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Misery. We are in troubles, going to mincer fragments... */
+
+	elt = 1;
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is partially pulled by someone,
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If the skb is the last, worry about trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_shinfo(skb1)->frag_list ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_shinfo(skb1)->frag_list) {
+			struct sk_buff *skb2;
+
+			/* Fuck, we are miserable poor guys... */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+
+			if (skb1->sk)
+				skb_set_owner_w(skb, skb1->sk);
+
+			/* Looking around. Are we still alive?
+			 * OK, link new skb, drop old one */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+EXPORT_SYMBOL_GPL(skb_cow_data);
+
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+EXPORT_SYMBOL_GPL(pskb_put);
+#endif
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
new file mode 100644
index 00000000000..c58a6f05a0b
--- /dev/null
+++ b/net/xfrm/xfrm_input.c
@@ -0,0 +1,89 @@
+/*
+ * xfrm_input.c
+ *
+ * Changes:
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+static kmem_cache_t *secpath_cachep;
+
+void __secpath_destroy(struct sec_path *sp)
+{
+	int i;
+	for (i = 0; i < sp->len; i++)
+		xfrm_state_put(sp->x[i].xvec);
+	kmem_cache_free(secpath_cachep, sp);
+}
+EXPORT_SYMBOL(__secpath_destroy);
+
+struct sec_path *secpath_dup(struct sec_path *src)
+{
+	struct sec_path *sp;
+
+	sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC);
+	if (!sp)
+		return NULL;
+
+	sp->len = 0;
+	if (src) {
+		int i;
+
+		memcpy(sp, src, sizeof(*sp));
+		for (i = 0; i < sp->len; i++)
+			xfrm_state_hold(sp->x[i].xvec);
+	}
+	atomic_set(&sp->refcnt, 1);
+	return sp;
+}
+EXPORT_SYMBOL(secpath_dup);
+
+/* Fetch spi and seq from ipsec header */
+
+int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+{
+	int offset, offset_seq;
+
+	switch (nexthdr) {
+	case IPPROTO_AH:
+		offset = offsetof(struct ip_auth_hdr, spi);
+		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
+		break;
+	case IPPROTO_ESP:
+		offset = offsetof(struct ip_esp_hdr, spi);
+		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
+		break;
+	case IPPROTO_COMP:
+		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
+			return -EINVAL;
+		*spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2)));
+		*seq = 0;
+		return 0;
+	default:
+		return 1;
+	}
+
+	if (!pskb_may_pull(skb, 16))
+		return -EINVAL;
+
+	*spi = *(u32*)(skb->h.raw + offset);
+	*seq = *(u32*)(skb->h.raw + offset_seq);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_parse_spi);
+
+void __init xfrm_input_init(void)
+{
+	secpath_cachep = kmem_cache_create("secpath_cache",
+					   sizeof(struct sec_path),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!secpath_cachep)
+		panic("XFRM: failed to allocate secpath_cache\n");
+}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
new file mode 100644
index 00000000000..80828078733
--- /dev/null
+++ b/net/xfrm/xfrm_policy.c
@@ -0,0 +1,1367 @@
+/* 
+ * xfrm_policy.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	YOSHIFUJI Hideaki
+ * 		Split up af-specific portion
+ *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
+ * 	
+ */
+
+#include <asm/bug.h>
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+DECLARE_MUTEX(xfrm_cfg_sem);
+EXPORT_SYMBOL(xfrm_cfg_sem);
+
+static DEFINE_RWLOCK(xfrm_policy_lock);
+
+struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_list);
+
+static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
+static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+static kmem_cache_t *xfrm_dst_cache;
+
+static struct work_struct xfrm_policy_gc_work;
+static struct list_head xfrm_policy_gc_list =
+	LIST_HEAD_INIT(xfrm_policy_gc_list);
+static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+
+int xfrm_register_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	write_lock(&typemap->lock);
+	if (likely(typemap->map[type->proto] == NULL))
+		typemap->map[type->proto] = type;
+	else
+		err = -EEXIST;
+	write_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_register_type);
+
+int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	write_lock(&typemap->lock);
+	if (unlikely(typemap->map[type->proto] != type))
+		err = -ENOENT;
+	else
+		typemap->map[type->proto] = NULL;
+	write_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_type);
+
+struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	struct xfrm_type_map *typemap;
+	struct xfrm_type *type;
+	int modload_attempted = 0;
+
+retry:
+	afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return NULL;
+	typemap = afinfo->type_map;
+
+	read_lock(&typemap->lock);
+	type = typemap->map[proto];
+	if (unlikely(type && !try_module_get(type->owner)))
+		type = NULL;
+	read_unlock(&typemap->lock);
+	if (!type && !modload_attempted) {
+		xfrm_policy_put_afinfo(afinfo);
+		request_module("xfrm-type-%d-%d",
+			       (int) family, (int) proto);
+		modload_attempted = 1;
+		goto retry;
+	}
+
+	xfrm_policy_put_afinfo(afinfo);
+	return type;
+}
+EXPORT_SYMBOL(xfrm_get_type);
+
+int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
+		    unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	if (likely(afinfo->dst_lookup != NULL))
+		err = afinfo->dst_lookup(dst, fl);
+	else
+		err = -EINVAL;
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_dst_lookup);
+
+void xfrm_put_type(struct xfrm_type *type)
+{
+	module_put(type->owner);
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+		return MAX_SCHEDULE_TIMEOUT-1;
+	else
+	        return secs*HZ;
+}
+
+static void xfrm_policy_timer(unsigned long data)
+{
+	struct xfrm_policy *xp = (struct xfrm_policy*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;
+	int warn = 0;
+	int dir;
+
+	read_lock(&xp->lock);
+
+	if (xp->dead)
+		goto out;
+
+	dir = xp->index & 7;
+
+	if (xp->lft.hard_add_expires_seconds) {
+		long tmo = xp->lft.hard_add_expires_seconds +
+			xp->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.hard_use_expires_seconds) {
+		long tmo = xp->lft.hard_use_expires_seconds +
+			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.soft_add_expires_seconds) {
+		long tmo = xp->lft.soft_add_expires_seconds +
+			xp->curlft.add_time - now;
+		if (tmo <= 0) {
+			warn = 1;
+			tmo = XFRM_KM_TIMEOUT;
+		}
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.soft_use_expires_seconds) {
+		long tmo = xp->lft.soft_use_expires_seconds +
+			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
+		if (tmo <= 0) {
+			warn = 1;
+			tmo = XFRM_KM_TIMEOUT;
+		}
+		if (tmo < next)
+			next = tmo;
+	}
+
+	if (warn)
+		km_policy_expired(xp, dir, 0);
+	if (next != LONG_MAX &&
+	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
+		xfrm_pol_hold(xp);
+
+out:
+	read_unlock(&xp->lock);
+	xfrm_pol_put(xp);
+	return;
+
+expired:
+	read_unlock(&xp->lock);
+	km_policy_expired(xp, dir, 1);
+	xfrm_policy_delete(xp, dir);
+	xfrm_pol_put(xp);
+}
+
+
+/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+ */
+
+struct xfrm_policy *xfrm_policy_alloc(int gfp)
+{
+	struct xfrm_policy *policy;
+
+	policy = kmalloc(sizeof(struct xfrm_policy), gfp);
+
+	if (policy) {
+		memset(policy, 0, sizeof(struct xfrm_policy));
+		atomic_set(&policy->refcnt, 1);
+		rwlock_init(&policy->lock);
+		init_timer(&policy->timer);
+		policy->timer.data = (unsigned long)policy;
+		policy->timer.function = xfrm_policy_timer;
+	}
+	return policy;
+}
+EXPORT_SYMBOL(xfrm_policy_alloc);
+
+/* Destroy xfrm_policy: descendant resources must be released to this moment. */
+
+void __xfrm_policy_destroy(struct xfrm_policy *policy)
+{
+	if (!policy->dead)
+		BUG();
+
+	if (policy->bundles)
+		BUG();
+
+	if (del_timer(&policy->timer))
+		BUG();
+
+	kfree(policy);
+}
+EXPORT_SYMBOL(__xfrm_policy_destroy);
+
+static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	while ((dst = policy->bundles) != NULL) {
+		policy->bundles = dst->next;
+		dst_free(dst);
+	}
+
+	if (del_timer(&policy->timer))
+		atomic_dec(&policy->refcnt);
+
+	if (atomic_read(&policy->refcnt) > 1)
+		flow_cache_flush();
+
+	xfrm_pol_put(policy);
+}
+
+static void xfrm_policy_gc_task(void *data)
+{
+	struct xfrm_policy *policy;
+	struct list_head *entry, *tmp;
+	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+
+	spin_lock_bh(&xfrm_policy_gc_lock);
+	list_splice_init(&xfrm_policy_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm_policy_gc_lock);
+
+	list_for_each_safe(entry, tmp, &gc_list) {
+		policy = list_entry(entry, struct xfrm_policy, list);
+		xfrm_policy_gc_kill(policy);
+	}
+}
+
+/* Rule must be locked. Release descentant resources, announce
+ * entry dead. The rule must be unlinked from lists to the moment.
+ */
+
+static void xfrm_policy_kill(struct xfrm_policy *policy)
+{
+	int dead;
+
+	write_lock_bh(&policy->lock);
+	dead = policy->dead;
+	policy->dead = 1;
+	write_unlock_bh(&policy->lock);
+
+	if (unlikely(dead)) {
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock(&xfrm_policy_gc_lock);
+	list_add(&policy->list, &xfrm_policy_gc_list);
+	spin_unlock(&xfrm_policy_gc_lock);
+
+	schedule_work(&xfrm_policy_gc_work);
+}
+
+/* Generate new index... KAME seems to generate them ordered by cost
+ * of an absolute inpredictability of ordering of rules. This will not pass. */
+static u32 xfrm_gen_index(int dir)
+{
+	u32 idx;
+	struct xfrm_policy *p;
+	static u32 idx_generator;
+
+	for (;;) {
+		idx = (idx_generator | dir);
+		idx_generator += 8;
+		if (idx == 0)
+			idx = 8;
+		for (p = xfrm_policy_list[dir]; p; p = p->next) {
+			if (p->index == idx)
+				break;
+		}
+		if (!p)
+			return idx;
+	}
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *delpol = NULL;
+	struct xfrm_policy **newpos = NULL;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
+		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+			if (excl) {
+				write_unlock_bh(&xfrm_policy_lock);
+				return -EEXIST;
+			}
+			*p = pol->next;
+			delpol = pol;
+			if (policy->priority > pol->priority)
+				continue;
+		} else if (policy->priority >= pol->priority) {
+			p = &pol->next;
+			continue;
+		}
+		if (!newpos)
+			newpos = p;
+		if (delpol)
+			break;
+		p = &pol->next;
+	}
+	if (newpos)
+		p = newpos;
+	xfrm_pol_hold(policy);
+	policy->next = *p;
+	*p = policy;
+	atomic_inc(&flow_cache_genid);
+	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = 0;
+	if (!mod_timer(&policy->timer, jiffies + HZ))
+		xfrm_pol_hold(policy);
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (delpol) {
+		xfrm_policy_kill(delpol);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_policy_insert);
+
+struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
+				      int delete)
+{
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+			xfrm_pol_hold(pol);
+			if (delete)
+				*p = pol->next;
+			break;
+		}
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (pol && delete) {
+		atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+	}
+	return pol;
+}
+EXPORT_SYMBOL(xfrm_policy_bysel);
+
+struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+{
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
+		if (pol->index == id) {
+			xfrm_pol_hold(pol);
+			if (delete)
+				*p = pol->next;
+			break;
+		}
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (pol && delete) {
+		atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+	}
+	return pol;
+}
+EXPORT_SYMBOL(xfrm_policy_byid);
+
+void xfrm_policy_flush(void)
+{
+	struct xfrm_policy *xp;
+	int dir;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
+		while ((xp = xfrm_policy_list[dir]) != NULL) {
+			xfrm_policy_list[dir] = xp->next;
+			write_unlock_bh(&xfrm_policy_lock);
+
+			xfrm_policy_kill(xp);
+
+			write_lock_bh(&xfrm_policy_lock);
+		}
+	}
+	atomic_inc(&flow_cache_genid);
+	write_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_policy_flush);
+
+int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+		     void *data)
+{
+	struct xfrm_policy *xp;
+	int dir;
+	int count = 0;
+	int error = 0;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
+			count++;
+	}
+
+	if (count == 0) {
+		error = -ENOENT;
+		goto out;
+	}
+
+	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
+			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+			if (error)
+				goto out;
+		}
+	}
+
+out:
+	read_unlock_bh(&xfrm_policy_lock);
+	return error;
+}
+EXPORT_SYMBOL(xfrm_policy_walk);
+
+/* Find policy to apply to this flow. */
+
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+			       void **objp, atomic_t **obj_refp)
+{
+	struct xfrm_policy *pol;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
+		struct xfrm_selector *sel = &pol->selector;
+		int match;
+
+		if (pol->family != family)
+			continue;
+
+		match = xfrm_selector_match(sel, fl, family);
+		if (match) {
+			xfrm_pol_hold(pol);
+			break;
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	if ((*objp = (void *) pol) != NULL)
+		*obj_refp = &pol->refcnt;
+}
+
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+{
+	struct xfrm_policy *pol;
+
+	read_lock_bh(&xfrm_policy_lock);
+	if ((pol = sk->sk_policy[dir]) != NULL) {
+		int match = xfrm_selector_match(&pol->selector, fl,
+						sk->sk_family);
+		if (match)
+			xfrm_pol_hold(pol);
+		else
+			pol = NULL;
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	return pol;
+}
+
+static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
+{
+	pol->next = xfrm_policy_list[dir];
+	xfrm_policy_list[dir] = pol;
+	xfrm_pol_hold(pol);
+}
+
+static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+						int dir)
+{
+	struct xfrm_policy **polp;
+
+	for (polp = &xfrm_policy_list[dir];
+	     *polp != NULL; polp = &(*polp)->next) {
+		if (*polp == pol) {
+			*polp = pol->next;
+			return pol;
+		}
+	}
+	return NULL;
+}
+
+void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+{
+	write_lock_bh(&xfrm_policy_lock);
+	pol = __xfrm_policy_unlink(pol, dir);
+	write_unlock_bh(&xfrm_policy_lock);
+	if (pol) {
+		if (dir < XFRM_POLICY_MAX)
+			atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+	}
+}
+
+int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
+{
+	struct xfrm_policy *old_pol;
+
+	write_lock_bh(&xfrm_policy_lock);
+	old_pol = sk->sk_policy[dir];
+	sk->sk_policy[dir] = pol;
+	if (pol) {
+		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+	}
+	if (old_pol)
+		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (old_pol) {
+		xfrm_policy_kill(old_pol);
+	}
+	return 0;
+}
+
+static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
+{
+	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+
+	if (newp) {
+		newp->selector = old->selector;
+		newp->lft = old->lft;
+		newp->curlft = old->curlft;
+		newp->action = old->action;
+		newp->flags = old->flags;
+		newp->xfrm_nr = old->xfrm_nr;
+		newp->index = old->index;
+		memcpy(newp->xfrm_vec, old->xfrm_vec,
+		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
+		write_lock_bh(&xfrm_policy_lock);
+		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
+		write_unlock_bh(&xfrm_policy_lock);
+		xfrm_pol_put(newp);
+	}
+	return newp;
+}
+
+int __xfrm_sk_clone_policy(struct sock *sk)
+{
+	struct xfrm_policy *p0 = sk->sk_policy[0],
+			   *p1 = sk->sk_policy[1];
+
+	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
+	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
+		return -ENOMEM;
+	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Resolve list of templates for the flow, given policy. */
+
+static int
+xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
+		  struct xfrm_state **xfrm,
+		  unsigned short family)
+{
+	int nx;
+	int i, error;
+	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+
+	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
+		struct xfrm_state *x;
+		xfrm_address_t *remote = daddr;
+		xfrm_address_t *local  = saddr;
+		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
+
+		if (tmpl->mode) {
+			remote = &tmpl->id.daddr;
+			local = &tmpl->saddr;
+		}
+
+		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+
+		if (x && x->km.state == XFRM_STATE_VALID) {
+			xfrm[nx++] = x;
+			daddr = remote;
+			saddr = local;
+			continue;
+		}
+		if (x) {
+			error = (x->km.state == XFRM_STATE_ERROR ?
+				 -EINVAL : -EAGAIN);
+			xfrm_state_put(x);
+		}
+
+		if (!tmpl->optional)
+			goto fail;
+	}
+	return nx;
+
+fail:
+	for (nx--; nx>=0; nx--)
+		xfrm_state_put(xfrm[nx]);
+	return error;
+}
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+static struct dst_entry *
+xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
+{
+	struct dst_entry *x;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return ERR_PTR(-EINVAL);
+	x = afinfo->find_bundle(fl, policy);
+	xfrm_policy_put_afinfo(afinfo);
+	return x;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		   struct flowi *fl, struct dst_entry **dst_p,
+		   unsigned short family)
+{
+	int err;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
+static inline int policy_to_flow_dir(int dir)
+{
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+		return dir;
+	switch (dir) {
+	default:
+	case XFRM_POLICY_IN:
+		return FLOW_DIR_IN;
+	case XFRM_POLICY_OUT:
+		return FLOW_DIR_OUT;
+	case XFRM_POLICY_FWD:
+		return FLOW_DIR_FWD;
+	};
+}
+
+static int stale_bundle(struct dst_entry *dst);
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+		struct sock *sk, int flags)
+{
+	struct xfrm_policy *policy;
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct dst_entry *dst, *dst_orig = *dst_p;
+	int nx = 0;
+	int err;
+	u32 genid;
+	u16 family = dst_orig->ops->family;
+restart:
+	genid = atomic_read(&flow_cache_genid);
+	policy = NULL;
+	if (sk && sk->sk_policy[1])
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+
+	if (!policy) {
+		/* To accelerate a bit...  */
+		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+			return 0;
+
+		policy = flow_cache_lookup(fl, family,
+					   policy_to_flow_dir(XFRM_POLICY_OUT),
+					   xfrm_policy_lookup);
+	}
+
+	if (!policy)
+		return 0;
+
+	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	switch (policy->action) {
+	case XFRM_POLICY_BLOCK:
+		/* Prohibit the flow */
+		xfrm_pol_put(policy);
+		return -EPERM;
+
+	case XFRM_POLICY_ALLOW:
+		if (policy->xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		/* Try to find matching bundle.
+		 *
+		 * LATER: help from flow cache. It is optional, this
+		 * is required only for output policy.
+		 */
+		dst = xfrm_find_bundle(fl, policy, family);
+		if (IS_ERR(dst)) {
+			xfrm_pol_put(policy);
+			return PTR_ERR(dst);
+		}
+
+		if (dst)
+			break;
+
+		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+		if (unlikely(nx<0)) {
+			err = nx;
+			if (err == -EAGAIN && flags) {
+				DECLARE_WAITQUEUE(wait, current);
+
+				add_wait_queue(&km_waitq, &wait);
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule();
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&km_waitq, &wait);
+
+				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+				if (nx == -EAGAIN && signal_pending(current)) {
+					err = -ERESTART;
+					goto error;
+				}
+				if (nx == -EAGAIN ||
+				    genid != atomic_read(&flow_cache_genid)) {
+					xfrm_pol_put(policy);
+					goto restart;
+				}
+				err = nx;
+			}
+			if (err < 0)
+				goto error;
+		}
+		if (nx == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		dst = dst_orig;
+		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
+
+		if (unlikely(err)) {
+			int i;
+			for (i=0; i<nx; i++)
+				xfrm_state_put(xfrm[i]);
+			goto error;
+		}
+
+		write_lock_bh(&policy->lock);
+		if (unlikely(policy->dead || stale_bundle(dst))) {
+			/* Wow! While we worked on resolving, this
+			 * policy has gone. Retry. It is not paranoia,
+			 * we just cannot enlist new bundle to dead object.
+			 * We can't enlist stable bundles either.
+			 */
+			write_unlock_bh(&policy->lock);
+
+			xfrm_pol_put(policy);
+			if (dst)
+				dst_free(dst);
+			goto restart;
+		}
+		dst->next = policy->bundles;
+		policy->bundles = dst;
+		dst_hold(dst);
+		write_unlock_bh(&policy->lock);
+	}
+	*dst_p = dst;
+	dst_release(dst_orig);
+	xfrm_pol_put(policy);
+	return 0;
+
+error:
+	dst_release(dst_orig);
+	xfrm_pol_put(policy);
+	*dst_p = NULL;
+	return err;
+}
+EXPORT_SYMBOL(xfrm_lookup);
+
+/* When skb is transformed back to its "native" form, we have to
+ * check policy restrictions. At the moment we make this in maximally
+ * stupid way. Shame on me. :-) Of course, connected sockets must
+ * have policy cached at them.
+ */
+
+static inline int
+xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
+	      unsigned short family)
+{
+	if (xfrm_state_kern(x))
+		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
+	return	x->id.proto == tmpl->id.proto &&
+		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
+		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
+		x->props.mode == tmpl->mode &&
+		(tmpl->aalgos & (1<<x->props.aalgo)) &&
+		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+}
+
+static inline int
+xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
+	       unsigned short family)
+{
+	int idx = start;
+
+	if (tmpl->optional) {
+		if (!tmpl->mode)
+			return start;
+	} else
+		start = -1;
+	for (; idx < sp->len; idx++) {
+		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
+			return ++idx;
+		if (sp->x[idx].xvec->props.mode)
+			break;
+	}
+	return start;
+}
+
+static int
+_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	afinfo->decode_session(skb, fl);
+	xfrm_policy_put_afinfo(afinfo);
+	return 0;
+}
+
+static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+{
+	for (; k < sp->len; k++) {
+		if (sp->x[k].xvec->props.mode)
+			return 1;
+	}
+
+	return 0;
+}
+
+int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
+			unsigned short family)
+{
+	struct xfrm_policy *pol;
+	struct flowi fl;
+
+	if (_decode_session(skb, &fl, family) < 0)
+		return 0;
+
+	/* First, check used SA against their selectors. */
+	if (skb->sp) {
+		int i;
+
+		for (i=skb->sp->len-1; i>=0; i--) {
+		  struct sec_decap_state *xvec = &(skb->sp->x[i]);
+			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
+				return 0;
+
+			/* If there is a post_input processor, try running it */
+			if (xvec->xvec->type->post_input &&
+			    (xvec->xvec->type->post_input)(xvec->xvec,
+							   &(xvec->decap),
+							   skb) != 0)
+				return 0;
+		}
+	}
+
+	pol = NULL;
+	if (sk && sk->sk_policy[dir])
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+
+	if (!pol)
+		pol = flow_cache_lookup(&fl, family,
+					policy_to_flow_dir(dir),
+					xfrm_policy_lookup);
+
+	if (!pol)
+		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+
+	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	if (pol->action == XFRM_POLICY_ALLOW) {
+		struct sec_path *sp;
+		static struct sec_path dummy;
+		int i, k;
+
+		if ((sp = skb->sp) == NULL)
+			sp = &dummy;
+
+		/* For each tunnel xfrm, find the first matching tmpl.
+		 * For each tmpl before that, find corresponding xfrm.
+		 * Order is _important_. Later we will implement
+		 * some barriers, but at the moment barriers
+		 * are implied between each two transformations.
+		 */
+		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
+			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
+			if (k < 0)
+				goto reject;
+		}
+
+		if (secpath_has_tunnel(sp, k))
+			goto reject;
+
+		xfrm_pol_put(pol);
+		return 1;
+	}
+
+reject:
+	xfrm_pol_put(pol);
+	return 0;
+}
+EXPORT_SYMBOL(__xfrm_policy_check);
+
+int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+{
+	struct flowi fl;
+
+	if (_decode_session(skb, &fl, family) < 0)
+		return 0;
+
+	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
+}
+EXPORT_SYMBOL(__xfrm_route_forward);
+
+/* Optimize later using cookies and generation ids. */
+
+static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	if (!stale_bundle(dst))
+		return dst;
+
+	return NULL;
+}
+
+static int stale_bundle(struct dst_entry *dst)
+{
+	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+}
+
+static void xfrm_dst_destroy(struct dst_entry *dst)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+	dst_release(xdst->route);
+
+	if (!dst->xfrm)
+		return;
+	xfrm_state_put(dst->xfrm);
+	dst->xfrm = NULL;
+}
+
+static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			    int unregister)
+{
+	if (!unregister)
+		return;
+
+	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+		dst->dev = &loopback_dev;
+		dev_hold(&loopback_dev);
+		dev_put(dev);
+	}
+}
+
+static void xfrm_link_failure(struct sk_buff *skb)
+{
+	/* Impossible. Such dst must be popped before reaches point of failure. */
+	return;
+}
+
+static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+{
+	if (dst) {
+		if (dst->obsolete) {
+			dst_release(dst);
+			dst = NULL;
+		}
+	}
+	return dst;
+}
+
+static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
+{
+	int i;
+	struct xfrm_policy *pol;
+	struct dst_entry *dst, **dstp, *gc_list = NULL;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
+			write_lock(&pol->lock);
+			dstp = &pol->bundles;
+			while ((dst=*dstp) != NULL) {
+				if (func(dst)) {
+					*dstp = dst->next;
+					dst->next = gc_list;
+					gc_list = dst;
+				} else {
+					dstp = &dst->next;
+				}
+			}
+			write_unlock(&pol->lock);
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+
+	while (gc_list) {
+		dst = gc_list;
+		gc_list = dst->next;
+		dst_free(dst);
+	}
+}
+
+static int unused_bundle(struct dst_entry *dst)
+{
+	return !atomic_read(&dst->__refcnt);
+}
+
+static void __xfrm_garbage_collect(void)
+{
+	xfrm_prune_bundles(unused_bundle);
+}
+
+int xfrm_flush_bundles(void)
+{
+	xfrm_prune_bundles(stale_bundle);
+	return 0;
+}
+
+void xfrm_init_pmtu(struct dst_entry *dst)
+{
+	do {
+		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+		u32 pmtu, route_mtu_cached;
+
+		pmtu = dst_mtu(dst->child);
+		xdst->child_mtu_cached = pmtu;
+
+		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
+
+		route_mtu_cached = dst_mtu(xdst->route);
+		xdst->route_mtu_cached = route_mtu_cached;
+
+		if (pmtu > route_mtu_cached)
+			pmtu = route_mtu_cached;
+
+		dst->metrics[RTAX_MTU-1] = pmtu;
+	} while ((dst = dst->next));
+}
+
+EXPORT_SYMBOL(xfrm_init_pmtu);
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+{
+	struct dst_entry *dst = &first->u.dst;
+	struct xfrm_dst *last;
+	u32 mtu;
+
+	if (!dst_check(dst->path, 0) ||
+	    (dst->dev && !netif_running(dst->dev)))
+		return 0;
+
+	last = NULL;
+
+	do {
+		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
+			return 0;
+		if (dst->xfrm->km.state != XFRM_STATE_VALID)
+			return 0;
+
+		mtu = dst_mtu(dst->child);
+		if (xdst->child_mtu_cached != mtu) {
+			last = xdst;
+			xdst->child_mtu_cached = mtu;
+		}
+
+		if (!dst_check(xdst->route, 0))
+			return 0;
+		mtu = dst_mtu(xdst->route);
+		if (xdst->route_mtu_cached != mtu) {
+			last = xdst;
+			xdst->route_mtu_cached = mtu;
+		}
+
+		dst = dst->child;
+	} while (dst->xfrm);
+
+	if (likely(!last))
+		return 1;
+
+	mtu = last->child_mtu_cached;
+	for (;;) {
+		dst = &last->u.dst;
+
+		mtu = xfrm_state_mtu(dst->xfrm, mtu);
+		if (mtu > last->route_mtu_cached)
+			mtu = last->route_mtu_cached;
+		dst->metrics[RTAX_MTU-1] = mtu;
+
+		if (last == first)
+			break;
+
+		last = last->u.next;
+		last->child_mtu_cached = mtu;
+	}
+
+	return 1;
+}
+
+EXPORT_SYMBOL(xfrm_bundle_ok);
+
+/* Well... that's _TASK_. We need to scan through transformation
+ * list and figure out what mss tcp should generate in order to
+ * final datagram fit to mtu. Mama mia... :-)
+ *
+ * Apparently, some easy way exists, but we used to choose the most
+ * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
+ *
+ * Consider this function as something like dark humour. :-)
+ */
+static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
+{
+	int res = mtu - dst->header_len;
+
+	for (;;) {
+		struct dst_entry *d = dst;
+		int m = res;
+
+		do {
+			struct xfrm_state *x = d->xfrm;
+			if (x) {
+				spin_lock_bh(&x->lock);
+				if (x->km.state == XFRM_STATE_VALID &&
+				    x->type && x->type->get_max_size)
+					m = x->type->get_max_size(d->xfrm, m);
+				else
+					m += x->props.header_len;
+				spin_unlock_bh(&x->lock);
+			}
+		} while ((d = d->child) != NULL);
+
+		if (m <= mtu)
+			break;
+		res -= (m - mtu);
+		if (res < 88)
+			return mtu;
+	}
+
+	return res + dst->header_len;
+}
+
+int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;
+	else {
+		struct dst_ops *dst_ops = afinfo->dst_ops;
+		if (likely(dst_ops->kmem_cachep == NULL))
+			dst_ops->kmem_cachep = xfrm_dst_cache;
+		if (likely(dst_ops->check == NULL))
+			dst_ops->check = xfrm_dst_check;
+		if (likely(dst_ops->destroy == NULL))
+			dst_ops->destroy = xfrm_dst_destroy;
+		if (likely(dst_ops->ifdown == NULL))
+			dst_ops->ifdown = xfrm_dst_ifdown;
+		if (likely(dst_ops->negative_advice == NULL))
+			dst_ops->negative_advice = xfrm_negative_advice;
+		if (likely(dst_ops->link_failure == NULL))
+			dst_ops->link_failure = xfrm_link_failure;
+		if (likely(dst_ops->get_mss == NULL))
+			dst_ops->get_mss = xfrm_get_mss;
+		if (likely(afinfo->garbage_collect == NULL))
+			afinfo->garbage_collect = __xfrm_garbage_collect;
+		xfrm_policy_afinfo[afinfo->family] = afinfo;
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_policy_register_afinfo);
+
+int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
+		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;
+		else {
+			struct dst_ops *dst_ops = afinfo->dst_ops;
+			xfrm_policy_afinfo[afinfo->family] = NULL;
+			dst_ops->kmem_cachep = NULL;
+			dst_ops->check = NULL;
+			dst_ops->destroy = NULL;
+			dst_ops->ifdown = NULL;
+			dst_ops->negative_advice = NULL;
+			dst_ops->link_failure = NULL;
+			dst_ops->get_mss = NULL;
+			afinfo->garbage_collect = NULL;
+		}
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	read_lock(&xfrm_policy_afinfo_lock);
+	afinfo = xfrm_policy_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);
+	read_unlock(&xfrm_policy_afinfo_lock);
+	return afinfo;
+}
+
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	if (unlikely(afinfo == NULL))
+		return;
+	read_unlock(&afinfo->lock);
+}
+
+static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	switch (event) {
+	case NETDEV_DOWN:
+		xfrm_flush_bundles();
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xfrm_dev_notifier = {
+	xfrm_dev_event,
+	NULL,
+	0
+};
+
+static void __init xfrm_policy_init(void)
+{
+	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
+					   sizeof(struct xfrm_dst),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!xfrm_dst_cache)
+		panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
+	register_netdevice_notifier(&xfrm_dev_notifier);
+}
+
+void __init xfrm_init(void)
+{
+	xfrm_state_init();
+	xfrm_policy_init();
+	xfrm_input_init();
+}
+
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
new file mode 100644
index 00000000000..1db59f11f37
--- /dev/null
+++ b/net/xfrm/xfrm_state.c
@@ -0,0 +1,1037 @@
+/*
+ * xfrm_state.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific functions
+ *	Derek Atkins <derek@ihtfp.com>
+ *		Add UDP Encapsulation
+ * 	
+ */
+
+#include <linux/workqueue.h>
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+/* Each xfrm_state may be linked to two tables:
+
+   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
+   2. Hash table by daddr to find what SAs exist for given
+      destination/tunnel endpoint. (output)
+ */
+
+static DEFINE_SPINLOCK(xfrm_state_lock);
+
+/* Hash table to find appropriate SA towards given target (endpoint
+ * of tunnel or destination of transport mode) allowed by selector.
+ *
+ * Main use is finding SA after policy selected tunnel or transport mode.
+ * Also, it can be used by ah/esp icmp error handler to find offending SA.
+ */
+static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
+static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+
+DECLARE_WAIT_QUEUE_HEAD(km_waitq);
+EXPORT_SYMBOL(km_waitq);
+
+static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
+static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
+
+static struct work_struct xfrm_state_gc_work;
+static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
+static DEFINE_SPINLOCK(xfrm_state_gc_lock);
+
+static int xfrm_state_gc_flush_bundles;
+
+static void __xfrm_state_delete(struct xfrm_state *x);
+
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+
+static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+static void km_state_expired(struct xfrm_state *x, int hard);
+
+static void xfrm_state_gc_destroy(struct xfrm_state *x)
+{
+	if (del_timer(&x->timer))
+		BUG();
+	if (x->aalg)
+		kfree(x->aalg);
+	if (x->ealg)
+		kfree(x->ealg);
+	if (x->calg)
+		kfree(x->calg);
+	if (x->encap)
+		kfree(x->encap);
+	if (x->type) {
+		x->type->destructor(x);
+		xfrm_put_type(x->type);
+	}
+	kfree(x);
+}
+
+static void xfrm_state_gc_task(void *data)
+{
+	struct xfrm_state *x;
+	struct list_head *entry, *tmp;
+	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+
+	if (xfrm_state_gc_flush_bundles) {
+		xfrm_state_gc_flush_bundles = 0;
+		xfrm_flush_bundles();
+	}
+
+	spin_lock_bh(&xfrm_state_gc_lock);
+	list_splice_init(&xfrm_state_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm_state_gc_lock);
+
+	list_for_each_safe(entry, tmp, &gc_list) {
+		x = list_entry(entry, struct xfrm_state, bydst);
+		xfrm_state_gc_destroy(x);
+	}
+	wake_up(&km_waitq);
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+		return MAX_SCHEDULE_TIMEOUT-1;
+	else
+	        return secs*HZ;
+}
+
+static void xfrm_timer_handler(unsigned long data)
+{
+	struct xfrm_state *x = (struct xfrm_state*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;
+	int warn = 0;
+
+	spin_lock(&x->lock);
+	if (x->km.state == XFRM_STATE_DEAD)
+		goto out;
+	if (x->km.state == XFRM_STATE_EXPIRED)
+		goto expired;
+	if (x->lft.hard_add_expires_seconds) {
+		long tmo = x->lft.hard_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.hard_use_expires_seconds) {
+		long tmo = x->lft.hard_use_expires_seconds +
+			(x->curlft.use_time ? : now) - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->km.dying)
+		goto resched;
+	if (x->lft.soft_add_expires_seconds) {
+		long tmo = x->lft.soft_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.soft_use_expires_seconds) {
+		long tmo = x->lft.soft_use_expires_seconds +
+			(x->curlft.use_time ? : now) - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+
+	if (warn)
+		km_state_expired(x, 0);
+resched:
+	if (next != LONG_MAX &&
+	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
+		xfrm_state_hold(x);
+	goto out;
+
+expired:
+	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
+		x->km.state = XFRM_STATE_EXPIRED;
+		wake_up(&km_waitq);
+		next = 2;
+		goto resched;
+	}
+	if (x->id.spi != 0)
+		km_state_expired(x, 1);
+	__xfrm_state_delete(x);
+
+out:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+}
+
+struct xfrm_state *xfrm_state_alloc(void)
+{
+	struct xfrm_state *x;
+
+	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
+
+	if (x) {
+		memset(x, 0, sizeof(struct xfrm_state));
+		atomic_set(&x->refcnt, 1);
+		atomic_set(&x->tunnel_users, 0);
+		INIT_LIST_HEAD(&x->bydst);
+		INIT_LIST_HEAD(&x->byspi);
+		init_timer(&x->timer);
+		x->timer.function = xfrm_timer_handler;
+		x->timer.data	  = (unsigned long)x;
+		x->curlft.add_time = (unsigned long)xtime.tv_sec;
+		x->lft.soft_byte_limit = XFRM_INF;
+		x->lft.soft_packet_limit = XFRM_INF;
+		x->lft.hard_byte_limit = XFRM_INF;
+		x->lft.hard_packet_limit = XFRM_INF;
+		spin_lock_init(&x->lock);
+	}
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_alloc);
+
+void __xfrm_state_destroy(struct xfrm_state *x)
+{
+	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
+
+	spin_lock_bh(&xfrm_state_gc_lock);
+	list_add(&x->bydst, &xfrm_state_gc_list);
+	spin_unlock_bh(&xfrm_state_gc_lock);
+	schedule_work(&xfrm_state_gc_work);
+}
+EXPORT_SYMBOL(__xfrm_state_destroy);
+
+static void __xfrm_state_delete(struct xfrm_state *x)
+{
+	if (x->km.state != XFRM_STATE_DEAD) {
+		x->km.state = XFRM_STATE_DEAD;
+		spin_lock(&xfrm_state_lock);
+		list_del(&x->bydst);
+		atomic_dec(&x->refcnt);
+		if (x->id.spi) {
+			list_del(&x->byspi);
+			atomic_dec(&x->refcnt);
+		}
+		spin_unlock(&xfrm_state_lock);
+		if (del_timer(&x->timer))
+			atomic_dec(&x->refcnt);
+
+		/* The number two in this test is the reference
+		 * mentioned in the comment below plus the reference
+		 * our caller holds.  A larger value means that
+		 * there are DSTs attached to this xfrm_state.
+		 */
+		if (atomic_read(&x->refcnt) > 2) {
+			xfrm_state_gc_flush_bundles = 1;
+			schedule_work(&xfrm_state_gc_work);
+		}
+
+		/* All xfrm_state objects are created by xfrm_state_alloc.
+		 * The xfrm_state_alloc call gives a reference, and that
+		 * is what we are dropping here.
+		 */
+		atomic_dec(&x->refcnt);
+	}
+}
+
+void xfrm_state_delete(struct xfrm_state *x)
+{
+	spin_lock_bh(&x->lock);
+	__xfrm_state_delete(x);
+	spin_unlock_bh(&x->lock);
+}
+EXPORT_SYMBOL(xfrm_state_delete);
+
+void xfrm_state_flush(u8 proto)
+{
+	int i;
+	struct xfrm_state *x;
+
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+restart:
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (!xfrm_state_kern(x) &&
+			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+				xfrm_state_hold(x);
+				spin_unlock_bh(&xfrm_state_lock);
+
+				xfrm_state_delete(x);
+				xfrm_state_put(x);
+
+				spin_lock_bh(&xfrm_state_lock);
+				goto restart;
+			}
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	wake_up(&km_waitq);
+}
+EXPORT_SYMBOL(xfrm_state_flush);
+
+static int
+xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		  struct xfrm_tmpl *tmpl,
+		  xfrm_address_t *daddr, xfrm_address_t *saddr,
+		  unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return -1;
+	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
+	xfrm_state_put_afinfo(afinfo);
+	return 0;
+}
+
+struct xfrm_state *
+xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
+		struct flowi *fl, struct xfrm_tmpl *tmpl,
+		struct xfrm_policy *pol, int *err,
+		unsigned short family)
+{
+	unsigned h = xfrm_dst_hash(daddr, family);
+	struct xfrm_state *x, *x0;
+	int acquire_in_progress = 0;
+	int error = 0;
+	struct xfrm_state *best = NULL;
+	struct xfrm_state_afinfo *afinfo;
+	
+	afinfo = xfrm_state_get_afinfo(family);
+	if (afinfo == NULL) {
+		*err = -EAFNOSUPPORT;
+		return NULL;
+	}
+
+	spin_lock_bh(&xfrm_state_lock);
+	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+		if (x->props.family == family &&
+		    x->props.reqid == tmpl->reqid &&
+		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
+			/* Resolution logic:
+			   1. There is a valid state with matching selector.
+			      Done.
+			   2. Valid state with inappropriate selector. Skip.
+
+			   Entering area of "sysdeps".
+
+			   3. If state is not valid, selector is temporary,
+			      it selects only session which triggered
+			      previous resolution. Key manager will do
+			      something to install a state with proper
+			      selector.
+			 */
+			if (x->km.state == XFRM_STATE_VALID) {
+				if (!xfrm_selector_match(&x->sel, fl, family))
+					continue;
+				if (!best ||
+				    best->km.dying > x->km.dying ||
+				    (best->km.dying == x->km.dying &&
+				     best->curlft.add_time < x->curlft.add_time))
+					best = x;
+			} else if (x->km.state == XFRM_STATE_ACQ) {
+				acquire_in_progress = 1;
+			} else if (x->km.state == XFRM_STATE_ERROR ||
+				   x->km.state == XFRM_STATE_EXPIRED) {
+				if (xfrm_selector_match(&x->sel, fl, family))
+					error = -ESRCH;
+			}
+		}
+	}
+
+	x = best;
+	if (!x && !error && !acquire_in_progress) {
+		x0 = afinfo->state_lookup(&tmpl->id.daddr, tmpl->id.spi, tmpl->id.proto);
+		if (x0 != NULL) {
+			xfrm_state_put(x0);
+			error = -EEXIST;
+			goto out;
+		}
+		x = xfrm_state_alloc();
+		if (x == NULL) {
+			error = -ENOMEM;
+			goto out;
+		}
+		/* Initialize temporary selector matching only
+		 * to current session. */
+		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+
+		if (km_query(x, tmpl, pol) == 0) {
+			x->km.state = XFRM_STATE_ACQ;
+			list_add_tail(&x->bydst, xfrm_state_bydst+h);
+			xfrm_state_hold(x);
+			if (x->id.spi) {
+				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
+				list_add(&x->byspi, xfrm_state_byspi+h);
+				xfrm_state_hold(x);
+			}
+			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+			xfrm_state_hold(x);
+			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+			add_timer(&x->timer);
+		} else {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			error = -ESRCH;
+		}
+	}
+out:
+	if (x)
+		xfrm_state_hold(x);
+	else
+		*err = acquire_in_progress ? -EAGAIN : error;
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+
+static void __xfrm_state_insert(struct xfrm_state *x)
+{
+	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+
+	list_add(&x->bydst, xfrm_state_bydst+h);
+	xfrm_state_hold(x);
+
+	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+
+	list_add(&x->byspi, xfrm_state_byspi+h);
+	xfrm_state_hold(x);
+
+	if (!mod_timer(&x->timer, jiffies + HZ))
+		xfrm_state_hold(x);
+
+	wake_up(&km_waitq);
+}
+
+void xfrm_state_insert(struct xfrm_state *x)
+{
+	spin_lock_bh(&xfrm_state_lock);
+	__xfrm_state_insert(x);
+	spin_unlock_bh(&xfrm_state_lock);
+}
+EXPORT_SYMBOL(xfrm_state_insert);
+
+static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
+
+int xfrm_state_add(struct xfrm_state *x)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_state *x1;
+	int family;
+	int err;
+
+	family = x->props.family;
+	afinfo = xfrm_state_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+
+	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	if (x1) {
+		xfrm_state_put(x1);
+		x1 = NULL;
+		err = -EEXIST;
+		goto out;
+	}
+
+	if (x->km.seq) {
+		x1 = __xfrm_find_acq_byseq(x->km.seq);
+		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
+			xfrm_state_put(x1);
+			x1 = NULL;
+		}
+	}
+
+	if (!x1)
+		x1 = afinfo->find_acq(
+			x->props.mode, x->props.reqid, x->id.proto,
+			&x->id.daddr, &x->props.saddr, 0);
+
+	__xfrm_state_insert(x);
+	err = 0;
+
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+
+	if (x1) {
+		xfrm_state_delete(x1);
+		xfrm_state_put(x1);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_add);
+
+int xfrm_state_update(struct xfrm_state *x)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_state *x1;
+	int err;
+
+	afinfo = xfrm_state_get_afinfo(x->props.family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+
+	err = -ESRCH;
+	if (!x1)
+		goto out;
+
+	if (xfrm_state_kern(x1)) {
+		xfrm_state_put(x1);
+		err = -EEXIST;
+		goto out;
+	}
+
+	if (x1->km.state == XFRM_STATE_ACQ) {
+		__xfrm_state_insert(x);
+		x = NULL;
+	}
+	err = 0;
+
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+
+	if (err)
+		return err;
+
+	if (!x) {
+		xfrm_state_delete(x1);
+		xfrm_state_put(x1);
+		return 0;
+	}
+
+	err = -EINVAL;
+	spin_lock_bh(&x1->lock);
+	if (likely(x1->km.state == XFRM_STATE_VALID)) {
+		if (x->encap && x1->encap)
+			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
+		x1->km.dying = 0;
+
+		if (!mod_timer(&x1->timer, jiffies + HZ))
+			xfrm_state_hold(x1);
+		if (x1->curlft.use_time)
+			xfrm_state_check_expire(x1);
+
+		err = 0;
+	}
+	spin_unlock_bh(&x1->lock);
+
+	xfrm_state_put(x1);
+
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_update);
+
+int xfrm_state_check_expire(struct xfrm_state *x)
+{
+	if (!x->curlft.use_time)
+		x->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	if (x->km.state != XFRM_STATE_VALID)
+		return -EINVAL;
+
+	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
+	    x->curlft.packets >= x->lft.hard_packet_limit) {
+		km_state_expired(x, 1);
+		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
+			xfrm_state_hold(x);
+		return -EINVAL;
+	}
+
+	if (!x->km.dying &&
+	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
+	     x->curlft.packets >= x->lft.soft_packet_limit))
+		km_state_expired(x, 0);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_state_check_expire);
+
+static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
+		- skb_headroom(skb);
+
+	if (nhead > 0)
+		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+
+	/* Check tail too... */
+	return 0;
+}
+
+int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err = xfrm_state_check_expire(x);
+	if (err < 0)
+		goto err;
+	err = xfrm_state_check_space(x, skb);
+err:
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_check);
+
+struct xfrm_state *
+xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
+		  unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->state_lookup(daddr, spi, proto);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup);
+
+struct xfrm_state *
+xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
+	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
+	      int create, unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq);
+
+/* Silly enough, but I'm lazy to build resolution list */
+
+static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
+{
+	int i;
+	struct xfrm_state *x;
+
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
+				xfrm_state_hold(x);
+				return x;
+			}
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
+{
+	struct xfrm_state *x;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = __xfrm_find_acq_byseq(seq);
+	spin_unlock_bh(&xfrm_state_lock);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq_byseq);
+
+u32 xfrm_get_acqseq(void)
+{
+	u32 res;
+	static u32 acqseq;
+	static DEFINE_SPINLOCK(acqseq_lock);
+
+	spin_lock_bh(&acqseq_lock);
+	res = (++acqseq ? : ++acqseq);
+	spin_unlock_bh(&acqseq_lock);
+	return res;
+}
+EXPORT_SYMBOL(xfrm_get_acqseq);
+
+void
+xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
+{
+	u32 h;
+	struct xfrm_state *x0;
+
+	if (x->id.spi)
+		return;
+
+	if (minspi == maxspi) {
+		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
+		if (x0) {
+			xfrm_state_put(x0);
+			return;
+		}
+		x->id.spi = minspi;
+	} else {
+		u32 spi = 0;
+		minspi = ntohl(minspi);
+		maxspi = ntohl(maxspi);
+		for (h=0; h<maxspi-minspi+1; h++) {
+			spi = minspi + net_random()%(maxspi-minspi+1);
+			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
+			if (x0 == NULL) {
+				x->id.spi = htonl(spi);
+				break;
+			}
+			xfrm_state_put(x0);
+		}
+	}
+	if (x->id.spi) {
+		spin_lock_bh(&xfrm_state_lock);
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+		list_add(&x->byspi, xfrm_state_byspi+h);
+		xfrm_state_hold(x);
+		spin_unlock_bh(&xfrm_state_lock);
+		wake_up(&km_waitq);
+	}
+}
+EXPORT_SYMBOL(xfrm_alloc_spi);
+
+int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
+		    void *data)
+{
+	int i;
+	struct xfrm_state *x;
+	int count = 0;
+	int err = 0;
+
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+				count++;
+		}
+	}
+	if (count == 0) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+				continue;
+			err = func(x, --count, data);
+			if (err)
+				goto out;
+		}
+	}
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_walk);
+
+int xfrm_replay_check(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+
+	seq = ntohl(seq);
+
+	if (unlikely(seq == 0))
+		return -EINVAL;
+
+	if (likely(seq > x->replay.seq))
+		return 0;
+
+	diff = x->replay.seq - seq;
+	if (diff >= x->props.replay_window) {
+		x->stats.replay_window++;
+		return -EINVAL;
+	}
+
+	if (x->replay.bitmap & (1U << diff)) {
+		x->stats.replay++;
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_replay_check);
+
+void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+
+	seq = ntohl(seq);
+
+	if (seq > x->replay.seq) {
+		diff = seq - x->replay.seq;
+		if (diff < x->props.replay_window)
+			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
+		else
+			x->replay.bitmap = 1;
+		x->replay.seq = seq;
+	} else {
+		diff = x->replay.seq - seq;
+		x->replay.bitmap |= (1U << diff);
+	}
+}
+EXPORT_SYMBOL(xfrm_replay_advance);
+
+static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
+static DEFINE_RWLOCK(xfrm_km_lock);
+
+static void km_state_expired(struct xfrm_state *x, int hard)
+{
+	struct xfrm_mgr *km;
+
+	if (hard)
+		x->km.state = XFRM_STATE_EXPIRED;
+	else
+		x->km.dying = 1;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		km->notify(x, hard);
+	read_unlock(&xfrm_km_lock);
+
+	if (hard)
+		wake_up(&km_waitq);
+}
+
+static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
+{
+	int err = -EINVAL;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+
+int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+{
+	int err = -EINVAL;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->new_mapping)
+			err = km->new_mapping(x, ipaddr, sport);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+EXPORT_SYMBOL(km_new_mapping);
+
+void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
+{
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		if (km->notify_policy)
+			km->notify_policy(pol, dir, hard);
+	read_unlock(&xfrm_km_lock);
+
+	if (hard)
+		wake_up(&km_waitq);
+}
+
+int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+{
+	int err;
+	u8 *data;
+	struct xfrm_mgr *km;
+	struct xfrm_policy *pol = NULL;
+
+	if (optlen <= 0 || optlen > PAGE_SIZE)
+		return -EMSGSIZE;
+
+	data = kmalloc(optlen, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	err = -EFAULT;
+	if (copy_from_user(data, optval, optlen))
+		goto out;
+
+	err = -EINVAL;
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		pol = km->compile_policy(sk->sk_family, optname, data,
+					 optlen, &err);
+		if (err >= 0)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+
+	if (err >= 0) {
+		xfrm_sk_policy_insert(sk, err, pol);
+		xfrm_pol_put(pol);
+		err = 0;
+	}
+
+out:
+	kfree(data);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_user_policy);
+
+int xfrm_register_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_add_tail(&km->list, &xfrm_km_list);
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_register_km);
+
+int xfrm_unregister_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_del(&km->list);
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_unregister_km);
+
+int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;
+	else {
+		afinfo->state_bydst = xfrm_state_bydst;
+		afinfo->state_byspi = xfrm_state_byspi;
+		xfrm_state_afinfo[afinfo->family] = afinfo;
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_register_afinfo);
+
+int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
+		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;
+		else {
+			xfrm_state_afinfo[afinfo->family] = NULL;
+			afinfo->state_byspi = NULL;
+			afinfo->state_bydst = NULL;
+		}
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
+
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	read_lock(&xfrm_state_afinfo_lock);
+	afinfo = xfrm_state_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);
+	read_unlock(&xfrm_state_afinfo_lock);
+	return afinfo;
+}
+
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	if (unlikely(afinfo == NULL))
+		return;
+	read_unlock(&afinfo->lock);
+}
+
+/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
+void xfrm_state_delete_tunnel(struct xfrm_state *x)
+{
+	if (x->tunnel) {
+		struct xfrm_state *t = x->tunnel;
+
+		if (atomic_read(&t->tunnel_users) == 2)
+			xfrm_state_delete(t);
+		atomic_dec(&t->tunnel_users);
+		xfrm_state_put(t);
+		x->tunnel = NULL;
+	}
+}
+EXPORT_SYMBOL(xfrm_state_delete_tunnel);
+
+int xfrm_state_mtu(struct xfrm_state *x, int mtu)
+{
+	int res = mtu;
+
+	res -= x->props.header_len;
+
+	for (;;) {
+		int m = res;
+
+		if (m < 68)
+			return 68;
+
+		spin_lock_bh(&x->lock);
+		if (x->km.state == XFRM_STATE_VALID &&
+		    x->type && x->type->get_max_size)
+			m = x->type->get_max_size(x, m);
+		else
+			m += x->props.header_len;
+		spin_unlock_bh(&x->lock);
+
+		if (m <= mtu)
+			break;
+		res -= (m - mtu);
+	}
+
+	return res;
+}
+
+EXPORT_SYMBOL(xfrm_state_mtu);
+ 
+void __init xfrm_state_init(void)
+{
+	int i;
+
+	for (i=0; i<XFRM_DST_HSIZE; i++) {
+		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
+		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
+	}
+	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
+}
+
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
new file mode 100644
index 00000000000..63661b0fd73
--- /dev/null
+++ b/net/xfrm/xfrm_user.c
@@ -0,0 +1,1253 @@
+/* xfrm_user.c: User interface to configure xfrm engine.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <asm/uaccess.h>
+
+static struct sock *xfrm_nl;
+
+static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
+{
+	struct rtattr *rt = xfrma[type - 1];
+	struct xfrm_algo *algp;
+
+	if (!rt)
+		return 0;
+
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
+		return -EINVAL;
+
+	algp = RTA_DATA(rt);
+	switch (type) {
+	case XFRMA_ALG_AUTH:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "digest_null") != 0)
+			return -EINVAL;
+		break;
+
+	case XFRMA_ALG_CRYPT:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "cipher_null") != 0)
+			return -EINVAL;
+		break;
+
+	case XFRMA_ALG_COMP:
+		/* Zero length keys are legal.  */
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
+	return 0;
+}
+
+static int verify_encap_tmpl(struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_ENCAP - 1];
+	struct xfrm_encap_tmpl *encap;
+
+	if (!rt)
+		return 0;
+
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int verify_newsa_info(struct xfrm_usersa_info *p,
+			     struct rtattr **xfrma)
+{
+	int err;
+
+	err = -EINVAL;
+	switch (p->family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		err = -EAFNOSUPPORT;
+		goto out;
+#endif
+
+	default:
+		goto out;
+	};
+
+	err = -EINVAL;
+	switch (p->id.proto) {
+	case IPPROTO_AH:
+		if (!xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1]	||
+		    xfrma[XFRMA_ALG_COMP-1])
+			goto out;
+		break;
+
+	case IPPROTO_ESP:
+		if ((!xfrma[XFRMA_ALG_AUTH-1] &&
+		     !xfrma[XFRMA_ALG_CRYPT-1])	||
+		    xfrma[XFRMA_ALG_COMP-1])
+			goto out;
+		break;
+
+	case IPPROTO_COMP:
+		if (!xfrma[XFRMA_ALG_COMP-1]	||
+		    xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1])
+			goto out;
+		break;
+
+	default:
+		goto out;
+	};
+
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
+		goto out;
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT)))
+		goto out;
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP)))
+		goto out;
+	if ((err = verify_encap_tmpl(xfrma)))
+		goto out;
+
+	err = -EINVAL;
+	switch (p->mode) {
+	case 0:
+	case 1:
+		break;
+
+	default:
+		goto out;
+	};
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
+			   struct xfrm_algo_desc *(*get_byname)(char *, int),
+			   struct rtattr *u_arg)
+{
+	struct rtattr *rta = u_arg;
+	struct xfrm_algo *p, *ualg;
+	struct xfrm_algo_desc *algo;
+
+	if (!rta)
+		return 0;
+
+	ualg = RTA_DATA(rta);
+
+	algo = get_byname(ualg->alg_name, 1);
+	if (!algo)
+		return -ENOSYS;
+	*props = algo->desc.sadb_alg_id;
+
+	p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
+	*algpp = p;
+	return 0;
+}
+
+static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg)
+{
+	struct rtattr *rta = u_arg;
+	struct xfrm_encap_tmpl *p, *uencap;
+
+	if (!rta)
+		return 0;
+
+	uencap = RTA_DATA(rta);
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, uencap, sizeof(*p));
+	*encapp = p;
+	return 0;
+}
+
+static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{
+	memcpy(&x->id, &p->id, sizeof(x->id));
+	memcpy(&x->sel, &p->sel, sizeof(x->sel));
+	memcpy(&x->lft, &p->lft, sizeof(x->lft));
+	x->props.mode = p->mode;
+	x->props.replay_window = p->replay_window;
+	x->props.reqid = p->reqid;
+	x->props.family = p->family;
+	x->props.saddr = p->saddr;
+	x->props.flags = p->flags;
+}
+
+static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
+					       struct rtattr **xfrma,
+					       int *errp)
+{
+	struct xfrm_state *x = xfrm_state_alloc();
+	int err = -ENOMEM;
+
+	if (!x)
+		goto error_no_put;
+
+	copy_from_user_state(x, p);
+
+	if ((err = attach_one_algo(&x->aalg, &x->props.aalgo,
+				   xfrm_aalg_get_byname,
+				   xfrma[XFRMA_ALG_AUTH-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->ealg, &x->props.ealgo,
+				   xfrm_ealg_get_byname,
+				   xfrma[XFRMA_ALG_CRYPT-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->calg, &x->props.calgo,
+				   xfrm_calg_get_byname,
+				   xfrma[XFRMA_ALG_COMP-1])))
+		goto error;
+	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
+		goto error;
+
+	err = -ENOENT;
+	x->type = xfrm_get_type(x->id.proto, x->props.family);
+	if (x->type == NULL)
+		goto error;
+
+	err = x->type->init_state(x, NULL);
+	if (err)
+		goto error;
+
+	x->curlft.add_time = (unsigned long) xtime.tv_sec;
+	x->km.state = XFRM_STATE_VALID;
+	x->km.seq = p->seq;
+
+	return x;
+
+error:
+	x->km.state = XFRM_STATE_DEAD;
+	xfrm_state_put(x);
+error_no_put:
+	*errp = err;
+	return NULL;
+}
+
+static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
+	struct xfrm_state *x;
+	int err;
+
+	err = verify_newsa_info(p, (struct rtattr **) xfrma);
+	if (err)
+		return err;
+
+	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
+	if (!x)
+		return err;
+
+	if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
+		err = xfrm_state_add(x);
+	else
+		err = xfrm_state_update(x);
+
+	if (err < 0) {
+		x->km.state = XFRM_STATE_DEAD;
+		xfrm_state_put(x);
+	}
+
+	return err;
+}
+
+static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_state *x;
+	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
+
+	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	if (x == NULL)
+		return -ESRCH;
+
+	if (xfrm_state_kern(x)) {
+		xfrm_state_put(x);
+		return -EPERM;
+	}
+
+	xfrm_state_delete(x);
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{
+	memcpy(&p->id, &x->id, sizeof(p->id));
+	memcpy(&p->sel, &x->sel, sizeof(p->sel));
+	memcpy(&p->lft, &x->lft, sizeof(p->lft));
+	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
+	memcpy(&p->stats, &x->stats, sizeof(p->stats));
+	p->saddr = x->props.saddr;
+	p->mode = x->props.mode;
+	p->replay_window = x->props.replay_window;
+	p->reqid = x->props.reqid;
+	p->family = x->props.family;
+	p->flags = x->props.flags;
+	p->seq = x->km.seq;
+}
+
+struct xfrm_dump_info {
+	struct sk_buff *in_skb;
+	struct sk_buff *out_skb;
+	u32 nlmsg_seq;
+	u16 nlmsg_flags;
+	int start_idx;
+	int this_idx;
+};
+
+static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
+{
+	struct xfrm_dump_info *sp = ptr;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct xfrm_usersa_info *p;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWSA, sizeof(*p));
+	nlh->nlmsg_flags = sp->nlmsg_flags;
+
+	p = NLMSG_DATA(nlh);
+	copy_to_user_state(x, p);
+
+	if (x->aalg)
+		RTA_PUT(skb, XFRMA_ALG_AUTH,
+			sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
+	if (x->ealg)
+		RTA_PUT(skb, XFRMA_ALG_CRYPT,
+			sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
+	if (x->calg)
+		RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
+
+	if (x->encap)
+		RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info info;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+	info.this_idx = 0;
+	info.start_idx = cb->args[0];
+	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+	cb->args[0] = info.this_idx;
+
+	return skb->len;
+}
+
+static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
+					  struct xfrm_state *x, u32 seq)
+{
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.nlmsg_flags = 0;
+	info.this_idx = info.start_idx = 0;
+
+	if (dump_one_state(x, 0, &info)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	return skb;
+}
+
+static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
+	struct xfrm_state *x;
+	struct sk_buff *resp_skb;
+	int err;
+
+	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	err = -ESRCH;
+	if (x == NULL)
+		goto out_noput;
+
+	resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	if (IS_ERR(resp_skb)) {
+		err = PTR_ERR(resp_skb);
+	} else {
+		err = netlink_unicast(xfrm_nl, resp_skb,
+				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	}
+	xfrm_state_put(x);
+out_noput:
+	return err;
+}
+
+static int verify_userspi_info(struct xfrm_userspi_info *p)
+{
+	switch (p->info.id.proto) {
+	case IPPROTO_AH:
+	case IPPROTO_ESP:
+		break;
+
+	case IPPROTO_COMP:
+		/* IPCOMP spi is 16-bits. */
+		if (p->max >= 0x10000)
+			return -EINVAL;
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	if (p->min > p->max)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_state *x;
+	struct xfrm_userspi_info *p;
+	struct sk_buff *resp_skb;
+	xfrm_address_t *daddr;
+	int family;
+	int err;
+
+	p = NLMSG_DATA(nlh);
+	err = verify_userspi_info(p);
+	if (err)
+		goto out_noput;
+
+	family = p->info.family;
+	daddr = &p->info.id.daddr;
+
+	x = NULL;
+	if (p->info.seq) {
+		x = xfrm_find_acq_byseq(p->info.seq);
+		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
+			xfrm_state_put(x);
+			x = NULL;
+		}
+	}
+
+	if (!x)
+		x = xfrm_find_acq(p->info.mode, p->info.reqid,
+				  p->info.id.proto, daddr,
+				  &p->info.saddr, 1,
+				  family);
+	err = -ENOENT;
+	if (x == NULL)
+		goto out_noput;
+
+	resp_skb = ERR_PTR(-ENOENT);
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state != XFRM_STATE_DEAD) {
+		xfrm_alloc_spi(x, htonl(p->min), htonl(p->max));
+		if (x->id.spi)
+			resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	}
+	spin_unlock_bh(&x->lock);
+
+	if (IS_ERR(resp_skb)) {
+		err = PTR_ERR(resp_skb);
+		goto out;
+	}
+
+	err = netlink_unicast(xfrm_nl, resp_skb,
+			      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+out:
+	xfrm_state_put(x);
+out_noput:
+	return err;
+}
+
+static int verify_policy_dir(__u8 dir)
+{
+	switch (dir) {
+	case XFRM_POLICY_IN:
+	case XFRM_POLICY_OUT:
+	case XFRM_POLICY_FWD:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	return 0;
+}
+
+static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
+{
+	switch (p->share) {
+	case XFRM_SHARE_ANY:
+	case XFRM_SHARE_SESSION:
+	case XFRM_SHARE_USER:
+	case XFRM_SHARE_UNIQUE:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->action) {
+	case XFRM_POLICY_ALLOW:
+	case XFRM_POLICY_BLOCK:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->sel.family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		return  -EAFNOSUPPORT;
+#endif
+
+	default:
+		return -EINVAL;
+	};
+
+	return verify_policy_dir(p->dir);
+}
+
+static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
+			   int nr)
+{
+	int i;
+
+	xp->xfrm_nr = nr;
+	for (i = 0; i < nr; i++, ut++) {
+		struct xfrm_tmpl *t = &xp->xfrm_vec[i];
+
+		memcpy(&t->id, &ut->id, sizeof(struct xfrm_id));
+		memcpy(&t->saddr, &ut->saddr,
+		       sizeof(xfrm_address_t));
+		t->reqid = ut->reqid;
+		t->mode = ut->mode;
+		t->share = ut->share;
+		t->optional = ut->optional;
+		t->aalgos = ut->aalgos;
+		t->ealgos = ut->ealgos;
+		t->calgos = ut->calgos;
+	}
+}
+
+static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_TMPL-1];
+	struct xfrm_user_tmpl *utmpl;
+	int nr;
+
+	if (!rt) {
+		pol->xfrm_nr = 0;
+	} else {
+		nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
+
+		if (nr > XFRM_MAX_DEPTH)
+			return -EINVAL;
+
+		copy_templates(pol, RTA_DATA(rt), nr);
+	}
+	return 0;
+}
+
+static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
+{
+	xp->priority = p->priority;
+	xp->index = p->index;
+	memcpy(&xp->selector, &p->sel, sizeof(xp->selector));
+	memcpy(&xp->lft, &p->lft, sizeof(xp->lft));
+	xp->action = p->action;
+	xp->flags = p->flags;
+	xp->family = p->sel.family;
+	/* XXX xp->share = p->share; */
+}
+
+static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
+{
+	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
+	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
+	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
+	p->priority = xp->priority;
+	p->index = xp->index;
+	p->sel.family = xp->family;
+	p->dir = dir;
+	p->action = xp->action;
+	p->flags = xp->flags;
+	p->share = XFRM_SHARE_ANY; /* XXX xp->share */
+}
+
+static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp)
+{
+	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
+	int err;
+
+	if (!xp) {
+		*errp = -ENOMEM;
+		return NULL;
+	}
+
+	copy_from_user_policy(xp, p);
+	err = copy_from_user_tmpl(xp, xfrma);
+	if (err) {
+		*errp = err;
+		kfree(xp);
+		xp = NULL;
+	}
+
+	return xp;
+}
+
+static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
+	struct xfrm_policy *xp;
+	int err;
+	int excl;
+
+	err = verify_newpolicy_info(p);
+	if (err)
+		return err;
+
+	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
+	if (!xp)
+		return err;
+
+	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
+	err = xfrm_policy_insert(p->dir, xp, excl);
+	if (err) {
+		kfree(xp);
+		return err;
+	}
+
+	xfrm_pol_put(xp);
+
+	return 0;
+}
+
+static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH];
+	int i;
+
+	if (xp->xfrm_nr == 0)
+		return 0;
+
+	for (i = 0; i < xp->xfrm_nr; i++) {
+		struct xfrm_user_tmpl *up = &vec[i];
+		struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
+
+		memcpy(&up->id, &kp->id, sizeof(up->id));
+		up->family = xp->family;
+		memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
+		up->reqid = kp->reqid;
+		up->mode = kp->mode;
+		up->share = kp->share;
+		up->optional = kp->optional;
+		up->aalgos = kp->aalgos;
+		up->ealgos = kp->ealgos;
+		up->calgos = kp->calgos;
+	}
+	RTA_PUT(skb, XFRMA_TMPL,
+		(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr),
+		vec);
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	struct xfrm_dump_info *sp = ptr;
+	struct xfrm_userpolicy_info *p;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWPOLICY, sizeof(*p));
+	p = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = sp->nlmsg_flags;
+
+	copy_to_user_policy(xp, p, dir);
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info info;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+	info.this_idx = 0;
+	info.start_idx = cb->args[0];
+	(void) xfrm_policy_walk(dump_one_policy, &info);
+	cb->args[0] = info.this_idx;
+
+	return skb->len;
+}
+
+static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
+					  struct xfrm_policy *xp,
+					  int dir, u32 seq)
+{
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.nlmsg_flags = 0;
+	info.this_idx = info.start_idx = 0;
+
+	if (dump_one_policy(xp, dir, 0, &info) < 0) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	return skb;
+}
+
+static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_policy *xp;
+	struct xfrm_userpolicy_id *p;
+	int err;
+	int delete;
+
+	p = NLMSG_DATA(nlh);
+	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
+
+	err = verify_policy_dir(p->dir);
+	if (err)
+		return err;
+
+	if (p->index)
+		xp = xfrm_policy_byid(p->dir, p->index, delete);
+	else
+		xp = xfrm_policy_bysel(p->dir, &p->sel, delete);
+	if (xp == NULL)
+		return -ENOENT;
+
+	if (!delete) {
+		struct sk_buff *resp_skb;
+
+		resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq);
+		if (IS_ERR(resp_skb)) {
+			err = PTR_ERR(resp_skb);
+		} else {
+			err = netlink_unicast(xfrm_nl, resp_skb,
+					      NETLINK_CB(skb).pid,
+					      MSG_DONTWAIT);
+		}
+	}
+
+	xfrm_pol_put(xp);
+
+	return err;
+}
+
+static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
+
+	xfrm_state_flush(p->proto);
+	return 0;
+}
+
+static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	xfrm_policy_flush();
+	return 0;
+}
+
+static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* UPD SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_polexpire)), /* POLEXPIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)),	/* FLUSH SA */
+	NLMSG_LENGTH(0),				/* FLUSH POLICY */
+};
+
+static struct xfrm_link {
+	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
+	int (*dump)(struct sk_buff *, struct netlink_callback *);
+} xfrm_dispatch[] = {
+	{	.doit	=	xfrm_add_sa, 		},
+	{	.doit	=	xfrm_del_sa, 		},
+	{
+		.doit	=	xfrm_get_sa,
+		.dump	=	xfrm_dump_sa,
+	},
+	{	.doit	=	xfrm_add_policy 	},
+	{	.doit	=	xfrm_get_policy 	},
+	{
+		.doit	=	xfrm_get_policy,
+		.dump	=	xfrm_dump_policy,
+	},
+	{	.doit	=	xfrm_alloc_userspi	},
+	{},
+	{},
+	{	.doit	=	xfrm_add_policy 	},
+	{	.doit	=	xfrm_add_sa, 		},
+	{},
+	{	.doit	=	xfrm_flush_sa		},
+	{	.doit	=	xfrm_flush_policy	},
+};
+
+static int xfrm_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+	struct rtattr *xfrma[XFRMA_MAX];
+	struct xfrm_link *link;
+	int type, min_len;
+
+	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+		return 0;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < XFRM_MSG_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > XFRM_MSG_MAX)
+		goto err_einval;
+
+	type -= XFRM_MSG_BASE;
+	link = &xfrm_dispatch[type];
+
+	/* All operations require privileges, even GET */
+	if (security_netlink_recv(skb)) {
+		*errp = -EPERM;
+		return -1;
+	}
+
+	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
+		u32 rlen;
+
+		if (link->dump == NULL)
+			goto err_einval;
+
+		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
+						link->dump,
+						xfrm_done)) != 0) {
+			return -1;
+		}
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return -1;
+	}
+
+	memset(xfrma, 0, sizeof(xfrma));
+
+	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
+		goto err_einval;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned short flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > XFRMA_MAX)
+					goto err_einval;
+				xfrma[flavor - 1] = attr;
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		goto err_einval;
+	*errp = link->doit(skb, nlh, (void **) &xfrma);
+
+	return *errp;
+
+err_einval:
+	*errp = -EINVAL;
+	return -1;
+}
+
+static int xfrm_user_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr *nlh;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		    skb->len < nlh->nlmsg_len)
+			return 0;
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
+			if (err == 0)
+				return -1;
+			netlink_ack(skb, nlh, err);
+		} else if (nlh->nlmsg_flags & NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);
+	}
+
+	return 0;
+}
+
+static void xfrm_netlink_rcv(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		down(&xfrm_cfg_sem);
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			if (xfrm_user_rcv_skb(skb)) {
+				if (skb->len)
+					skb_queue_head(&sk->sk_receive_queue,
+						       skb);
+				else
+					kfree_skb(skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+
+		up(&xfrm_cfg_sem);
+
+	} while (xfrm_nl && xfrm_nl->sk_receive_queue.qlen);
+}
+
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)
+{
+	struct xfrm_user_expire *ue;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE,
+			sizeof(*ue));
+	ue = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_state(x, &ue->state);
+	ue->hard = (hard != 0) ? 1 : 0;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_expire(skb, x, hard) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
+
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+
+static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
+			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
+			 int dir)
+{
+	struct xfrm_user_acquire *ua;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	__u32 seq = xfrm_get_acqseq();
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
+			sizeof(*ua));
+	ua = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	memcpy(&ua->id, &x->id, sizeof(ua->id));
+	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
+	memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
+	copy_to_user_policy(xp, &ua->policy, dir);
+	ua->aalgos = xt->aalgos;
+	ua->ealgos = xt->ealgos;
+	ua->calgos = xt->calgos;
+	ua->seq = x->km.seq = seq;
+
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
+			     struct xfrm_policy *xp, int dir)
+{
+	struct sk_buff *skb;
+	size_t len;
+
+	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_acquire(skb, x, xt, xp, dir) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
+
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
+}
+
+/* User gives us xfrm_user_policy_info followed by an array of 0
+ * or more templates.
+ */
+static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+					       u8 *data, int len, int *dir)
+{
+	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
+	struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1);
+	struct xfrm_policy *xp;
+	int nr;
+
+	switch (family) {
+	case AF_INET:
+		if (opt != IP_XFRM_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		if (opt != IPV6_XFRM_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#endif
+	default:
+		*dir = -EINVAL;
+		return NULL;
+	}
+
+	*dir = -EINVAL;
+
+	if (len < sizeof(*p) ||
+	    verify_newpolicy_info(p))
+		return NULL;
+
+	nr = ((len - sizeof(*p)) / sizeof(*ut));
+	if (nr > XFRM_MAX_DEPTH)
+		return NULL;
+
+	xp = xfrm_policy_alloc(GFP_KERNEL);
+	if (xp == NULL) {
+		*dir = -ENOBUFS;
+		return NULL;
+	}
+
+	copy_from_user_policy(xp, p);
+	copy_templates(xp, ut, nr);
+
+	*dir = p->dir;
+
+	return xp;
+}
+
+static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
+			   int dir, int hard)
+{
+	struct xfrm_user_polexpire *upe;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
+	upe = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_policy(xp, &upe->pol, dir);
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+	upe->hard = !!hard;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
+{
+	struct sk_buff *skb;
+	size_t len;
+
+	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_polexpire(skb, xp, dir, hard) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
+
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+
+static struct xfrm_mgr netlink_mgr = {
+	.id		= "netlink",
+	.notify		= xfrm_send_state_notify,
+	.acquire	= xfrm_send_acquire,
+	.compile_policy	= xfrm_compile_policy,
+	.notify_policy	= xfrm_send_policy_notify,
+};
+
+static int __init xfrm_user_init(void)
+{
+	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+
+	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+	if (xfrm_nl == NULL)
+		return -ENOMEM;
+
+	xfrm_register_km(&netlink_mgr);
+
+	return 0;
+}
+
+static void __exit xfrm_user_exit(void)
+{
+	xfrm_unregister_km(&netlink_mgr);
+	sock_release(xfrm_nl->sk_socket);
+}
+
+module_init(xfrm_user_init);
+module_exit(xfrm_user_exit);
+MODULE_LICENSE("GPL");