From abb81c4f3cb9b8d421f1e5474811ef1d461d341c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Sep 2008 19:58:29 -0700 Subject: ipsec: Use RCU-like construct for saved state within a walk Now that we save states within a walk we need synchronisation so that the list the saved state is on doesn't disappear from under us. As it stands this is done by keeping the state on the list which is bad because it gets in the way of the management of the state life-cycle. An alternative is to make our own pseudo-RCU system where we use counters to indicate which state can't be freed immediately as it may be referenced by an ongoing walk when that resumes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 52 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0a8f09c3144..aaafcee02fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -59,6 +59,11 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; +/* Counter indicating ongoing walk, protected by xfrm_state_lock. */ +static unsigned long xfrm_state_walk_ongoing; +/* Counter indicating walk completion, protected by xfrm_cfg_mutex. 
*/ +static unsigned long xfrm_state_walk_completed; + static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -191,7 +196,8 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static HLIST_HEAD(xfrm_state_gc_list); +static LIST_HEAD(xfrm_state_gc_leftovers); +static LIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -403,17 +409,22 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *data) { - struct xfrm_state *x; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; + struct xfrm_state *x, *tmp; + unsigned long completed; + mutex_lock(&xfrm_cfg_mutex); spin_lock_bh(&xfrm_state_gc_lock); - gc_list.first = xfrm_state_gc_list.first; - INIT_HLIST_HEAD(&xfrm_state_gc_list); + list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) + completed = xfrm_state_walk_completed; + mutex_unlock(&xfrm_cfg_mutex); + + list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) { + if ((long)(x->lastused - completed) > 0) + break; xfrm_state_gc_destroy(x); + } wake_up(&km_waitq); } @@ -540,12 +551,8 @@ void __xfrm_state_destroy(struct xfrm_state *x) { WARN_ON(x->km.state != XFRM_STATE_DEAD); - spin_lock_bh(&xfrm_state_lock); - list_del(&x->all); - spin_unlock_bh(&xfrm_state_lock); - spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->bydst, &xfrm_state_gc_list); + list_add_tail(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -558,6 +565,8 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; 
spin_lock(&xfrm_state_lock); + x->lastused = xfrm_state_walk_ongoing; + list_del_rcu(&x->all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -1574,6 +1583,7 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, if (err) { xfrm_state_hold(last); walk->state = last; + xfrm_state_walk_ongoing++; goto out; } } @@ -1588,12 +1598,28 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) + if (old != NULL) { xfrm_state_put(old); + xfrm_state_walk_completed++; + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); + } return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_done(struct xfrm_state_walk *walk) +{ + if (walk->state != NULL) { + xfrm_state_put(walk->state); + walk->state = NULL; + xfrm_state_walk_completed++; + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); + } +} +EXPORT_SYMBOL(xfrm_state_walk_done); + void xfrm_replay_notify(struct xfrm_state *x, int event) { -- cgit v1.2.3-70-g09d2 From 08569908fffec3625e29eec7cf7577eaa512e719 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Sep 2008 22:13:28 -0700 Subject: ipsec: Add missing list_del() in xfrm_state_gc_task(). Otherwise entries stay on the GC todo list forever, even after we free them. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index aaafcee02fc..abbe2702c40 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -423,6 +423,7 @@ static void xfrm_state_gc_task(struct work_struct *data) list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) { if ((long)(x->lastused - completed) > 0) break; + list_del(&x->gclist); xfrm_state_gc_destroy(x); } -- cgit v1.2.3-70-g09d2 From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Sep 2008 19:48:19 -0700 Subject: ipsec: Fix xfrm_state_walk race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discovered by Timo Teräs, the current xfrm_state_walk scheme is racy because if a second dump finishes before the first, we may free xfrm states that the first dump would walk over later. This patch fixes this by storing the dumps in a list in order to calculate the correct completion counter which cures this problem. I've expanded netlink_cb in order to accommodate the extra state related to this. It shouldn't be a big deal since netlink_cb is kmalloced for each dump and we're just increasing it by 4 or 8 bytes. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 +- include/net/xfrm.h | 10 +++------- net/xfrm/xfrm_state.c | 39 ++++++++++++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 17 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f..cbba7760545 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[6]; + long args[7]; }; struct netlink_notify diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4bb94992b5f..48630b26659 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1246,6 +1246,8 @@ struct xfrm6_tunnel { }; struct xfrm_state_walk { + struct list_head list; + unsigned long genid; struct xfrm_state *state; int count; u8 proto; @@ -1281,13 +1283,7 @@ static inline void xfrm6_fini(void) extern int xfrm_proc_init(void); #endif -static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) -{ - walk->proto = proto; - walk->state = NULL; - walk->count = 0; -} - +extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index abbe2702c40..053970e8765 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -64,6 +64,9 @@ static unsigned long xfrm_state_walk_ongoing; /* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ static unsigned long xfrm_state_walk_completed; +/* List of outstanding state walks used to set the completed counter. 
*/ +static LIST_HEAD(xfrm_state_walks); + static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1584,7 +1587,6 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, if (err) { xfrm_state_hold(last); walk->state = last; - xfrm_state_walk_ongoing++; goto out; } } @@ -1599,25 +1601,44 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) { + if (old != NULL) xfrm_state_put(old); - xfrm_state_walk_completed++; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); - } return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + walk->proto = proto; + walk->state = NULL; + walk->count = 0; + list_add_tail(&walk->list, &xfrm_state_walks); + walk->genid = ++xfrm_state_walk_ongoing; +} +EXPORT_SYMBOL(xfrm_state_walk_init); + void xfrm_state_walk_done(struct xfrm_state_walk *walk) { + struct list_head *prev; + if (walk->state != NULL) { xfrm_state_put(walk->state); walk->state = NULL; - xfrm_state_walk_completed++; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); } + + prev = walk->list.prev; + list_del(&walk->list); + + if (prev != &xfrm_state_walks) { + list_entry(prev, struct xfrm_state_walk, list)->genid = + walk->genid; + return; + } + + xfrm_state_walk_completed = walk->genid; + + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(xfrm_state_walk_done); -- cgit v1.2.3-70-g09d2 From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:03:24 -0700 Subject: ipsec: Put dumpers on the dump list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu came up with the idea and the original patch to make xfrm_state dump list contain also 
dumpers: As it is we go to extraordinary lengths to ensure that states don't go away while dumpers go to sleep. It's much easier if we just put the dumpers themselves on the list since they can't go away while they're going. I've also changed the order of addition on new states to prevent a never-ending dump. Timo Teräs improved the patch to apply cleanly to latest tree, modified iteration code to be more readable by using a common struct for entries in the list, implemented the same idea for xfrm_policy dumping and moved the af_key specific "last" entry caching to af_key. Signed-off-by: Herbert Xu Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- include/net/xfrm.h | 70 ++++++++++++------------------ net/key/af_key.c | 38 ++++++++++++++--- net/xfrm/xfrm_policy.c | 111 +++++++++++++++++++++++++----------------------- net/xfrm/xfrm_state.c | 109 ++++++++++++++++------------------------------- net/xfrm/xfrm_user.c | 4 +- 6 files changed, 159 insertions(+), 175 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cbba7760545..9ff1b54908f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[7]; + long args[6]; }; struct netlink_notify diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 48630b26659..b98d2056f27 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -117,12 +117,21 @@ extern struct mutex xfrm_cfg_mutex; metrics. Plus, it will be made via sk->sk_dst_cache. Solved. */ +struct xfrm_state_walk { + struct list_head all; + u8 state; + union { + u8 dying; + u8 proto; + }; + u32 seq; +}; + /* Full description of state of transformer. 
*/ struct xfrm_state { - struct list_head all; union { - struct list_head gclist; + struct hlist_node gclist; struct hlist_node bydst; }; struct hlist_node bysrc; @@ -136,12 +145,8 @@ struct xfrm_state u32 genid; - /* Key manger bits */ - struct { - u8 state; - u8 dying; - u32 seq; - } km; + /* Key manager bits */ + struct xfrm_state_walk km; /* Parameters of this state. */ struct { @@ -449,10 +454,20 @@ struct xfrm_tmpl #define XFRM_MAX_DEPTH 6 +struct xfrm_policy_walk_entry { + struct list_head all; + u8 dead; +}; + +struct xfrm_policy_walk { + struct xfrm_policy_walk_entry walk; + u8 type; + u32 seq; +}; + struct xfrm_policy { struct xfrm_policy *next; - struct list_head bytype; struct hlist_node bydst; struct hlist_node byidx; @@ -467,13 +482,12 @@ struct xfrm_policy struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; struct dst_entry *bundles; - u16 family; + struct xfrm_policy_walk_entry walk; u8 type; u8 action; u8 flags; - u8 dead; u8 xfrm_nr; - /* XXX 1 byte hole, try to pack */ + u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; @@ -1245,20 +1259,6 @@ struct xfrm6_tunnel { int priority; }; -struct xfrm_state_walk { - struct list_head list; - unsigned long genid; - struct xfrm_state *state; - int count; - u8 proto; -}; - -struct xfrm_policy_walk { - struct xfrm_policy *policy; - int count; - u8 type, cur_type; -}; - extern void xfrm_init(void); extern void xfrm4_init(void); extern void xfrm_state_init(void); @@ -1410,24 +1410,10 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -static inline void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) -{ - walk->cur_type = XFRM_POLICY_TYPE_MAIN; - walk->type = type; - walk->policy = NULL; - walk->count = 0; -} - -static inline void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) -{ - if (walk->policy != NULL) { - xfrm_pol_put(walk->policy); - walk->policy = NULL; - } 
-} - +extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, diff --git a/net/key/af_key.c b/net/key/af_key.c index b7f5a1c353e..7ae641df70b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -58,6 +58,7 @@ struct pfkey_sock { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; + struct sk_buff *skb; } dump; }; @@ -76,6 +77,10 @@ static int pfkey_can_dump(struct sock *sk) static void pfkey_terminate_dump(struct pfkey_sock *pfk) { if (pfk->dump.dump) { + if (pfk->dump.skb) { + kfree_skb(pfk->dump.skb); + pfk->dump.skb = NULL; + } pfk->dump.done(pfk); pfk->dump.dump = NULL; pfk->dump.done = NULL; @@ -308,12 +313,25 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, static int pfkey_do_dump(struct pfkey_sock *pfk) { + struct sadb_msg *hdr; int rc; rc = pfk->dump.dump(pfk); if (rc == -ENOBUFS) return 0; + if (pfk->dump.skb) { + if (!pfkey_can_dump(&pfk->sk)) + return 0; + + hdr = (struct sadb_msg *) pfk->dump.skb->data; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_errno = rc; + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = NULL; + } + pfkey_terminate_dump(pfk); return rc; } @@ -1744,9 +1762,14 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + 
&pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -2245,7 +2268,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } @@ -2583,9 +2606,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ef9ccbc3875..b7ec08025ff 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; +static struct list_head xfrm_policy_all; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -164,7 +164,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (xp->walk.dead) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -236,7 +236,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - INIT_LIST_HEAD(&policy->bytype); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -252,17 +252,13 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); BUG_ON(policy->bundles); if (del_timer(&policy->timer)) BUG(); - write_lock_bh(&xfrm_policy_lock); - list_del(&policy->bytype); - 
write_unlock_bh(&xfrm_policy_lock); - security_xfrm_policy_free(policy->security); kfree(policy); } @@ -310,8 +306,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) int dead; write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; + dead = policy->walk.dead; + policy->walk.dead = 1; write_unlock_bh(&policy->lock); if (unlikely(dead)) { @@ -609,6 +605,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); + list_del(&delpol->walk.all); xfrm_policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); @@ -617,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); + list_add(&policy->walk.all, &xfrm_policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -684,6 +681,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -727,6 +725,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -840,6 +839,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, @@ -867,60 +867,68 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *old, *pol, *last = NULL; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != 
XFRM_POLICY_TYPE_ANY) return -EINVAL; - if (walk->policy == NULL && walk->count != 0) + if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - old = pol = walk->policy; - walk->policy = NULL; - read_lock_bh(&xfrm_policy_lock); - - for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { - if (walk->type != walk->cur_type && - walk->type != XFRM_POLICY_TYPE_ANY) + write_lock_bh(&xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &xfrm_policy_all, all) { + if (x->dead) continue; - - if (pol == NULL) { - pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], - struct xfrm_policy, bytype); - } - list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { - if (pol->dead) - continue; - if (last) { - error = func(last, xfrm_policy_id2dir(last->index), - walk->count, data); - if (error) { - xfrm_pol_hold(last); - walk->policy = last; - goto out; - } - } - last = pol; - walk->count++; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } - pol = NULL; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - if (last) - error = func(last, xfrm_policy_id2dir(last->index), 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); - if (old != NULL) - xfrm_pol_put(old); + write_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + 
+void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&xfrm_policy_lock); + list_del(&walk->walk.all); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * @@ -1077,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) struct hlist_head *chain = policy_hash_bysel(&pol->selector, pol->family, dir); - list_add_tail(&pol->bytype, &xfrm_policy_bytype[pol->type]); + list_add(&pol->walk.all, &xfrm_policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; @@ -1095,6 +1103,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; return pol; @@ -1720,7 +1729,7 @@ restart: for (pi = 0; pi < npols; pi++) { read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; + pol_dead |= pols[pi]->walk.dead; read_unlock_bh(&pols[pi]->lock); } @@ -2415,9 +2424,7 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } - for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) - INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); - + INIT_LIST_HEAD(&xfrm_policy_all); INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } @@ -2601,7 +2608,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 053970e8765..747fd8c291a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int 
xfrm_state_num; static unsigned int xfrm_state_genid; -/* Counter indicating ongoing walk, protected by xfrm_state_lock. */ -static unsigned long xfrm_state_walk_ongoing; -/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ -static unsigned long xfrm_state_walk_completed; - -/* List of outstanding state walks used to set the completed counter. */ -static LIST_HEAD(xfrm_state_walks); - static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static LIST_HEAD(xfrm_state_gc_leftovers); -static LIST_HEAD(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -412,23 +403,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *data) { - struct xfrm_state *x, *tmp; - unsigned long completed; + struct xfrm_state *x; + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; - mutex_lock(&xfrm_cfg_mutex); spin_lock_bh(&xfrm_state_gc_lock); - list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - completed = xfrm_state_walk_completed; - mutex_unlock(&xfrm_cfg_mutex); - - list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) { - if ((long)(x->lastused - completed) > 0) - break; - list_del(&x->gclist); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - } wake_up(&km_waitq); } @@ -529,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->all); + INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); 
INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -556,7 +540,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add_tail(&x->gclist, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -569,8 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - x->lastused = xfrm_state_walk_ongoing; - list_del_rcu(&x->all); + list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -871,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -940,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); @@ -1069,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1566,79 +1549,59 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { - struct xfrm_state *old, *x, *last = NULL; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - 
if (walk->state == NULL && walk->count != 0) + if (walk->seq != 0 && list_empty(&walk->all)) return 0; - old = x = walk->state; - walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - if (x == NULL) - x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + if (list_empty(&walk->all)) + x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); list_for_each_entry_from(x, &xfrm_state_all, all) { - if (x->km.state == XFRM_STATE_DEAD) + if (x->state == XFRM_STATE_DEAD) continue; - if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; - if (last) { - err = func(last, walk->count, data); - if (err) { - xfrm_state_hold(last); - walk->state = last; - goto out; - } + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } - last = x; - walk->count++; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - if (last) - err = func(last, 0, data); + list_del_init(&walk->all); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) - xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) { + INIT_LIST_HEAD(&walk->all); walk->proto = proto; - walk->state = NULL; - walk->count = 0; - list_add_tail(&walk->list, &xfrm_state_walks); - walk->genid = ++xfrm_state_walk_ongoing; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; } EXPORT_SYMBOL(xfrm_state_walk_init); void xfrm_state_walk_done(struct xfrm_state_walk *walk) { - struct list_head *prev; - - if (walk->state != NULL) { - xfrm_state_put(walk->state); - walk->state = NULL; - } - - prev = walk->list.prev; - list_del(&walk->list); - - if (prev != &xfrm_state_walks) { - list_entry(prev, struct xfrm_state_walk, list)->genid = - walk->genid; + if (list_empty(&walk->all)) 
return; - } - - xfrm_state_walk_completed = walk->genid; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); + spin_lock_bh(&xfrm_state_lock); + list_del(&walk->all); + spin_lock_bh(&xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_walk_done); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 04c41504f84..76f75df21e1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1102,7 +1102,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return xp; error: *errp = err; - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -1595,7 +1595,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOENT; read_lock(&xp->lock); - if (xp->dead) { + if (xp->walk.dead) { read_unlock(&xp->lock); goto out; } -- cgit v1.2.3-70-g09d2 From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Sun, 5 Oct 2008 13:33:42 -0700 Subject: xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate) Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS attribute. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. 
Miller --- include/linux/pfkeyv2.h | 13 +++++++- include/linux/xfrm.h | 10 ++++++ include/net/xfrm.h | 15 +++++++-- net/key/af_key.c | 86 +++++++++++++++++++++++++++++++++++++++---------- net/xfrm/xfrm_policy.c | 5 +-- net/xfrm/xfrm_state.c | 5 +-- net/xfrm/xfrm_user.c | 57 +++++++++++++++++++++++++------- 7 files changed, 154 insertions(+), 37 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 700725ddcaa..01b262959f2 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -226,6 +226,15 @@ struct sadb_x_sec_ctx { } __attribute__((packed)); /* sizeof(struct sadb_sec_ctx) = 8 */ +/* Used by MIGRATE to pass addresses IKE will use to perform + * negotiation with the peer */ +struct sadb_x_kmaddress { + uint16_t sadb_x_kmaddress_len; + uint16_t sadb_x_kmaddress_exttype; + uint32_t sadb_x_kmaddress_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_kmaddress) == 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -346,7 +355,9 @@ struct sadb_x_sec_ctx { #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 #define SADB_X_EXT_SEC_CTX 24 -#define SADB_EXT_MAX 24 +/* Used with MIGRATE to pass @ to IKE for negotiation */ +#define SADB_X_EXT_KMADDRESS 25 +#define SADB_EXT_MAX 25 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index fb0c215a305..4bc1e6b86cb 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -279,6 +279,7 @@ enum xfrm_attr_type_t { XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -415,6 +416,15 @@ struct xfrm_user_report { struct xfrm_selector sel; }; +/* Used by MIGRATE to pass addresses IKE should use to perform + * SA negotiation with the peer */ +struct xfrm_user_kmaddress { + 
xfrm_address_t local; + xfrm_address_t remote; + __u32 reserved; + __u16 family; +}; + struct xfrm_user_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b98d2056f27..11c890ad8eb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -492,6 +492,13 @@ struct xfrm_policy struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; +struct xfrm_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + u32 reserved; + u16 family; +}; + struct xfrm_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; @@ -531,7 +538,7 @@ struct xfrm_mgr int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); - int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles); + int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1432,12 +1439,14 @@ extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles); + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k); extern struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m); extern struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m); extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles); + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k); #endif extern wait_queue_head_t km_waitq; diff --git a/net/key/af_key.c b/net/key/af_key.c index 7ae641df70b..362fe317e1f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -398,6 +398,7 @@ static u8 
sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), + [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -2384,24 +2385,21 @@ static int pfkey_sockaddr_pair_size(sa_family_t family) return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } -static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { - u8 *sa = (u8 *) (rq + 1); int af, socklen; - if (rq->sadb_x_ipsecrequest_len < - pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family)) + if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; - af = pfkey_sockaddr_extract((struct sockaddr *) sa, - saddr); + af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); - if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen), + if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; @@ -2421,7 +2419,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* old endoints */ - err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), + rq1->sadb_x_ipsecrequest_len, + &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; @@ -2434,7 +2434,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* new endpoints */ - err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), + rq2->sadb_x_ipsecrequest_len, + &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; @@ -2460,29 +2462,40 @@ static int pfkey_migrate(struct sock *sk, 
struct sk_buff *skb, int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; + struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress k; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], - ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } + kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; - if (!pol) { - err = -EINVAL; - goto out; - } if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } + if (kma) { + /* convert sadb_x_kmaddress to xfrm_kmaddress */ + k.reserved = kma->sadb_x_kmaddress_reserved; + ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), + 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), + &k.local, &k.remote, &k.family); + if (ret < 0) { + err = ret; + goto out; + } + } + dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); @@ -2527,7 +2540,8 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, goto out; } - return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, + kma ? 
&k : NULL); out: return err; @@ -3319,6 +3333,32 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, return 0; } + +static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +{ + struct sadb_x_kmaddress *kma; + u8 *sa; + int family = k->family; + int socklen = pfkey_sockaddr_len(family); + int size_req; + + size_req = (sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(family)); + + kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req); + memset(kma, 0, size_req); + kma->sadb_x_kmaddress_len = size_req / 8; + kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; + kma->sadb_x_kmaddress_reserved = k->reserved; + + sa = (u8 *)(kma + 1); + if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) + return -EINVAL; + + return 0; +} + static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, @@ -3351,7 +3391,8 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3368,6 +3409,12 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; + if (k != NULL) { + /* addresses for KM */ + size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(k->family)); + } + /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) @@ -3404,6 +3451,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; + /* Addresses to be used by KM for negotiation, if ext is available */ + if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) + return 
-EINVAL; + /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); @@ -3449,7 +3500,8 @@ err: } #else static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b7ec08025ff..832b47c1de8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2679,7 +2679,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) } int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2723,7 +2724,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 747fd8c291a..508337f9724 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1814,7 +1814,8 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; @@ -1823,7 +1824,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate); + ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 76f75df21e1..4a8a1abb59e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1710,12 +1710,23 @@ static int 
xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, struct nlattr **attrs, int *num) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); @@ -1745,6 +1756,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; @@ -1752,19 +1764,20 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; + kmp = attrs[XFRMA_KMADDRESS] ? 
&km : NULL; + err = copy_from_user_policy_type(&type, attrs); if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, - attrs, &n); + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); if (err) return err; if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); return 0; } @@ -1795,16 +1808,30 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static inline size_t xfrm_migrate_msgsize(int num_migrate) +static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +{ /* Append k to skb as an XFRMA_KMADDRESS attribute; returns the nla_put() result. */ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); /* zero the whole struct before sizeof(uk) bytes are copied into the skb */ + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); /* remote endpoint comes from k->remote, not k->local */ + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) - + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) - + userpolicy_type_attrsize(); + + (with_kma ?
nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_selector *sel, - u8 dir, u8 type) + int num_migrate, struct xfrm_kmaddress *k, + struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; @@ -1821,6 +1848,9 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; + if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) + goto nlmsg_failure; + if (copy_to_user_policy_type(type, skb) < 0) goto nlmsg_failure; @@ -1836,23 +1866,25 @@ nlmsg_failure: } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -1901,6 +1933,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, }; static struct xfrm_link { -- cgit v1.2.3-70-g09d2