From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seamless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0021aad5db4..be02bd981d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) + !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || best->km.dying > x->km.dying || @@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) + security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } } @@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. 
*/ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + goto out; + } + if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); -- cgit v1.2.3-70-g09d2 From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:20 -0700 Subject: [MLSXFRM]: Default labeling of socket specific IPSec policies This defaults the label of socket-specific IPSec policies to be the same as the socket they are set on. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 19 ++++++++++++++++--- include/net/xfrm.h | 2 +- net/key/af_key.c | 15 +++++++++++---- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 13 +++++++++++-- security/dummy.c | 3 ++- security/selinux/include/xfrm.h | 3 ++- security/selinux/xfrm.c | 33 ++++++++++++++++++++++----------- 8 files changed, 66 insertions(+), 24 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/security.h b/include/linux/security.h index f3909d189fe..8e3dc6c51a6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -827,8 +827,10 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). + * @sk refers to the sock from which to derive the security context. * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. + * field is initialized to NULL when the xfrm_policy is allocated. Only + * one of sec_ctx or sock can be specified. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. 
@@ -1359,7 +1361,8 @@ struct security_operations { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); @@ -3057,7 +3060,12 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); +} + +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); } static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) @@ -3132,6 +3140,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return 0; +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3ecd9fa1ed4..00bf86e6e82 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -362,7 +362,7 @@ struct xfrm_mgr char *id; int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); - struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); + struct xfrm_policy *(*compile_policy)(struct sock 
*sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; diff --git a/net/key/af_key.c b/net/key/af_key.c index a065e1a6777..797c744a843 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2843,14 +2843,14 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } -static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, +static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; @@ -2891,7 +2891,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; - xp->family = family; + xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && @@ -2907,8 +2907,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) + sec_ctx->sadb_x_sec_len) { + *dir = -EINVAL; goto out; + } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2918,6 +2920,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, if (*dir) goto out; } + else { + *dir = security_xfrm_sock_policy_alloc(xp, sk); + if (*dir) + goto out; + } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index be02bd981d1..1c796087ee7 100644 --- a/net/xfrm/xfrm_state.c +++ 
b/net/xfrm/xfrm_state.c @@ -1026,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dac8db1088b..f70e158874d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1757,7 +1757,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, +static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; @@ -1765,7 +1765,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, struct xfrm_policy *xp; int nr; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1807,6 +1807,15 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, copy_from_user_policy(xp, p); copy_templates(xp, ut, nr); + if (!xp->security) { + int err = security_xfrm_sock_policy_alloc(xp, sk); + if (err) { + kfree(xp); + *dir = err; + return NULL; + } + } + *dir = p->dir; return xp; diff --git a/security/dummy.c b/security/dummy.c index c0ff6b9bfd7..66cc0640493 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -815,7 +815,8 @@ static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk) { 
return 0; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 8e45c1d588a..1822c73e508 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -7,7 +7,8 @@ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index c750ef7af66..d3690f98513 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -208,10 +208,8 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, BUG_ON(uctx && pol); - if (pol) - goto from_policy; - - BUG_ON(!uctx); + if (!uctx) + goto not_from_user; if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) return -EINVAL; @@ -251,11 +249,14 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, return rc; -from_policy: - BUG_ON(!pol); - rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); - if (rc) - goto out; +not_from_user: + if (pol) { + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + } + else + ctx_sid = sid; rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); if (rc) @@ -293,13 +294,23 @@ out2: * LSM hook implementation that allocs and transfers uctx spec to * xfrm_policy. 
*/ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *uctx, struct sock *sk) { int err; + u32 sid; BUG_ON(!xp); + BUG_ON(uctx && sk); + + if (sk) { + struct sk_security_struct *ssec = sk->sk_security; + sid = ssec->sid; + } + else + sid = SECSID_NULL; - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid); return err; } -- cgit v1.2.3-70-g09d2 From 5794708f11551b6d19b10673abf4b0202f66b44d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:06:24 -0700 Subject: [XFRM]: Introduce a helper to compare id protocol. Put the helper to header for future use. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/xfrm/xfrm_state.c | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 762795624b1..5b364b0a6a2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -835,6 +836,11 @@ static inline int xfrm_state_kern(struct xfrm_state *x) return atomic_read(&x->tunnel_users); } +static inline int xfrm_id_proto_match(u8 proto, u8 userproto) +{ + return (userproto == IPSEC_PROTO_ANY || proto == userproto); +} + /* * xfrm algorithm information */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1c796087ee7..34c038cbdf4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -294,7 +294,7 @@ void xfrm_state_flush(u8 proto) restart: list_for_each_entry(x, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); 
spin_unlock_bh(&xfrm_state_lock); @@ -772,7 +772,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + if (xfrm_id_proto_match(x->id.proto, proto)) count++; } } @@ -783,7 +783,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); if (err) -- cgit v1.2.3-70-g09d2 From 6c44e6b7ab500d7e3e3f406c83325671be51a752 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:53:57 -0700 Subject: [XFRM] STATE: Add source address list. Support source address based searching. Mobile IPv6 will use it. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 26 ++++++++++++++++++++++++++ net/ipv4/xfrm4_state.c | 3 +++ net/ipv6/xfrm6_state.c | 3 +++ net/xfrm/xfrm_state.c | 21 +++++++++++++++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa3be68041b..88145e3348d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -95,6 +95,7 @@ struct xfrm_state { /* Note: bydst is re-used during gc */ struct list_head bydst; + struct list_head bysrc; struct list_head byspi; atomic_t refcnt; @@ -236,6 +237,7 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; struct list_head *state_bydst; + struct list_head *state_bysrc; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, @@ -420,6 +422,30 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + static __inline__ unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 97b0c758971..c56b258fad7 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -122,6 +122,9 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, add_timer(&x0->timer); xfrm_state_hold(x0); list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); + h = __xfrm4_src_hash(saddr); + xfrm_state_hold(x0); + list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h); wake_up(&km_waitq); } if 
(x0) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a1a1f547644..2fb07850449 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -126,6 +126,9 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, add_timer(&x0->timer); xfrm_state_hold(x0); list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); + h = __xfrm6_src_hash(saddr); + xfrm_state_hold(x0); + list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h); wake_up(&km_waitq); } if (x0) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34c038cbdf4..2a9992894e6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -45,6 +45,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Also, it can be used by ah/esp icmp error handler to find offending SA. */ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; DECLARE_WAIT_QUEUE_HEAD(km_waitq); @@ -200,6 +201,7 @@ struct xfrm_state *xfrm_state_alloc(void) atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->bysrc); INIT_LIST_HEAD(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; @@ -240,6 +242,8 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&xfrm_state_lock); list_del(&x->bydst); __xfrm_state_put(x); + list_del(&x->bysrc); + __xfrm_state_put(x); if (x->id.spi) { list_del(&x->byspi); __xfrm_state_put(x); @@ -415,6 +419,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); list_add(&x->byspi, xfrm_state_byspi+h); @@ -448,11 +454,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - 
h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + list_add(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); + if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family); + + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + } + if (!mod_timer(&x->timer, jiffies + HZ)) xfrm_state_hold(x); @@ -1075,6 +1089,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) err = -ENOBUFS; else { afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; } @@ -1097,6 +1112,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) else { xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; + afinfo->state_bysrc = NULL; afinfo->state_bydst = NULL; } } @@ -1218,6 +1234,7 @@ void __init xfrm_state_init(void) for (i=0; i Date: Wed, 23 Aug 2006 17:56:04 -0700 Subject: [XFRM] STATE: Search by address using source address list. This is a support to search transformation states by its addresses by using source address list for Mobile IPv6 usage. To use it from user-space, it is also added a message type for source address as a xfrm state option. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_state.c | 9 ++++++++ net/ipv6/xfrm6_state.c | 21 ++++++++++++++++++ net/xfrm/xfrm_state.c | 37 +++++++++++++++++++++++++++---- net/xfrm/xfrm_user.c | 59 +++++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 119 insertions(+), 10 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 5154064b6d9..66343d3d4b9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -234,6 +234,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_VAL, XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, + XFRMA_SRCADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 88145e3348d..d9c40e71318 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -244,6 +244,7 @@ struct xfrm_state_afinfo { struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); + struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); @@ -937,6 +938,7 @@ extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index c56b258fad7..616be131b4e 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -80,6 
+80,14 @@ __xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } +/* placeholder until ipv4's code is written */ +static struct xfrm_state * +__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + return NULL; +} + static struct xfrm_state * __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, @@ -137,6 +145,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, + .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, .find_acq = __xfrm4_find_acq, }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 2fb07850449..9c95b9d3e11 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,6 +63,26 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } +static struct xfrm_state * +__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + struct xfrm_state *x = NULL; + unsigned h; + + h = __xfrm6_src_hash(saddr); + list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { + if (x->props.family == AF_INET6 && + ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && + ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && + proto == x->id.proto) { + xfrm_state_hold(x); + return x; + } + } + return NULL; +} + static struct xfrm_state * __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) { @@ -140,6 +160,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, + .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .find_acq = __xfrm6_find_acq, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2a9992894e6..11f480b1295 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -487,6 
+487,16 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, + int use_spi) +{ + if (use_spi) + return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + else + return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); +} + static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) @@ -495,6 +505,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1; int family; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; afinfo = xfrm_state_get_afinfo(family); @@ -503,7 +514,7 @@ int xfrm_state_add(struct xfrm_state *x) spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -511,7 +522,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (x->km.seq) { + if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -519,7 +530,7 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (!x1) + if (use_spi && !x1) x1 = afinfo->find_acq( x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -548,13 +559,14 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); afinfo = xfrm_state_get_afinfo(x->props.family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); err = -ESRCH; if (!x1) @@ -674,6 +686,23 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, } 
EXPORT_SYMBOL(xfrm_state_lookup); +struct xfrm_state * +xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); + struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 41f3d51ffc3..b5f8ab71aa5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -87,6 +87,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } +static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, + xfrm_address_t **addrp) +{ + struct rtattr *rt = xfrma[type - 1]; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) + return -EINVAL; + + if (addrp) + *addrp = RTA_DATA(rt); + + return 0; +} static inline int verify_sec_ctx_len(struct rtattr **xfrma) { @@ -418,16 +434,48 @@ out: return err; } +static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = NULL; + int err; + + if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { + err = -ESRCH; + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + } else { + xfrm_address_t *saddr = NULL; + + err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); + if (err) + goto out; + + if (!saddr) { + err = -EINVAL; + goto out; + } + + x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, + p->family); + } + + out: + if (!x && errp) + *errp = err; + return x; +} + static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct xfrm_state *x; - int err; + int err = -ESRCH; 
struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) - return -ESRCH; + return err; if ((err = security_xfrm_state_delete(x)) != 0) goto out; @@ -578,10 +626,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err; + int err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - err = -ESRCH; + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) goto out_noput; -- cgit v1.2.3-70-g09d2 From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:08:21 -0700 Subject: [XFRM] STATE: Common receive function for route optimization extension headers. XFRM_STATE_WILDRECV flag is introduced; the last resort state is set it and receives packet which is not route optimized but uses such extension headers i.e. Mobile IPv6 signaling (binding update and acknowledgement). A node enabled Mobile IPv6 adds the state. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 + net/ipv6/ipv6_syms.c | 1 + net/ipv6/xfrm6_input.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_state.c | 1 + 5 files changed, 113 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 66343d3d4b9..a7c9e4cfb15 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -256,6 +256,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 #define XFRM_STATE_NOPMTUDISC 4 +#define XFRM_STATE_WILDRECV 8 }; struct xfrm_usersa_id { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index eed48f832ce..0d735a5aba6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -955,6 +955,8 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi); extern int xfrm6_rcv(struct sk_buff **pskb); +extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index e1a74161288..7b7b90d9c3d 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_input_addr); EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index ee2f6b3908b..a40a0578901 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb) { return xfrm6_rcv_spi(*pskb, 0); } + +int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t 
*daddr, + xfrm_address_t *saddr, u8 proto) +{ + struct xfrm_state *x = NULL; + int wildcard = 0; + struct in6_addr any; + xfrm_address_t *xany; + struct xfrm_state *xfrm_vec_one = NULL; + int nh = 0; + int i = 0; + + ipv6_addr_set(&any, 0, 0, 0, 0); + xany = (xfrm_address_t *)&any; + + for (i = 0; i < 3; i++) { + xfrm_address_t *dst, *src; + switch (i) { + case 0: + dst = daddr; + src = saddr; + break; + case 1: + /* lookup state with wild-card source address */ + wildcard = 1; + dst = daddr; + src = xany; + break; + case 2: + default: + /* lookup state with wild-card addresses */ + wildcard = 1; /* XXX */ + dst = xany; + src = xany; + break; + } + + x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + if (!x) + continue; + + spin_lock(&x->lock); + + if (wildcard) { + if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + } + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + if (xfrm_state_check_expire(x)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + nh = x->type->input(x, skb); + if (nh <= 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + xfrm_vec_one = x; + break; + } + + if (!xfrm_vec_one) + goto drop; + + /* Allocate new secpath or COW existing one. 
*/ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + sp = secpath_dup(skb->sp); + if (!sp) + goto drop; + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len > XFRM_MAX_DEPTH) + goto drop; + + skb->sp->xvec[skb->sp->len] = xfrm_vec_one; + skb->sp->len ++; + + return 1; +drop: + if (xfrm_vec_one) + xfrm_state_put(xfrm_vec_one); + return -1; +} diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 11f480b1295..f05371556cc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -352,6 +352,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && -- cgit v1.2.3-70-g09d2 From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 18:18:55 -0700 Subject: [XFRM] STATE: Introduce care-of address. Care-of address is carried by state as a transformation option like IPsec encryption/authentication algorithm. Based on MIPL2 kernel patch. 
Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/xfrm/xfrm_state.c | 6 ++++++ net/xfrm/xfrm_user.c | 28 +++++++++++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index a7c9e4cfb15..b53f799189a 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -235,6 +235,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ + XFRMA_COADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa93cc1f629..872a2a4022b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -134,6 +134,9 @@ struct xfrm_state /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; + /* Data for care-of address */ + xfrm_address_t *coaddr; + /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f05371556cc..3da89c01ea7 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,6 +78,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -603,6 +604,11 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b5f8ab71aa5..939808de9e2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -187,11 
+187,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(xfrma))) goto out; + if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + goto out; err = -EINVAL; switch (p->mode) { case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: + case XFRM_MODE_ROUTEOPTIMIZATION: break; default: @@ -276,6 +279,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) return security_xfrm_state_alloc(x, uctx); } +static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + xfrm_address_t *p, *uaddrp; + + if (!rta) + return 0; + + uaddrp = RTA_DATA(rta); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, uaddrp, sizeof(*p)); + *addrpp = p; + return 0; +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -365,7 +386,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - + if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + goto error; err = xfrm_init_state(x); if (err) goto error; @@ -569,6 +591,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) uctx->ctx_len = x->security->ctx_len; memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); } + + if (x->coaddr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; -- cgit v1.2.3-70-g09d2 From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:44:06 -0700 Subject: [XFRM]: Introduce XFRM_MSG_REPORT. XFRM_MSG_REPORT is a message as notification of state protocol and selector from kernel to user-space. Mobile IPv6 will use it when inbound reject is occurred at route optimization to make user-space know a binding error requirement. 
Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 12 ++++++++++++ include/net/xfrm.h | 2 ++ net/xfrm/xfrm_state.c | 19 +++++++++++++++++++ net/xfrm/xfrm_user.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 1d8c1f22c12..4009f4445fa 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -166,6 +166,10 @@ enum { #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE XFRM_MSG_GETAE, #define XFRM_MSG_GETAE XFRM_MSG_GETAE + + XFRM_MSG_REPORT, +#define XFRM_MSG_REPORT XFRM_MSG_REPORT + __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -325,12 +329,18 @@ struct xfrm_usersa_flush { __u8 proto; }; +struct xfrm_user_report { + __u8 proto; + struct xfrm_selector sel; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#define XFRMGRP_REPORT 0x10 #endif enum xfrm_nlgroups { @@ -346,6 +356,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY XFRMNLGRP_AEVENTS, #define XFRMNLGRP_AEVENTS XFRMNLGRP_AEVENTS + XFRMNLGRP_REPORT, +#define XFRMNLGRP_REPORT XFRMNLGRP_REPORT __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9ebbdc1dd47..0b223eed4c9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -381,6 +381,7 @@ struct xfrm_mgr struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); + int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1043,6 +1044,7 @@ extern void 
xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3da89c01ea7..a26ef6952c3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1055,6 +1055,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(proto, sel, addr); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_report); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 770bd241074..7303b820bea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1491,6 +1491,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), }; #undef XMSGSIZE @@ -2058,12 +2059,57 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static int build_report(struct sk_buff *skb, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct xfrm_user_report *ur; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = 
NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); + ur = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + ur->proto = proto; + memcpy(&ur->sel, sel, sizeof(ur->sel)); + + if (addr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, + xfrm_address_t *addr) +{ + struct sk_buff *skb; + size_t len; + + len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_report(skb, proto, sel, addr) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, + .report = xfrm_send_report, }; static int __init xfrm_user_init(void) -- cgit v1.2.3-70-g09d2 From 41a49cc3c02ace59d4dddae91ea211c330970ee3 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:48:31 -0700 Subject: [XFRM]: Add sorting interface for state and template. Under two transformation policies it is required to merge them. This is a platform to sort state for outbound and templates for inbound respectively. It will be used when Mobile IPv6 and IPsec are used at the same time. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 20 ++++++++++++++++++++ net/xfrm/xfrm_policy.c | 16 ++++++++++++++-- net/xfrm/xfrm_state.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4655ca25f80..d341603e4ba 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -254,6 +254,8 @@ struct xfrm_state_afinfo { struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); + int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); + int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1002,6 +1004,24 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +#ifdef CONFIG_XFRM_SUB_POLICY +extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family); +extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family); +#else +static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family) +{ + return -ENOSYS; +} + +static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family) +{ + return -ENOSYS; +} +#endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 96de6c76ed5..1732159ffd0 100644 --- a/net/xfrm/xfrm_policy.c +++ 
b/net/xfrm/xfrm_policy.c @@ -861,6 +861,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; int cnx = 0; int error; int ret; @@ -871,7 +873,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, error = -ENOBUFS; goto fail; } - ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); if (ret < 0) { error = ret; goto fail; @@ -879,11 +882,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, cnx += ret; } + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + return cnx; fail: for (cnx--; cnx>=0; cnx--) - xfrm_state_put(xfrm[cnx]); + xfrm_state_put(tpp[cnx]); return error; } @@ -1280,6 +1287,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct sec_path *sp; static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; @@ -1297,6 +1305,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + tpp = stp; + } /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26ef6952c3..622e92a08d0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -728,6 +728,44 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, } EXPORT_SYMBOL(xfrm_find_acq); +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_tmpl_sort); + +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_state_sort); +#endif + /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) -- cgit v1.2.3-70-g09d2 From 2770834c9f44afd1bfa13914c7285470775af657 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:13:10 -0700 Subject: [XFRM]: Pull xfrm_state_bydst hash table knowledge out of afinfo. Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 16 ------- net/ipv4/xfrm4_state.c | 53 ------------------------ net/ipv6/xfrm6_state.c | 56 ------------------------- net/xfrm/xfrm_state.c | 110 ++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 100 insertions(+), 135 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c75b3287d8f..cc83443f301 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -243,7 +243,6 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - struct list_head *state_bydst; struct list_head *state_bysrc; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); @@ -252,9 +251,6 @@ struct xfrm_state_afinfo { xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); - struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; @@ -455,18 +451,6 @@ unsigned __xfrm6_dst_hash(xfrm_address_t *addr) return h; } -static __inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_dst_hash(addr); - case AF_INET6: - return __xfrm6_dst_hash(addr); - } - return 0; -} - static __inline__ unsigned __xfrm4_src_hash(xfrm_address_t *addr) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 616be131b4e..9dc1afc17b6 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -88,65 +88,12 @@ __xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, return NULL; } -static struct xfrm_state * -__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, - 
xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm4_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET && - daddr->a4 == x->id.daddr.a4 && - mode == x->props.mode && - proto == x->id.proto && - saddr->a4 == x->props.saddr.a4 && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - x0->sel.daddr.a4 = daddr->a4; - x0->sel.saddr.a4 = saddr->a4; - x0->sel.prefixlen_d = 32; - x0->sel.prefixlen_s = 32; - x0->props.saddr.a4 = saddr->a4; - x0->km.state = XFRM_STATE_ACQ; - x0->id.daddr.a4 = daddr->a4; - x0->id.proto = proto; - x0->props.family = AF_INET; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->props.family = AF_INET; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); - h = __xfrm4_src_hash(saddr); - xfrm_state_hold(x0); - list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, - .find_acq = __xfrm4_find_acq, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 6269584e610..40fcaab7e02 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -101,61 +101,6 @@ __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } -static struct xfrm_state * -__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, 
xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm6_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - mode == x->props.mode && - proto == x->id.proto && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6, - (struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6, - (struct in6_addr *)saddr); - x0->sel.prefixlen_d = 128; - x0->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6, - (struct in6_addr *)saddr); - x0->km.state = XFRM_STATE_ACQ; - ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6, - (struct in6_addr *)daddr); - x0->id.proto = proto; - x0->props.family = AF_INET6; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); - h = __xfrm6_src_hash(saddr); - xfrm_state_hold(x0); - list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static int __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { @@ -280,7 +225,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, - .find_acq = __xfrm6_find_acq, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, }; diff --git 
a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 622e92a08d0..80f5f9dc2b9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -48,6 +48,18 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_dst_hash(addr); + case AF_INET6: + return __xfrm6_dst_hash(addr); + } + return 0; +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -489,6 +501,89 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +/* xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +{ + unsigned int h = xfrm_dst_hash(daddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + if (x->props.reqid != reqid || + x->props.mode != mode || + x->props.family != family || + x->km.state != XFRM_STATE_ACQ || + x->id.spi != 0) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr) || + !ipv6_addr_equal((struct in6_addr *) + x->props.saddr.a6, + (struct in6_addr *)saddr)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + if (!create) + return NULL; + + x = xfrm_state_alloc(); + if (likely(x)) { + switch (family) { + case AF_INET: + x->sel.daddr.a4 = daddr->a4; + x->sel.saddr.a4 = saddr->a4; + x->sel.prefixlen_d = 32; + x->sel.prefixlen_s = 32; + x->props.saddr.a4 = saddr->a4; + x->id.daddr.a4 = daddr->a4; + break; + + case AF_INET6: + ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, + (struct in6_addr *)daddr); + 
ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, + (struct in6_addr *)saddr); + x->sel.prefixlen_d = 128; + x->sel.prefixlen_s = 128; + ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, + (struct in6_addr *)saddr); + ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr); + break; + }; + + x->km.state = XFRM_STATE_ACQ; + x->id.proto = proto; + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); + x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x->timer); + xfrm_state_hold(x); + list_add_tail(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(saddr, family); + xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + wake_up(&km_waitq); + } + + return x; +} + static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, int use_spi) @@ -533,9 +628,9 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = afinfo->find_acq( - x->props.mode, x->props.reqid, x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x->id.proto, + &x->id.daddr, &x->props.saddr, 0); __xfrm_state_insert(x); err = 0; @@ -716,14 +811,11 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + return x; } EXPORT_SYMBOL(xfrm_find_acq); @@ -1181,7 +1273,6 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { - afinfo->state_bydst = xfrm_state_bydst; 
afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; @@ -1206,7 +1297,6 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; afinfo->state_bysrc = NULL; - afinfo->state_bydst = NULL; } } write_unlock_bh(&xfrm_state_afinfo_lock); -- cgit v1.2.3-70-g09d2 From edcd582152090bfb0ccb4ad444c151798a73eda8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:42:45 -0700 Subject: [XFRM]: Pull xfrm_state_by{spi,src} hash table knowledge out of afinfo. Signed-off-by: David S. Miller --- include/net/xfrm.h | 78 ------------------ net/ipv4/xfrm4_state.c | 28 ------- net/ipv6/xfrm6_state.c | 40 ---------- net/xfrm/xfrm_state.c | 210 +++++++++++++++++++++++++++++++++++++------------ 4 files changed, 159 insertions(+), 197 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index cc83443f301..dd3b84b9c04 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -243,14 +243,10 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - struct list_head *state_bysrc; - struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); - struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); - struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; @@ -431,80 +427,6 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) } #endif -#define XFRM_DST_HSIZE 1024 - -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) -{ - unsigned 
h; - h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) -{ - unsigned h; - h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) -{ - return __xfrm4_dst_hash(addr); -} - -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) -{ - return __xfrm6_dst_hash(addr); -} - -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_src_hash(addr); - case AF_INET6: - return __xfrm6_src_hash(addr); - } - return 0; -} - -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) -{ - unsigned h; - h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) -{ - unsigned h; - h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); - case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); - } - return 0; /*XXX*/ -} - extern void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 9dc1afc17b6..6a2a4ab4277 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -62,38 +62,10 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET; } -static struct xfrm_state * -__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm4_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, 
xfrm4_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET && - spi == x->id.spi && - daddr->a4 == x->id.daddr.a4 && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -/* placeholder until ipv4's code is written */ -static struct xfrm_state * -__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto) -{ - return NULL; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, - .state_lookup = __xfrm4_state_lookup, - .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 40fcaab7e02..d88cd92c864 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,44 +63,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } -static struct xfrm_state * -__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto) -{ - struct xfrm_state *x = NULL; - unsigned h; - - h = __xfrm6_src_hash(saddr); - list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -static struct xfrm_state * -__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm6_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET6 && - spi == x->id.spi && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - static int 
__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { @@ -223,8 +185,6 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, - .state_lookup = __xfrm6_state_lookup, - .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 80f5f9dc2b9..4a3832f81c3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -38,6 +38,8 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); +#define XFRM_DST_HSIZE 1024 + /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * @@ -48,6 +50,48 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a4); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + static __inline__ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) { @@ -60,6 +104,36 @@ unsigned xfrm_dst_hash(xfrm_address_t 
*addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a4^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_spi_hash(addr, spi, proto); + case AF_INET6: + return __xfrm6_spi_hash(addr, spi, proto); + } + return 0; /*XXX*/ +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -342,6 +416,83 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } +static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + if (x->props.family != family || + x->id.spi != spi || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_src_hash(saddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + if (x->props.family != family || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + 
case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6) || + !ipv6_addr_equal((struct in6_addr *)saddr, + (struct in6_addr *) + x->props.saddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) +{ + if (use_spi) + return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + x->id.proto, family); + else + return __xfrm_state_lookup_byaddr(&x->id.daddr, + &x->props.saddr, + x->id.proto, family); +} + struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -353,14 +504,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; - struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(family); - if (afinfo == NULL) { - *err = -EAFNOSUPPORT; - return NULL; - } - spin_lock_bh(&xfrm_state_lock); list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && @@ -406,8 +550,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto)) != NULL) { + (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + tmpl->id.proto, family)) != NULL) { xfrm_state_put(x0); error = -EEXIST; goto out; @@ -457,7 +601,6 @@ out: else *err = acquire_in_progress ? 
-EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } @@ -584,34 +727,20 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re return x; } -static inline struct xfrm_state * -__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, - int use_spi) -{ - if (use_spi) - return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); - else - return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); -} - static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int family; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -637,7 +766,6 @@ int xfrm_state_add(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (!err) xfrm_flush_all_bundles(); @@ -653,17 +781,12 @@ EXPORT_SYMBOL(xfrm_state_add); int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); - afinfo = xfrm_state_get_afinfo(x->props.family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; if (!x1) @@ -683,7 +806,6 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (err) return err; @@ -776,14 +898,10 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { struct 
xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); + x = __xfrm_state_lookup(daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -793,14 +911,10 @@ xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -1272,11 +1386,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) write_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else { - afinfo->state_bysrc = xfrm_state_bysrc; - afinfo->state_byspi = xfrm_state_byspi; + else xfrm_state_afinfo[afinfo->family] = afinfo; - } write_unlock_bh(&xfrm_state_afinfo_lock); return err; } @@ -1293,11 +1404,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { + else xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bysrc = NULL; - } } write_unlock_bh(&xfrm_state_afinfo_lock); return err; -- cgit v1.2.3-70-g09d2 From 8f126e37c0b250310a48a609bedf92a19a5559ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 02:45:07 -0700 Subject: [XFRM]: Convert xfrm_state hash linkage to hlists. Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 6 ++-- net/xfrm/xfrm_state.c | 92 ++++++++++++++++++++++++++++----------------------- 2 files changed, 54 insertions(+), 44 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dd3b84b9c04..3405e5d9d51 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -94,9 +94,9 @@ extern struct mutex xfrm_cfg_mutex; struct xfrm_state { /* Note: bydst is re-used during gc */ - struct list_head bydst; - struct list_head bysrc; - struct list_head byspi; + struct hlist_node bydst; + struct hlist_node bysrc; + struct hlist_node byspi; atomic_t refcnt; spinlock_t lock; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4a3832f81c3..fe3c8c38d5e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -46,9 +46,9 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. 
*/ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; static __inline__ unsigned __xfrm4_dst_hash(xfrm_address_t *addr) @@ -141,7 +141,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); static int xfrm_state_gc_flush_bundles; @@ -178,8 +178,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(void *data) { struct xfrm_state *x; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; if (xfrm_state_gc_flush_bundles) { xfrm_state_gc_flush_bundles = 0; @@ -187,13 +187,13 @@ static void xfrm_state_gc_task(void *data) } spin_lock_bh(&xfrm_state_gc_lock); - list_splice_init(&xfrm_state_gc_list, &gc_list); + gc_list.first = xfrm_state_gc_list.first; + INIT_HLIST_HEAD(&xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - x = list_entry(entry, struct xfrm_state, bydst); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) xfrm_state_gc_destroy(x); - } + wake_up(&km_waitq); } @@ -287,9 +287,9 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->bysrc); - INIT_LIST_HEAD(&x->byspi); + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); init_timer(&x->timer); x->timer.function = 
xfrm_timer_handler; x->timer.data = (unsigned long)x; @@ -314,7 +314,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) BUG_TRAP(x->km.state == XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -327,12 +327,12 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - list_del(&x->bydst); + hlist_del(&x->bydst); __xfrm_state_put(x); - list_del(&x->bysrc); + hlist_del(&x->bysrc); __xfrm_state_put(x); if (x->id.spi) { - list_del(&x->byspi); + hlist_del(&x->byspi); __xfrm_state_put(x); } spin_unlock(&xfrm_state_lock); @@ -378,12 +378,13 @@ EXPORT_SYMBOL(xfrm_state_delete); void xfrm_state_flush(u8 proto) { int i; - struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { + struct hlist_node *entry; + struct xfrm_state *x; restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -420,8 +421,9 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 { unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -451,8 +453,9 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm { unsigned int h = xfrm_src_hash(saddr, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { if (x->props.family != family || 
x->id.proto != proto) continue; @@ -499,14 +502,15 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -575,13 +579,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(saddr, family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; @@ -608,19 +613,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); - list_add(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } @@ -648,9 
+653,10 @@ EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x; - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -717,10 +723,10 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); xfrm_state_hold(x); - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -977,11 +983,14 @@ EXPORT_SYMBOL(xfrm_state_sort); static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - struct xfrm_state *x; for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && + x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -1047,7 +1056,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); @@ -1060,12 +1069,13 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), { int i; struct xfrm_state *x; + struct hlist_node *entry; 
int count = 0; int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; } @@ -1076,7 +1086,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), } for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); @@ -1524,9 +1534,9 @@ void __init xfrm_state_init(void) int i; for (i=0; i Date: Thu, 24 Aug 2006 03:08:07 -0700 Subject: [XFRM]: Dynamic xfrm_state hash table sizing. The grow algorithm is simple, we grow if: 1) we see a hash chain collision at insert, and 2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and 3) the number of xfrm_state objects is > the current hash mask All of this needs some tweaking. Remove __initdata from "hashdist" so we can use it safely at run time. Signed-off-by: David S. Miller --- include/linux/bootmem.h | 2 +- mm/page_alloc.c | 2 +- net/xfrm/xfrm_state.c | 247 ++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 197 insertions(+), 54 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1021f508d82..e319c649e4f 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename, #else #define HASHDIST_DEFAULT 0 #endif -extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ +extern int hashdist; /* Distribute hashes across NUMA nodes? 
*/ #endif /* _LINUX_BOOTMEM_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54a4f5375bb..3b5358a0561 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2363,7 +2363,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, return 0; } -__initdata int hashdist = HASHDIST_DEFAULT; +int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA static int __init set_hashdist(char *str) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fe3c8c38d5e..445263c54c9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #include struct sock *xfrm_nl; @@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); -#define XFRM_DST_HSIZE 1024 - /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. 
*/ -static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; - -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +static struct hlist_head *xfrm_state_bydst __read_mostly; +static struct hlist_head *xfrm_state_bysrc __read_mostly; +static struct hlist_head *xfrm_state_byspi __read_mostly; +static unsigned int xfrm_state_hmask __read_mostly; +static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static unsigned int xfrm_state_num; + +static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); } -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_src_hash(addr); + return __xfrm4_src_hash(addr, hmask); case AF_INET6: - return __xfrm6_src_hash(addr); + return __xfrm6_src_hash(addr, hmask); } return 0; } -static 
__inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_src_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); case AF_INET6: - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } return 0; } -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_dst_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +static inline +unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); + return __xfrm4_spi_hash(addr, spi, proto, hmask); case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); + return __xfrm6_spi_hash(addr, spi, proto, hmask); } return 0; /*XXX*/ } +static inline unsigned int +xfrm_spi_hash(xfrm_address_t *addr, 
u32 spi, u8 proto, unsigned short family) +{ + return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); +} + +static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + struct hlist_head *nsrctable, + struct hlist_head *nspitable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_state *x; + + hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + unsigned int h; + + h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + hlist_add_head(&x->bydst, ndsttable+h); + + h = __xfrm_src_hash(&x->props.saddr, x->props.family, + nhashmask); + hlist_add_head(&x->bysrc, nsrctable+h); + + h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family, nhashmask); + hlist_add_head(&x->byspi, nspitable+h); + } +} + +static unsigned long xfrm_hash_new_size(void) +{ + return ((xfrm_state_hmask + 1) << 1) * + sizeof(struct hlist_head); +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + unsigned long nsize, osize; + unsigned int nhashmask, ohashmask; + int i; + + mutex_lock(&hash_resize_mutex); + + nsize = xfrm_hash_new_size(); + ndst = xfrm_state_hash_alloc(nsize); + if (!ndst) + goto out_unlock; + nsrc = xfrm_state_hash_alloc(nsize); + if (!nsrc) { + xfrm_state_hash_free(ndst, nsize); + goto out_unlock; + } + nspi = 
xfrm_state_hash_alloc(nsize); + if (!nspi) { + xfrm_state_hash_free(ndst, nsize); + xfrm_state_hash_free(nsrc, nsize); + goto out_unlock; + } + + spin_lock_bh(&xfrm_state_lock); + + nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + for (i = xfrm_state_hmask; i >= 0; i--) + xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + nhashmask); + + odst = xfrm_state_bydst; + osrc = xfrm_state_bysrc; + ospi = xfrm_state_byspi; + ohashmask = xfrm_state_hmask; + + xfrm_state_bydst = ndst; + xfrm_state_bysrc = nsrc; + xfrm_state_byspi = nspi; + xfrm_state_hmask = nhashmask; + + spin_unlock_bh(&xfrm_state_lock); + + osize = (ohashmask + 1) * sizeof(struct hlist_head); + xfrm_state_hash_free(odst, osize); + xfrm_state_hash_free(osrc, osize); + xfrm_state_hash_free(ospi, osize); + +out_unlock: + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->byspi); __xfrm_state_put(x); } + xfrm_state_num--; spin_unlock(&xfrm_state_lock); if (del_timer(&x->timer)) __xfrm_state_put(x); @@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i < xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: @@ -611,7 +743,7 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); @@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x) xfrm_state_hold(x); wake_up(&km_waitq); + + xfrm_state_num++; + + if (x->bydst.next != NULL && + (xfrm_state_hmask + 1) < xfrm_state_hashmax && + xfrm_state_num > xfrm_state_hmask) + schedule_work(&xfrm_hash_work); } void 
xfrm_state_insert(struct xfrm_state *x) @@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) { - u32 h; + unsigned int h; struct xfrm_state *x0; if (x->id.spi) @@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), int err = 0; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; @@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), goto out; } - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; @@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { - int i; + unsigned int sz; + + sz = sizeof(struct hlist_head) * 8; + + xfrm_state_bydst = xfrm_state_hash_alloc(sz); + xfrm_state_bysrc = xfrm_state_hash_alloc(sz); + xfrm_state_byspi = xfrm_state_hash_alloc(sz); + if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) + panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); + xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); - for (i=0; i Date: Thu, 24 Aug 2006 03:18:09 -0700 Subject: [XFRM]: Add generation count to xfrm_state and xfrm_dst. Each xfrm_state inserted gets a new generation counter value. When a bundle is created, the xfrm_dst objects get the current generation counter of the xfrm_state they will attach to at dst->xfrm. 
xfrm_bundle_ok() will return false if it sees an xfrm_dst with a generation count different from the generation count of the xfrm_state that dst points to. This provides a facility by which to passively and cheaply invalidate cached IPSEC routes during SA database changes. Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/xfrm6_policy.c | 1 + net/xfrm/xfrm_policy.c | 2 ++ net/xfrm/xfrm_state.c | 3 +++ 5 files changed, 10 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3405e5d9d51..fd4a300b5ba 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -104,6 +104,8 @@ struct xfrm_state struct xfrm_id id; struct xfrm_selector sel; + u32 genid; + /* Key manger bits */ struct { u8 state; @@ -590,6 +592,7 @@ struct xfrm_dst struct rt6_info rt6; } u; struct dst_entry *route; + u32 genid; u32 route_mtu_cached; u32 child_mtu_cached; u32 route_cookie; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 42d8ded0f96..479598566f1 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -93,6 +93,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 98c2fe449b3..9391c4c94fe 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -149,6 +149,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; if (rt->rt6i_node) xdst->route_cookie = rt->rt6i_node->fn_sernum; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1732159ffd0..7fc6944ee36 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1536,6 +1536,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, 
struct flowi *fl, int family, int str return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->genid != dst->xfrm->genid) + return 0; if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 445263c54c9..535d43c1472 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,6 +53,7 @@ static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; +static unsigned int xfrm_state_genid; static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { @@ -745,6 +746,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + x->genid = ++xfrm_state_genid; + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); -- cgit v1.2.3-70-g09d2 From a624c108e5595b5827796c253481436929cd5344 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:24:33 -0700 Subject: [XFRM]: Put more keys into destination hash function. Besides the daddr, key the hash on family and reqid too. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 75 ++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 535d43c1472..7e5daafc186 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. Hash table by daddr to find what SAs exist for given + 2. Hash table by (daddr,family,reqid) to find what SAs exist for given destination/tunnel endpoint. 
(output) */ @@ -55,62 +55,56 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) { - unsigned int h; - h = ntohl(addr->a4); - h = (h ^ (h>>16)) & hmask; - return h; -} - -static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) & hmask; - return h; + return ntohl(addr->a4); } -static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) { - return __xfrm4_dst_hash(addr, hmask); + return ntohl(addr->a6[2]^addr->a6[3]); } -static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) -{ - return __xfrm6_dst_hash(addr, hmask); -} - -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, + u32 reqid, unsigned short family, + unsigned int hmask) { + unsigned int h = family ^ reqid; switch (family) { case AF_INET: - return __xfrm4_src_hash(addr, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_src_hash(addr, hmask); - } - return 0; + h ^= __xfrm6_addr_hash(addr); + break; + }; + return (h ^ (h >> 16)) & hmask; } -static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid, + unsigned short family) { - return __xfrm_src_hash(addr, family, xfrm_state_hmask); + return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) +static inline unsigned 
__xfrm_src_hash(xfrm_address_t *addr, unsigned short family, + unsigned int hmask) { + unsigned int h = family; switch (family) { case AF_INET: - return __xfrm4_dst_hash(addr, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_dst_hash(addr, hmask); - } - return 0; + h ^= __xfrm6_addr_hash(addr); + break; + }; + return (h ^ (h >> 16)) & hmask; } -static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) { - return __xfrm_dst_hash(addr, family, xfrm_state_hmask); + return __xfrm_src_hash(addr, family, xfrm_state_hmask); } static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, @@ -190,7 +184,8 @@ static void xfrm_hash_transfer(struct hlist_head *list, hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { unsigned int h; - h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid, + x->props.family, nhashmask); hlist_add_head(&x->bydst, ndsttable+h); h = __xfrm_src_hash(&x->props.saddr, x->props.family, @@ -635,7 +630,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family); struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; @@ -744,15 +739,15 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h; x->genid = ++xfrm_state_genid; + h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); @@ -794,7 +789,7 @@ EXPORT_SYMBOL(xfrm_state_insert); /* 
xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; -- cgit v1.2.3-70-g09d2 From 2575b65434d56559bd03854450b9b6aaf19b9c90 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:26:44 -0700 Subject: [XFRM]: Simplify xfrm_spi_hash It can use __xfrm{4,6}_addr_hash(). Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7e5daafc186..98200397e09 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -107,35 +107,20 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, - unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) & hmask; - return h; -} - -static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, - unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) & hmask; - return h; -} - -static inline -unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { + unsigned int h = spi ^ proto; switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto, hmask); 
+ h ^= __xfrm6_addr_hash(addr); + break; } - return 0; /*XXX*/ + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int -- cgit v1.2.3-70-g09d2 From c7f5ea3a4d1ae6b3b426e113358fdc57494bc754 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:29:04 -0700 Subject: [XFRM]: Do not flush all bundles on SA insert. Instead, simply set all potentially aliasing existing xfrm_state objects to have the current generation counter value. This will make routes get relooked up the next time an existing route mentioning these aliased xfrm_state objects gets used, via xfrm_dst_check(). Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 10 ---------- net/xfrm/xfrm_state.c | 25 ++++++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fd4a300b5ba..a620a43c9ee 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -996,7 +996,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); -extern void xfrm_flush_all_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7fc6944ee36..cfa5c692f2e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1478,16 +1478,6 @@ int xfrm_flush_bundles(void) return 0; } -static int always_true(struct dst_entry *dst) -{ - return 1; -} - -void xfrm_flush_all_bundles(void) -{ - xfrm_prune_bundles(always_true); -} - void xfrm_init_pmtu(struct dst_entry *dst) { do { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98200397e09..77ef796c9d0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c 
@@ -761,13 +761,30 @@ static void __xfrm_state_insert(struct xfrm_state *x) schedule_work(&xfrm_hash_work); } +/* xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + struct hlist_node *entry; + unsigned int h; + + h = xfrm_dst_hash(&xnew->id.daddr, reqid, family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + x->genid = xfrm_state_genid; + } +} + void xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); - - xfrm_flush_all_bundles(); } EXPORT_SYMBOL(xfrm_state_insert); @@ -889,15 +906,13 @@ int xfrm_state_add(struct xfrm_state *x) x->id.proto, &x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); - if (!err) - xfrm_flush_all_bundles(); - if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); -- cgit v1.2.3-70-g09d2 From 1c0953997567b22e32fdf85d3b4bc0f2461fd161 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:30:28 -0700 Subject: [XFRM]: Purge dst references to deleted SAs passively. Just let GC and other normal mechanisms take care of getting rid of DST cache references to deleted xfrm_state objects instead of walking all the policy bundles. Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 17 ----------------- 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a620a43c9ee..c7870b6eae0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -995,7 +995,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_flush_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cfa5c692f2e..1bcaae4adf3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1472,7 +1472,7 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77ef796c9d0..9ff00b7d6ad 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -256,8 +256,6 @@ static struct work_struct xfrm_state_gc_work; static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; - int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -293,11 +291,6 @@ static void xfrm_state_gc_task(void *data) struct hlist_node *entry, *tmp; struct hlist_head gc_list; - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } - spin_lock_bh(&xfrm_state_gc_lock); gc_list.first = xfrm_state_gc_list.first; INIT_HLIST_HEAD(&xfrm_state_gc_list); @@ -454,16 +447,6 @@ int __xfrm_state_delete(struct 
xfrm_state *x) if (del_timer(&x->rtimer)) __xfrm_state_put(x); - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. - */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } - /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. -- cgit v1.2.3-70-g09d2 From a47f0ce05ae12ce9acad62896ff703175764104e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:54:22 -0700 Subject: [XFRM]: Kill excessive refcounting of xfrm_state objects. The refcounting done for timers and hash table insertions are just wasted cycles. We can eliminate all of this refcounting because: 1) The implicit refcount when the xfrm_state object is active will always be held while the object is in the hash tables. We never kfree() the xfrm_state until long after we've made sure that it has been unhashed. 2) Timers are even easier. Once we mark that x->km.state as anything other than XFRM_STATE_VALID (__xfrm_state_delete sets it to XFRM_STATE_DEAD), any timer that fires will do nothing and return without rearming the timer. Therefore we can defer the del_timer calls until when the object is about to be freed up during GC. We have to use del_timer_sync() and defer it to GC because we can't do a del_timer_sync() while holding x->lock which all callers of __xfrm_state_delete hold. This makes SA changes even more light-weight. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 53 ++++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 41 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9ff00b7d6ad..0bc6a4b1cea 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -266,10 +266,8 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid); static void xfrm_state_gc_destroy(struct xfrm_state *x) { - if (del_timer(&x->timer)) - BUG(); - if (del_timer(&x->rtimer)) - BUG(); + del_timer_sync(&x->timer); + del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); @@ -361,9 +359,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX && - !mod_timer(&x->timer, jiffies + make_jiffies(next))) - xfrm_state_hold(x); + if (next != LONG_MAX) + mod_timer(&x->timer, jiffies + make_jiffies(next)); + goto out; expired: @@ -378,7 +376,6 @@ expired: out: spin_unlock(&x->lock); - xfrm_state_put(x); } static void xfrm_replay_timer_handler(unsigned long data); @@ -433,19 +430,11 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); hlist_del(&x->bydst); - __xfrm_state_put(x); hlist_del(&x->bysrc); - __xfrm_state_put(x); - if (x->id.spi) { + if (x->id.spi) hlist_del(&x->byspi); - __xfrm_state_put(x); - } xfrm_state_num--; spin_unlock(&xfrm_state_lock); - if (del_timer(&x->timer)) - __xfrm_state_put(x); - if (del_timer(&x->rtimer)) - __xfrm_state_put(x); /* All xfrm_state objects are created by xfrm_state_alloc. 
* The xfrm_state_alloc call gives a reference, and that @@ -676,17 +665,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; hlist_add_head(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); h = xfrm_src_hash(saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); } else { @@ -713,26 +698,20 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); } - if (!mod_timer(&x->timer, jiffies + HZ)) - xfrm_state_hold(x); - - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies + HZ); + if (x->replay_maxage) + mod_timer(&x->rtimer, jiffies + x->replay_maxage); wake_up(&km_waitq); @@ -844,10 +823,8 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); - xfrm_state_hold(x); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); - xfrm_state_hold(x); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -955,8 +932,7 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; 
- if (!mod_timer(&x1->timer, jiffies + HZ)) - xfrm_state_hold(x1); + mod_timer(&x1->timer, jiffies + HZ); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -981,8 +957,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - if (!mod_timer(&x->timer, jiffies)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies); return -EINVAL; } @@ -1177,7 +1152,6 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); } @@ -1264,10 +1238,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) km_state_notify(x, &c); if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) { - xfrm_state_hold(x); + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; - } } EXPORT_SYMBOL(xfrm_replay_notify); @@ -1285,7 +1257,6 @@ static void xfrm_replay_timer_handler(unsigned long data) } spin_unlock(&x->lock); - xfrm_state_put(x); } int xfrm_replay_check(struct xfrm_state *x, u32 seq) -- cgit v1.2.3-70-g09d2 From c1969f294e624d5b642fc8e6ab9468b7c7791fa8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:00:03 -0700 Subject: [XFRM]: Hash xfrm_state objects by source address too. The source address is always non-prefixed so we should use it to help give entropy to the bydst hash. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 53 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 18 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0bc6a4b1cea..37213f9f6a0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -65,26 +65,40 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) return ntohl(addr->a6[2]^addr->a6[3]); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { unsigned int h = family ^ reqid; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(addr); + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(addr); + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; }; return (h ^ (h >> 16)) & hmask; } -static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid, +static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, + u32 reqid, unsigned short family) { - return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, @@ -108,25 +122,25 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) +__xfrm_spi_hash(xfrm_address_t *daddr, u32 
spi, u8 proto, + unsigned short family, unsigned int hmask) { unsigned int h = spi ^ proto; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(addr); + h ^= __xfrm4_addr_hash(daddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(addr); + h ^= __xfrm6_addr_hash(daddr); break; } return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int -xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) @@ -169,8 +183,9 @@ static void xfrm_hash_transfer(struct hlist_head *list, hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { unsigned int h; - h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid, - x->props.family, nhashmask); + h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family, + nhashmask); hlist_add_head(&x->bydst, ndsttable+h); h = __xfrm_src_hash(&x->props.saddr, x->props.family, @@ -587,7 +602,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; @@ -696,7 +711,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); + h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(&x->props.saddr, x->props.family); @@ -732,11 +748,12 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) struct hlist_node *entry; unsigned int h; - h 
= xfrm_dst_hash(&xnew->id.daddr, reqid, family); + h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && + !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) x->genid = xfrm_state_genid; } } @@ -753,7 +770,7 @@ EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, reqid, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; -- cgit v1.2.3-70-g09d2 From 44e36b42a8378be1dcf7e6f8a1cb2710a8903387 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:50:50 -0700 Subject: [XFRM]: Extract common hashing code into xfrm_hash.[ch] Signed-off-by: David S. Miller --- net/xfrm/Makefile | 3 +- net/xfrm/xfrm_hash.c | 41 ++++++++++++++++ net/xfrm/xfrm_hash.h | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 95 ++++-------------------------------- net/xfrm/xfrm_state.c | 128 +++++++------------------------------------------ 5 files changed, 195 insertions(+), 200 deletions(-) create mode 100644 net/xfrm/xfrm_hash.c create mode 100644 net/xfrm/xfrm_hash.h (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 693aac1aa83..de3c1a625a4 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -2,6 +2,7 @@ # Makefile for the XFRM subsystem. 
# -obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ + xfrm_input.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c new file mode 100644 index 00000000000..37643bb8768 --- /dev/null +++ b/net/xfrm/xfrm_hash.c @@ -0,0 +1,41 @@ +/* xfrm_hash.c: Common hash table code. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include +#include +#include +#include +#include +#include + +#include "xfrm_hash.h" + +struct hlist_head *xfrm_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +void xfrm_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h new file mode 100644 index 00000000000..d3abb0b7dc6 --- /dev/null +++ b/net/xfrm/xfrm_hash.h @@ -0,0 +1,128 @@ +#ifndef _XFRM_HASH_H +#define _XFRM_HASH_H + +#include +#include + +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a4); +} + +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a6[2] ^ addr->a6[3]); +} + +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, + u32 reqid, unsigned 
short family, + unsigned int hmask) +{ + unsigned int h = family ^ reqid; + switch (family) { + case AF_INET: + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + case AF_INET6: + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, + unsigned short family, + unsigned int hmask) +{ + unsigned int h = family; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(saddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(saddr); + break; + }; + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) +{ + unsigned int h = spi ^ proto; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(daddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(daddr); + break; + } + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h 
^= (h >> 16); + return h & hmask; +} + +extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); +extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); + +#endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 087a5443b05..b446ca31fec 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,12 +22,12 @@ #include #include #include -#include -#include #include #include #include +#include "xfrm_hash.h" + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -409,62 +409,11 @@ static struct hlist_head *xfrm_policy_byidx __read_mostly; static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int __idx_hash(u32 index, unsigned int hmask) -{ - return (index ^ (index >> 8)) & hmask; -} - static inline unsigned int idx_hash(u32 index) { return __idx_hash(index, xfrm_idx_hmask); } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) -{ - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; - unsigned int h = 0; - - switch (family) { - case AF_INET: - if (sel->prefixlen_d != 32 || - sel->prefixlen_s != 32) - return hmask + 1; - - h = ntohl(daddr->a4 ^ saddr->a4); - break; - - case AF_INET6: - if (sel->prefixlen_d != 128 || - sel->prefixlen_s != 128) - return hmask + 1; - - h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) -{ - unsigned int h = 0; - - switch (family) { - case AF_INET: - h = ntohl(daddr->a4 ^ saddr->a4); - break; - - case AF_INET6: - h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - static struct hlist_head *policy_hash_bysel(struct 
xfrm_selector *sel, unsigned short family, int dir) { unsigned int hmask = xfrm_policy_bydst[dir].hmask; @@ -483,34 +432,6 @@ static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address return xfrm_policy_bydst[dir].table + hash; } -static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) @@ -553,7 +474,7 @@ static void xfrm_bydst_resize(int dir) unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *odst = xfrm_policy_bydst[dir].table; - struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize); + struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; if (!ndst) @@ -569,7 +490,7 @@ static void xfrm_bydst_resize(int dir) write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } static void xfrm_byidx_resize(int total) @@ -578,7 +499,7 @@ static void xfrm_byidx_resize(int total) unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = xfrm_policy_byidx; - struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize); + struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx) @@ -594,7 +515,7 @@ static void xfrm_byidx_resize(int total) 
write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); + xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } static inline int xfrm_bydst_should_resize(int dir, int *total) @@ -2071,7 +1992,7 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_policy_hash_alloc(sz); + xfrm_policy_byidx = xfrm_hash_alloc(sz); xfrm_idx_hmask = hmask; if (!xfrm_policy_byidx) panic("XFRM: failed to allocate byidx hash\n"); @@ -2082,7 +2003,7 @@ static void __init xfrm_policy_init(void) INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); htab = &xfrm_policy_bydst[dir]; - htab->table = xfrm_policy_hash_alloc(sz); + htab->table = xfrm_hash_alloc(sz); htab->hmask = hmask; if (!htab->table) panic("XFRM: failed to allocate bydst hash\n"); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 37213f9f6a0..4341795eb24 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,11 +18,11 @@ #include #include #include -#include -#include #include #include +#include "xfrm_hash.h" + struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); @@ -55,44 +55,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a4); -} - -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a6[2]^addr->a6[3]); -} - -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a4 ^ saddr->a4); -} - -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); -} - -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, - u32 reqid, unsigned 
short family, - unsigned int hmask) -{ - unsigned int h = family ^ reqid; - switch (family) { - case AF_INET: - h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - case AF_INET6: - h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -101,76 +63,18 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(addr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(addr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - -static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, + unsigned short family) { return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, - unsigned short family, unsigned int hmask) -{ - unsigned int h = spi ^ proto; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(daddr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(daddr); - break; - } - return (h ^ (h >> 10) ^ (h >> 20)) & hmask; -} - static inline unsigned int xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } -static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void 
xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, struct hlist_head *nsrctable, @@ -216,18 +120,18 @@ static void xfrm_hash_resize(void *__unused) mutex_lock(&hash_resize_mutex); nsize = xfrm_hash_new_size(); - ndst = xfrm_state_hash_alloc(nsize); + ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; - nsrc = xfrm_state_hash_alloc(nsize); + nsrc = xfrm_hash_alloc(nsize); if (!nsrc) { - xfrm_state_hash_free(ndst, nsize); + xfrm_hash_free(ndst, nsize); goto out_unlock; } - nspi = xfrm_state_hash_alloc(nsize); + nspi = xfrm_hash_alloc(nsize); if (!nspi) { - xfrm_state_hash_free(ndst, nsize); - xfrm_state_hash_free(nsrc, nsize); + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); goto out_unlock; } @@ -251,9 +155,9 @@ static void xfrm_hash_resize(void *__unused) spin_unlock_bh(&xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); - xfrm_state_hash_free(odst, osize); - xfrm_state_hash_free(osrc, osize); - xfrm_state_hash_free(ospi, osize); + xfrm_hash_free(odst, osize); + xfrm_hash_free(osrc, osize); + xfrm_hash_free(ospi, osize); out_unlock: mutex_unlock(&hash_resize_mutex); @@ -1643,9 +1547,9 @@ void __init xfrm_state_init(void) sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_state_hash_alloc(sz); - xfrm_state_bysrc = xfrm_state_hash_alloc(sz); - xfrm_state_byspi = xfrm_state_hash_alloc(sz); + xfrm_state_bydst = xfrm_hash_alloc(sz); + xfrm_state_bysrc = xfrm_hash_alloc(sz); + xfrm_state_byspi = xfrm_hash_alloc(sz); if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); -- cgit v1.2.3-70-g09d2 From a9917c06652165fe4eeb9ab7a5d1e0674e90e508 Mon Sep 17 00:00:00 2001 From: 
Masahide NAKAMURA Date: Thu, 31 Aug 2006 15:14:32 -0700 Subject: [XFRM] STATE: Fix flushing with hash mask. This is a minor fix about transformation state flushing for net-2.6.19. Please apply it. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4341795eb24..9f63edd3934 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -384,7 +384,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < xfrm_state_hmask; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: -- cgit v1.2.3-70-g09d2