summaryrefslogtreecommitdiffstats
path: root/net/xfrm/xfrm_policy.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xfrm/xfrm_policy.c')
-rw-r--r--net/xfrm/xfrm_policy.c192
1 files changed, 175 insertions, 17 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index beeed602aeb..4c4e457e788 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,6 +39,11 @@
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100
+struct xfrm_flo {
+ struct dst_entry *dst_orig;
+ u8 flags;
+};
+
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
@@ -344,12 +349,39 @@ static inline unsigned int idx_hash(struct net *net, u32 index)
return __idx_hash(index, net->xfrm.policy_idx_hmask);
}
+/* calculate policy hash thresholds */
+static void __get_hash_thresh(struct net *net,
+ unsigned short family, int dir,
+ u8 *dbits, u8 *sbits)
+{
+ switch (family) {
+ case AF_INET:
+ *dbits = net->xfrm.policy_bydst[dir].dbits4;
+ *sbits = net->xfrm.policy_bydst[dir].sbits4;
+ break;
+
+ case AF_INET6:
+ *dbits = net->xfrm.policy_bydst[dir].dbits6;
+ *sbits = net->xfrm.policy_bydst[dir].sbits6;
+ break;
+
+ default:
+ *dbits = 0;
+ *sbits = 0;
+ }
+}
+
static struct hlist_head *policy_hash_bysel(struct net *net,
const struct xfrm_selector *sel,
unsigned short family, int dir)
{
unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
- unsigned int hash = __sel_hash(sel, family, hmask);
+ unsigned int hash;
+ u8 dbits;
+ u8 sbits;
+
+ __get_hash_thresh(net, family, dir, &dbits, &sbits);
+ hash = __sel_hash(sel, family, hmask, dbits, sbits);
return (hash == hmask + 1 ?
&net->xfrm.policy_inexact[dir] :
@@ -362,25 +394,35 @@ static struct hlist_head *policy_hash_direct(struct net *net,
unsigned short family, int dir)
{
unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
- unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
+ unsigned int hash;
+ u8 dbits;
+ u8 sbits;
+
+ __get_hash_thresh(net, family, dir, &dbits, &sbits);
+ hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
return net->xfrm.policy_bydst[dir].table + hash;
}
-static void xfrm_dst_hash_transfer(struct hlist_head *list,
+static void xfrm_dst_hash_transfer(struct net *net,
+ struct hlist_head *list,
struct hlist_head *ndsttable,
- unsigned int nhashmask)
+ unsigned int nhashmask,
+ int dir)
{
struct hlist_node *tmp, *entry0 = NULL;
struct xfrm_policy *pol;
unsigned int h0 = 0;
+ u8 dbits;
+ u8 sbits;
redo:
hlist_for_each_entry_safe(pol, tmp, list, bydst) {
unsigned int h;
+ __get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
- pol->family, nhashmask);
+ pol->family, nhashmask, dbits, sbits);
if (!entry0) {
hlist_del(&pol->bydst);
hlist_add_head(&pol->bydst, ndsttable+h);
@@ -434,7 +476,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
write_lock_bh(&net->xfrm.xfrm_policy_lock);
for (i = hmask; i >= 0; i--)
- xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
+ xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
net->xfrm.policy_bydst[dir].table = ndst;
net->xfrm.policy_bydst[dir].hmask = nhashmask;
@@ -529,6 +571,86 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_unlock(&hash_resize_mutex);
}
+static void xfrm_hash_rebuild(struct work_struct *work)
+{
+ struct net *net = container_of(work, struct net,
+ xfrm.policy_hthresh.work);
+ unsigned int hmask;
+ struct xfrm_policy *pol;
+ struct xfrm_policy *policy;
+ struct hlist_head *chain;
+ struct hlist_head *odst;
+ struct hlist_node *newpos;
+ int i;
+ int dir;
+ unsigned seq;
+ u8 lbits4, rbits4, lbits6, rbits6;
+
+ mutex_lock(&hash_resize_mutex);
+
+ /* read selector prefixlen thresholds */
+ do {
+ seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
+
+ lbits4 = net->xfrm.policy_hthresh.lbits4;
+ rbits4 = net->xfrm.policy_hthresh.rbits4;
+ lbits6 = net->xfrm.policy_hthresh.lbits6;
+ rbits6 = net->xfrm.policy_hthresh.rbits6;
+ } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
+
+ write_lock_bh(&net->xfrm.xfrm_policy_lock);
+
+ /* reset the bydst and inexact table in all directions */
+ for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
+ hmask = net->xfrm.policy_bydst[dir].hmask;
+ odst = net->xfrm.policy_bydst[dir].table;
+ for (i = hmask; i >= 0; i--)
+ INIT_HLIST_HEAD(odst + i);
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+ /* dir out => dst = remote, src = local */
+ net->xfrm.policy_bydst[dir].dbits4 = rbits4;
+ net->xfrm.policy_bydst[dir].sbits4 = lbits4;
+ net->xfrm.policy_bydst[dir].dbits6 = rbits6;
+ net->xfrm.policy_bydst[dir].sbits6 = lbits6;
+ } else {
+ /* dir in/fwd => dst = local, src = remote */
+ net->xfrm.policy_bydst[dir].dbits4 = lbits4;
+ net->xfrm.policy_bydst[dir].sbits4 = rbits4;
+ net->xfrm.policy_bydst[dir].dbits6 = lbits6;
+ net->xfrm.policy_bydst[dir].sbits6 = rbits6;
+ }
+ }
+
+ /* re-insert all policies by order of creation */
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ newpos = NULL;
+ chain = policy_hash_bysel(net, &policy->selector,
+ policy->family,
+ xfrm_policy_id2dir(policy->index));
+ hlist_for_each_entry(pol, chain, bydst) {
+ if (policy->priority >= pol->priority)
+ newpos = &pol->bydst;
+ else
+ break;
+ }
+ if (newpos)
+ hlist_add_behind(&policy->bydst, newpos);
+ else
+ hlist_add_head(&policy->bydst, chain);
+ }
+
+ write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+ mutex_unlock(&hash_resize_mutex);
+}
+
+void xfrm_policy_hash_rebuild(struct net *net)
+{
+ schedule_work(&net->xfrm.policy_hthresh.work);
+}
+EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
+
/* Generate new index... KAME seems to generate them ordered by cost
* of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
@@ -1839,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
struct xfrm_policy *pol = xdst->pols[0];
struct xfrm_policy_queue *pq = &pol->polq;
- const struct sk_buff *fclone = skb + 1;
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE)) {
+ if (unlikely(skb_fclone_busy(skb))) {
kfree_skb(skb);
return 0;
}
@@ -1877,13 +1997,14 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
}
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
- struct dst_entry *dst,
+ struct xfrm_flo *xflo,
const struct flowi *fl,
int num_xfrms,
u16 family)
{
int err;
struct net_device *dev;
+ struct dst_entry *dst;
struct dst_entry *dst1;
struct xfrm_dst *xdst;
@@ -1891,9 +2012,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
if (IS_ERR(xdst))
return xdst;
- if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0)
+ if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
+ net->xfrm.sysctl_larval_drop ||
+ num_xfrms <= 0)
return xdst;
+ dst = xflo->dst_orig;
dst1 = &xdst->u.dst;
dst_hold(dst);
xdst->route = dst;
@@ -1935,7 +2059,7 @@ static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
struct flow_cache_object *oldflo, void *ctx)
{
- struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+ struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst, *new_xdst;
int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
@@ -1976,7 +2100,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
goto make_dummy_bundle;
}
- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+ xflo->dst_orig);
if (IS_ERR(new_xdst)) {
err = PTR_ERR(new_xdst);
if (err != -EAGAIN)
@@ -2010,7 +2135,7 @@ make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
* either because the policy blocks, has no transformations or
* we could not build template (no xfrm_states).*/
- xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
+ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
return ERR_CAST(xdst);
@@ -2104,13 +2229,18 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
}
if (xdst == NULL) {
+ struct xfrm_flo xflo;
+
+ xflo.dst_orig = dst_orig;
+ xflo.flags = flags;
+
/* To accelerate a bit... */
if ((dst_orig->flags & DST_NOXFRM) ||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
flo = flow_cache_lookup(net, fl, family, dir,
- xfrm_bundle_lookup, dst_orig);
+ xfrm_bundle_lookup, &xflo);
if (flo == NULL)
goto nopol;
if (IS_ERR(flo)) {
@@ -2138,7 +2268,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
- return make_blackhole(net, family, dst_orig);
+ return ERR_PTR(-EREMOTE);
}
err = -EAGAIN;
@@ -2195,6 +2325,23 @@ dropdst:
}
EXPORT_SYMBOL(xfrm_lookup);
+/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
+ * Otherwise we may send out blackholed packets.
+ */
+struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
+ const struct flowi *fl,
+ struct sock *sk, int flags)
+{
+ struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
+ flags | XFRM_LOOKUP_QUEUE);
+
+ if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+ return make_blackhole(net, dst_orig->ops->family, dst_orig);
+
+ return dst;
+}
+EXPORT_SYMBOL(xfrm_lookup_route);
+
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
@@ -2460,7 +2607,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
skb_dst_force(skb);
- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
+ dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst)) {
res = 0;
dst = NULL;
@@ -2830,10 +2977,21 @@ static int __net_init xfrm_policy_init(struct net *net)
if (!htab->table)
goto out_bydst;
htab->hmask = hmask;
+ htab->dbits4 = 32;
+ htab->sbits4 = 32;
+ htab->dbits6 = 128;
+ htab->sbits6 = 128;
}
+ net->xfrm.policy_hthresh.lbits4 = 32;
+ net->xfrm.policy_hthresh.rbits4 = 32;
+ net->xfrm.policy_hthresh.lbits6 = 128;
+ net->xfrm.policy_hthresh.rbits6 = 128;
+
+ seqlock_init(&net->xfrm.policy_hthresh.lock);
INIT_LIST_HEAD(&net->xfrm.policy_all);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
+ INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
if (net_eq(net, &init_net))
register_netdevice_notifier(&xfrm_dev_notifier);
return 0;