diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2007-06-10 17:22:39 -0700 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-07-10 22:05:57 -0700 |
commit | e06e7c615877026544ad7f8b309d1a3706410383 (patch) | |
tree | eb087b49279e6db492039a5d684ca9acb13265a3 /net/ipv4 | |
parent | 4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff) |
[IPV4]: The scheduled removal of multipath cached routing support.
With help from Chris Wedgwood.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 42 | ||||
-rw-r--r-- | net/ipv4/Makefile | 5 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 16 | ||||
-rw-r--r-- | net/ipv4/multipath.c | 55 | ||||
-rw-r--r-- | net/ipv4/multipath_drr.c | 249 | ||||
-rw-r--r-- | net/ipv4/multipath_random.c | 114 | ||||
-rw-r--r-- | net/ipv4/multipath_rr.c | 95 | ||||
-rw-r--r-- | net/ipv4/multipath_wrandom.c | 329 | ||||
-rw-r--r-- | net/ipv4/route.c | 259 |
10 files changed, 11 insertions, 1157 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 010fbb2d45e..fb790977425 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -116,48 +116,6 @@ config IP_ROUTE_MULTIPATH equal "cost" and chooses one of them in a non-deterministic fashion if a matching packet arrives. -config IP_ROUTE_MULTIPATH_CACHED - bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" - depends on IP_ROUTE_MULTIPATH - help - Normally, equal cost multipath routing is not supported by the - routing cache. If you say Y here, alternative routes are cached - and on cache lookup a route is chosen in a configurable fashion. - - If unsure, say N. - -config IP_ROUTE_MULTIPATH_RR - tristate "MULTIPATH: round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen according to Round Robin - -config IP_ROUTE_MULTIPATH_RANDOM - tristate "MULTIPATH: random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a random fashion. Actually, - there is no weight for a route. The advantage of this policy - is that it is implemented stateless and therefore introduces only - a very small delay. - -config IP_ROUTE_MULTIPATH_WRANDOM - tristate "MULTIPATH: weighted random algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Multipath routes are chosen in a weighted random fashion. - The per route weights are the weights visible via ip route 2. As the - corresponding state management introduces some overhead routing delay - is increased. - -config IP_ROUTE_MULTIPATH_DRR - tristate "MULTIPATH: interface round robin algorithm" - depends on IP_ROUTE_MULTIPATH_CACHED - help - Connections are distributed in a round robin fashion over the - available interfaces. This policy makes sense if the connections - should be primarily distributed on interfaces and not on routes. - config IP_ROUTE_VERBOSE bool "IP: verbose route monitoring" depends on IP_ADVANCED_ROUTER diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4ff6c151d7f..fbf1674e0c2 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -29,14 +29,9 @@ obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o -obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 311d633f7f3..2eb909be804 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -453,7 +453,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, - [RTA_MP_ALGO] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -515,9 +514,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, case RTA_FLOW: cfg->fc_flow = nla_get_u32(attr); break; - case RTA_MP_ALGO: - cfg->fc_mp_alg = nla_get_u32(attr); - break; case RTA_TABLE: cfg->fc_table = nla_get_u32(attr); break; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index bb94550d95c..c434119deb5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -42,7 +42,6 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> -#include <net/ip_mp_alg.h> #include <net/netlink.h> #include <net/nexthop.h> @@ -697,13 +696,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; } #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (cfg->fc_mp_alg) { - if (cfg->fc_mp_alg < IP_MP_ALG_NONE || - cfg->fc_mp_alg > IP_MP_ALG_MAX) - goto err_inval; - } -#endif err = -ENOBUFS; if (fib_info_cnt >= fib_hash_size) { @@ -791,10 +783,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = cfg->fc_mp_alg; -#endif - if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; @@ -940,10 +928,6 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - res->netmask = mask; - res->network = zone & inet_make_mask(prefixlen); -#endif atomic_inc(&res->fi->fib_clntref); return 0; } diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c deleted file mode 100644 index 4e9ca7c7640..00000000000 --- a/net/ipv4/multipath.c +++ /dev/null @@ -1,55 +0,0 @@ -/* multipath.c: IPV4 multipath algorithm support. - * - * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com> - * Copyright (C) 2005 David S. Miller <davem@davemloft.net> - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/spinlock.h> - -#include <net/ip_mp_alg.h> - -static DEFINE_SPINLOCK(alg_table_lock); -struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; - -int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - int err; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX || - !ops->mp_alg_select_route) - return -EINVAL; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot != NULL) { - err = -EBUSY; - } else { - *slot = ops; - err = 0; - } - spin_unlock(&alg_table_lock); - - return err; -} -EXPORT_SYMBOL(multipath_alg_register); - -void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) -{ - struct ip_mp_alg_ops **slot; - - if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX) - return; - - spin_lock(&alg_table_lock); - slot = &ip_mp_alg_table[n]; - if (*slot == ops) - *slot = NULL; - spin_unlock(&alg_table_lock); - - synchronize_net(); -} -EXPORT_SYMBOL(multipath_alg_unregister); diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c deleted file mode 100644 index b03c5ca2c82..00000000000 --- a/net/ipv4/multipath_drr.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Device round robin policy for multipath. - * - * - * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_mp_alg.h> - -struct multipath_device { - int ifi; /* interface index of device */ - atomic_t usecount; - int allocated; -}; - -#define MULTIPATH_MAX_DEVICECANDIDATES 10 - -static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; -static DEFINE_SPINLOCK(state_lock); - -static int inline __multipath_findslot(void) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated == 0) - return i; - } - return -1; -} - -static int inline __multipath_finddev(int ifindex) -{ - int i; - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { - if (state[i].allocated != 0 && - state[i].ifi == ifindex) - return i; - } - return -1; -} - -static int drr_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - int devidx; - - switch (event) { - case NETDEV_UNREGISTER: - case NETDEV_DOWN: - spin_lock_bh(&state_lock); - - devidx = __multipath_finddev(dev->ifindex); - if (devidx != -1) { - state[devidx].allocated = 0; - state[devidx].ifi = 0; - atomic_set(&state[devidx].usecount, 0); - } - - spin_unlock_bh(&state_lock); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block drr_dev_notifier = { - .notifier_call = drr_dev_event, -}; - - -static void drr_safe_inc(atomic_t *usecount) -{ - int n; - - atomic_inc(usecount); - - n = atomic_read(usecount); - if (n <= 0) { - int i; - - spin_lock_bh(&state_lock); - - for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) - atomic_set(&state[i].usecount, 0); - - spin_unlock_bh(&state_lock); - } -} - -static void drr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *cur_min; - int min_usecount = -1; - int devidx = -1; - int cur_min_devidx = -1; - - /* 1. make sure all alt. nexthops have the same GC related data */ - /* 2. determine the new candidate to be returned */ - result = NULL; - cur_min = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - int nh_ifidx = nh->u.dst.dev->ifindex; - - nh->u.dst.lastuse = jiffies; - nh->u.dst.__use++; - if (result != NULL) - continue; - - /* search for the output interface */ - - /* this is not SMP safe, only add/remove are - * SMP safe as wrong usecount updates have no big - * impact - */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add the interface to the array - * SMP safe - */ - spin_lock_bh(&state_lock); - - /* due to SMP: search again */ - devidx = __multipath_finddev(nh_ifidx); - if (devidx == -1) { - /* add entry for device */ - devidx = __multipath_findslot(); - if (devidx == -1) { - /* unlikely but possible */ - continue; - } - - state[devidx].allocated = 1; - state[devidx].ifi = nh_ifidx; - atomic_set(&state[devidx].usecount, 0); - min_usecount = 0; - } - - spin_unlock_bh(&state_lock); - } - - if (min_usecount == 0) { - /* if the device has not been used it is - * the primary target - */ - drr_safe_inc(&state[devidx].usecount); - result = nh; - } else { - int count = - atomic_read(&state[devidx].usecount); - - if (min_usecount == -1 || - count < min_usecount) { - cur_min = nh; - cur_min_devidx = devidx; - min_usecount = count; - } - } - } - } - - if (!result) { - if (cur_min) { - drr_safe_inc(&state[cur_min_devidx].usecount); - result = cur_min; - } else { - result = first; - } - } - - *rp = result; -} - -static struct ip_mp_alg_ops drr_ops = { - .mp_alg_select_route = drr_select_route, -}; - -static int __init drr_init(void) -{ - int err = register_netdevice_notifier(&drr_dev_notifier); - - if (err) - return err; - - err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); - if (err) - goto fail; - - return 0; - -fail: - unregister_netdevice_notifier(&drr_dev_notifier); - return err; -} - -static void __exit drr_exit(void) -{ - unregister_netdevice_notifier(&drr_dev_notifier); - multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR); -} - -module_init(drr_init); -module_exit(drr_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c deleted file mode 100644 index c312785d14d..00000000000 --- a/net/ipv4/multipath_random.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Random policy for multipath. - * - * - * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <linux/random.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_mp_alg.h> - -#define MULTIPATH_MAX_CANDIDATES 40 - -static void random_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - unsigned char candidate_count = 0; - - /* count all candidate */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) - ++candidate_count; - } - - /* choose a random candidate */ - decision = first; - if (candidate_count > 1) { - unsigned char i = 0; - unsigned char candidate_no = (unsigned char) - (random32() % candidate_count); - - /* find chosen candidate and adjust GC data for all candidates - * to ensure they stay in cache - */ - for (rt = first; rt; rt = rt->u.dst.rt_next) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - rt->u.dst.lastuse = jiffies; - - if (i == candidate_no) - decision = rt; - - if (i >= candidate_count) - break; - - i++; - } - } - } - - decision->u.dst.__use++; - *rp = decision; -} - -static struct ip_mp_alg_ops random_ops = { - .mp_alg_select_route = random_select_route, -}; - -static int __init random_init(void) -{ - return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM); -} - -static void __exit random_exit(void) -{ - multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM); -} - -module_init(random_init); -module_exit(random_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c deleted file mode 100644 index 0ad22524f45..00000000000 --- a/net/ipv4/multipath_rr.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Round robin policy for multipath. - * - * - * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_mp_alg.h> - -static void rr_select_route(const struct flowi *flp, - struct rtable *first, struct rtable **rp) -{ - struct rtable *nh, *result, *min_use_cand = NULL; - int min_use = -1; - - /* 1. make sure all alt. nexthops have the same GC related data - * 2. determine the new candidate to be returned - */ - result = NULL; - for (nh = rcu_dereference(first); nh; - nh = rcu_dereference(nh->u.dst.rt_next)) { - if ((nh->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&nh->fl, flp)) { - nh->u.dst.lastuse = jiffies; - - if (min_use == -1 || nh->u.dst.__use < min_use) { - min_use = nh->u.dst.__use; - min_use_cand = nh; - } - } - } - result = min_use_cand; - if (!result) - result = first; - - result->u.dst.__use++; - *rp = result; -} - -static struct ip_mp_alg_ops rr_ops = { - .mp_alg_select_route = rr_select_route, -}; - -static int __init rr_init(void) -{ - return multipath_alg_register(&rr_ops, IP_MP_ALG_RR); -} - -static void __exit rr_exit(void) -{ - multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR); -} - -module_init(rr_init); -module_exit(rr_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c deleted file mode 100644 index 57c50369453..00000000000 --- a/net/ipv4/multipath_wrandom.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Weighted random policy for multipath. - * - * - * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ - * - * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/kernel.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/igmp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/mroute.h> -#include <linux/init.h> -#include <linux/random.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <linux/skbuff.h> -#include <net/sock.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/raw.h> -#include <linux/notifier.h> -#include <linux/if_arp.h> -#include <linux/netfilter_ipv4.h> -#include <net/ipip.h> -#include <net/checksum.h> -#include <net/ip_fib.h> -#include <net/ip_mp_alg.h> - -#define MULTIPATH_STATE_SIZE 15 - -struct multipath_candidate { - struct multipath_candidate *next; - int power; - struct rtable *rt; -}; - -struct multipath_dest { - struct list_head list; - - const struct fib_nh *nh_info; - __be32 netmask; - __be32 network; - unsigned char prefixlen; - - struct rcu_head rcu; -}; - -struct multipath_bucket { - struct list_head head; - spinlock_t lock; -}; - -struct multipath_route { - struct list_head list; - - int oif; - __be32 gw; - struct list_head dests; - - struct rcu_head rcu; -}; - -/* state: primarily weight per route information */ -static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; - -static unsigned char __multipath_lookup_weight(const struct flowi *fl, - const struct rtable *rt) -{ - const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; - struct multipath_route *r; - struct multipath_route *target_route = NULL; - struct multipath_dest *d; - int weight = 1; - - /* lookup the weight information for a certain route */ - rcu_read_lock(); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == rt->rt_gateway && - r->oif == rt->idev->dev->ifindex) { - target_route = r; - break; - } - } - - if (!target_route) { - /* this should not happen... but we are prepared */ - printk( KERN_CRIT"%s: missing state for gateway: %u and " \ - "device %d\n", __FUNCTION__, rt->rt_gateway, - rt->idev->dev->ifindex); - goto out; - } - - /* find state entry for destination */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - __be32 targetnetwork = fl->fl4_dst & - inet_make_mask(d->prefixlen); - - if ((targetnetwork & d->netmask) == d->network) { - weight = d->nh_info->nh_weight; - goto out; - } - } - -out: - rcu_read_unlock(); - return weight; -} - -static void wrandom_init_state(void) -{ - int i; - - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - INIT_LIST_HEAD(&state[i].head); - spin_lock_init(&state[i].lock); - } -} - -static void wrandom_select_route(const struct flowi *flp, - struct rtable *first, - struct rtable **rp) -{ - struct rtable *rt; - struct rtable *decision; - struct multipath_candidate *first_mpc = NULL; - struct multipath_candidate *mpc, *last_mpc = NULL; - int power = 0; - int last_power; - int selector; - const size_t size_mpc = sizeof(struct multipath_candidate); - - /* collect all candidates and identify their weights */ - for (rt = rcu_dereference(first); rt; - rt = rcu_dereference(rt->u.dst.rt_next)) { - if ((rt->u.dst.flags & DST_BALANCED) != 0 && - multipath_comparekeys(&rt->fl, flp)) { - struct multipath_candidate* mpc = - (struct multipath_candidate*) - kmalloc(size_mpc, GFP_ATOMIC); - - if (!mpc) - return; - - power += __multipath_lookup_weight(flp, rt) * 10000; - - mpc->power = power; - mpc->rt = rt; - mpc->next = NULL; - - if (!first_mpc) - first_mpc = mpc; - else - last_mpc->next = mpc; - - last_mpc = mpc; - } - } - - /* choose a weighted random candidate */ - decision = first; - selector = random32() % power; - last_power = 0; - - /* select candidate, adjust GC data and cleanup local state */ - decision = first; - last_mpc = NULL; - for (mpc = first_mpc; mpc; mpc = mpc->next) { - mpc->rt->u.dst.lastuse = jiffies; - if (last_power <= selector && selector < mpc->power) - decision = mpc->rt; - - last_power = mpc->power; - kfree(last_mpc); - last_mpc = mpc; - } - - /* concurrent __multipath_flush may lead to !last_mpc */ - kfree(last_mpc); - - decision->u.dst.__use++; - *rp = decision; -} - -static void wrandom_set_nhinfo(__be32 network, - __be32 netmask, - unsigned char prefixlen, - const struct fib_nh *nh) -{ - const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; - struct multipath_route *r, *target_route = NULL; - struct multipath_dest *d, *target_dest = NULL; - - /* store the weight information for a certain route */ - spin_lock_bh(&state[state_idx].lock); - - /* find state entry for gateway or add one if necessary */ - list_for_each_entry_rcu(r, &state[state_idx].head, list) { - if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { - target_route = r; - break; - } - } - - if (!target_route) { - const size_t size_rt = sizeof(struct multipath_route); - target_route = (struct multipath_route *) - kmalloc(size_rt, GFP_ATOMIC); - - target_route->gw = nh->nh_gw; - target_route->oif = nh->nh_oif; - memset(&target_route->rcu, 0, sizeof(struct rcu_head)); - INIT_LIST_HEAD(&target_route->dests); - - list_add_rcu(&target_route->list, &state[state_idx].head); - } - - /* find state entry for destination or add one if necessary */ - list_for_each_entry_rcu(d, &target_route->dests, list) { - if (d->nh_info == nh) { - target_dest = d; - break; - } - } - - if (!target_dest) { - const size_t size_dst = sizeof(struct multipath_dest); - target_dest = (struct multipath_dest*) - kmalloc(size_dst, GFP_ATOMIC); - - target_dest->nh_info = nh; - target_dest->network = network; - target_dest->netmask = netmask; - target_dest->prefixlen = prefixlen; - memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); - - list_add_rcu(&target_dest->list, &target_route->dests); - } - /* else: we already stored this info for another destination => - * we are finished - */ - - spin_unlock_bh(&state[state_idx].lock); -} - -static void __multipath_free(struct rcu_head *head) -{ - struct multipath_route *rt = container_of(head, struct multipath_route, - rcu); - kfree(rt); -} - -static void __multipath_free_dst(struct rcu_head *head) -{ - struct multipath_dest *dst = container_of(head, - struct multipath_dest, - rcu); - kfree(dst); -} - -static void wrandom_flush(void) -{ - int i; - - /* defere delete to all entries */ - for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { - struct multipath_route *r; - - spin_lock_bh(&state[i].lock); - list_for_each_entry_rcu(r, &state[i].head, list) { - struct multipath_dest *d; - list_for_each_entry_rcu(d, &r->dests, list) { - list_del_rcu(&d->list); - call_rcu(&d->rcu, - __multipath_free_dst); - } - list_del_rcu(&r->list); - call_rcu(&r->rcu, - __multipath_free); - } - - spin_unlock_bh(&state[i].lock); - } -} - -static struct ip_mp_alg_ops wrandom_ops = { - .mp_alg_select_route = wrandom_select_route, - .mp_alg_flush = wrandom_flush, - .mp_alg_set_nhinfo = wrandom_set_nhinfo, -}; - -static int __init wrandom_init(void) -{ - wrandom_init_state(); - - return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -static void __exit wrandom_exit(void) -{ - multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); -} - -module_init(wrandom_init); -module_exit(wrandom_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29ca63e81ce..85285021518 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -101,7 +101,6 @@ #include <net/tcp.h> #include <net/icmp.h> #include <net/xfrm.h> -#include <net/ip_mp_alg.h> #include <net/netevent.h> #include <net/rtnetlink.h> #ifdef CONFIG_SYSCTL @@ -495,13 +494,11 @@ static const struct file_operations rt_cpu_seq_fops = { static __inline__ void rt_free(struct rtable *rt) { - multipath_remove(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } static __inline__ void rt_drop(struct rtable *rt) { - multipath_remove(rt); ip_rt_put(rt); call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); } @@ -574,52 +571,6 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) (fl1->iif ^ fl2->iif)) == 0; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED -static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, - struct rtable *expentry, - int *removed_count) -{ - int passedexpired = 0; - struct rtable **nextstep = NULL; - struct rtable **rthp = chain_head; - struct rtable *rth; - - if (removed_count) - *removed_count = 0; - - while ((rth = *rthp) != NULL) { - if (rth == expentry) - passedexpired = 1; - - if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && - compare_keys(&(*rthp)->fl, &expentry->fl)) { - if (*rthp == expentry) { - *rthp = rth->u.dst.rt_next; - continue; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - if (removed_count) - ++(*removed_count); - } - } else { - if (!((*rthp)->u.dst.flags & DST_BALANCED) && - passedexpired && !nextstep) - nextstep = &rth->u.dst.rt_next; - - rthp = &rth->u.dst.rt_next; - } - } - - rt_free(expentry); - if (removed_count) - ++(*removed_count); - - return nextstep; -} -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - - /* This runs via a timer and thus is always in BH context. */ static void rt_check_expire(unsigned long dummy) { @@ -658,22 +609,8 @@ static void rt_check_expire(unsigned long dummy) } /* Cleanup aged off entries. */ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries if necessary */ - if (rth->u.dst.flags & DST_BALANCED) { - rthp = rt_remove_balanced_route( - &rt_hash_table[i].chain, - rth, NULL); - if (!rthp) - break; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ *rthp = rth->u.dst.rt_next; rt_free(rth); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock(rt_hash_lock_addr(i)); @@ -721,9 +658,6 @@ void rt_cache_flush(int delay) if (delay < 0) delay = ip_rt_min_delay; - /* flush existing multipath state*/ - multipath_flush(); - spin_lock_bh(&rt_flush_lock); if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { @@ -842,30 +776,9 @@ static int rt_garbage_collect(void) rthp = &rth->u.dst.rt_next; continue; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - /* remove all related balanced entries - * if necessary - */ - if (rth->u.dst.flags & DST_BALANCED) { - int r; - - rthp = rt_remove_balanced_route( - &rt_hash_table[k].chain, - rth, - &r); - goal -= r; - if (!rthp) - break; - } else { - *rthp = rth->u.dst.rt_next; - rt_free(rth); - goal--; - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ *rthp = rth->u.dst.rt_next; rt_free(rth); goal--; -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ } spin_unlock_bh(rt_hash_lock_addr(k)); if (goal <= 0) @@ -939,12 +852,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (!(rth->u.dst.flags & DST_BALANCED) && - compare_keys(&rth->fl, &rt->fl)) { -#else if (compare_keys(&rth->fl, &rt->fl)) { -#endif /* Put it first */ *rthp = rth->u.dst.rt_next; /* @@ -1774,10 +1682,6 @@ static inline int __mkroute_input(struct sk_buff *skb, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; -#endif if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) @@ -1812,11 +1716,11 @@ static inline int __mkroute_input(struct sk_buff *skb, return err; } -static inline int ip_mkroute_input_def(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) +static inline int ip_mkroute_input(struct sk_buff *skb, + struct fib_result* res, + const struct flowi *fl, + struct in_device *in_dev, + __be32 daddr, __be32 saddr, u32 tos) { struct rtable* rth = NULL; int err; @@ -1837,63 +1741,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } -static inline int ip_mkroute_input(struct sk_buff *skb, - struct fib_result* res, - const struct flowi *fl, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth = NULL, *rtres; - unsigned char hop, hopcount; - int err = -EINVAL; - unsigned int hash; - - if (res->fi) - hopcount = res->fi->fib_nhs; - else - hopcount = 1; - - /* distinguish between multipath and singlepath */ - if (hopcount < 2) - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, - saddr, tos); - - /* add all alternatives to the routing cache */ - for (hop = 0; hop < hopcount; hop++) { - res->nh_sel = hop; - - /* put reference to previous result */ - if (hop) - ip_rt_put(rtres); - - /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, - &rth); - if (err) - return err; - - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); - err = rt_intern_hash(hash, rth, &rtres); - if (err) - return err; - - /* forward hop information to multipath impl. */ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - } - skb->dst = &rtres->u.dst; - return err; -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); -#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ -} - - /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -2211,13 +2058,6 @@ static inline int __mkroute_output(struct rtable **result, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (res->fi) { - rth->rt_multipath_alg = res->fi->fib_mp_alg; - if (res->fi->fib_nhs > 1) - rth->u.dst.flags |= DST_BALANCED; - } -#endif if (IN_DEV_CONF_GET(in_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) @@ -2277,12 +2117,12 @@ static inline int __mkroute_output(struct rtable **result, return err; } -static inline int ip_mkroute_output_def(struct rtable **rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) +static inline int ip_mkroute_output(struct rtable **rp, + struct fib_result* res, + const struct flowi *fl, + const struct flowi *oldflp, + struct net_device *dev_out, + unsigned flags) { struct rtable *rth = NULL; int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); @@ -2295,68 +2135,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, return err; } -static inline int ip_mkroute_output(struct rtable** rp, - struct fib_result* res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - unsigned char hop; - unsigned hash; - int err = -EINVAL; - struct rtable *rth = NULL; - - if (res->fi && res->fi->fib_nhs > 1) { - unsigned char hopcount = res->fi->fib_nhs; - - for (hop = 0; hop < hopcount; hop++) { - struct net_device *dev2nexthop; - - res->nh_sel = hop; - - /* hold a work reference to the output device */ - dev2nexthop = FIB_RES_DEV(*res); - dev_hold(dev2nexthop); - - /* put reference to previous result */ - if (hop) - ip_rt_put(*rp); - - err = __mkroute_output(&rth, res, fl, oldflp, - dev2nexthop, flags); - - if (err != 0) - goto cleanup; - - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, - oldflp->oif); - err = rt_intern_hash(hash, rth, rp); - - /* forward hop information to multipath impl. */ - multipath_set_nhinfo(rth, - FIB_RES_NETWORK(*res), - FIB_RES_NETMASK(*res), - res->prefixlen, - &FIB_RES_NH(*res)); - cleanup: - /* release work reference to output device */ - dev_put(dev2nexthop); - - if (err != 0) - return err; - } - return err; - } else { - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, - flags); - } -#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ - return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags); -#endif -} - /* * Major route resolver routine. */ @@ -2570,17 +2348,6 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { - - /* check for multipath routes and choose one if - * necessary - */ - if (multipath_select_route(flp, rth, rp)) { - dst_hold(&(*rp)->u.dst); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - return 0; - } - rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2729,10 +2496,6 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (rt->u.dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); #endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) - NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); -#endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) |