diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 15 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 22 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 71 | ||||
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 35 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_app.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_conn.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_ctl.c | 27 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_dh.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_est.c | 116 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_lblc.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_lblcr.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_lc.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_nq.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_proto.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_rr.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sched.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sed.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sh.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sync.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_wlc.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_wrr.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_addrtype.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_proto_common.c | 8 | ||||
-rw-r--r-- | net/ipv4/route.c | 100 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 |
27 files changed, 264 insertions, 181 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 91d3d96805d..b12dae2b0b2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1029,6 +1029,11 @@ skip: } } +static inline bool inetdev_valid_mtu(unsigned mtu) +{ + return mtu >= 68; +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1048,6 +1053,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } + } else if (event == NETDEV_CHANGEMTU) { + /* Re-enabling IP */ + if (inetdev_valid_mtu(dev->mtu)) + in_dev = inetdev_init(dev); } goto out; } @@ -1058,7 +1067,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, dev->ip_ptr = NULL; break; case NETDEV_UP: - if (dev->mtu < 68) + if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa; @@ -1080,9 +1089,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_down(in_dev); break; case NETDEV_CHANGEMTU: - if (dev->mtu >= 68) + if (inetdev_valid_mtu(dev->mtu)) break; - /* MTU falled under 68, disable IP */ + /* disable IP when MTU is not enough */ case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 860558633b2..55c355e6323 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -204,18 +204,22 @@ static struct sock *icmp_sk(struct net *net) return net->ipv4.icmp_sk[smp_processor_id()]; } -static inline int icmp_xmit_lock(struct sock *sk) +static inline struct sock *icmp_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmp_sk(net); + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static inline void icmp_xmit_unlock(struct sock *sk) @@ -354,15 +358,17 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb->rtable; struct net *net = dev_net(rt->u.dst.dev); - struct sock *sk = icmp_sk(net); - struct inet_sock *inet = inet_sk(sk); + struct sock *sk; + struct inet_sock *inet; __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; + inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; @@ -419,7 +425,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; net = dev_net(rt->u.dst.dev); - sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. @@ -483,7 +488,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6203ece5360..f70fac61259 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -289,6 +289,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct rtable *rt; struct iphdr *pip; struct igmpv3_report *pig; + struct net *net = dev_net(dev); skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) @@ -299,7 +300,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) .nl_u = { .ip4_u = { .daddr = IGMPV3_ALL_MCR } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); return NULL; } @@ -629,6 +630,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; + struct net *net = dev_net(dev); __be32 group = pmc ? pmc->multiaddr : 0; __be32 dst; @@ -643,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct flowi fl = { .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = dst } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) return -1; } if (rt->rt_src == 0) { @@ -1196,9 +1198,6 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (im=in_dev->mc_list; im; im=im->next) { if (im->multiaddr == addr) { im->users++; @@ -1278,9 +1277,6 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { if (i->multiaddr==addr) { if (--i->users == 0) { @@ -1308,9 +1304,6 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (i=in_dev->mc_list; i; i=i->next) igmp_group_dropped(i); @@ -1331,9 +1324,6 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST in_dev->mr_gq_running = 0; @@ -1357,9 +1347,6 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev->mc_list; i; i=i->next) @@ -1376,9 +1363,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - /* Deactivate timers */ ip_mc_down(in_dev); @@ -1395,7 +1379,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_unlock_bh(&in_dev->mc_list_lock); } -static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } }; @@ -1404,19 +1388,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(&init_net, imr->imr_ifindex); + idev = inetdev_by_index(net, imr->imr_ifindex); if (idev) __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(&init_net, imr->imr_address.s_addr); + dev = ip_dev_find(net, imr->imr_address.s_addr); if (!dev) return NULL; dev_put(dev); } - if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) { + if (!dev && !ip_route_output_key(net, &rt, &fl)) { dev = rt->u.dst.dev; ip_rt_put(rt); } @@ -1754,18 +1738,16 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int ifindex; int count = 0; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { iml = NULL; @@ -1827,15 +1809,13 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; + struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { if (iml->multi.imr_multiaddr.s_addr != group) @@ -1873,21 +1853,19 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); int leavegroup = 0; int i, j, rv; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2007,6 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; + struct net *net = sock_net(sk); int leavegroup = 0; if (!ipv4_is_multicast(addr)) @@ -2015,15 +1994,12 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2094,19 +2070,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2163,9 +2137,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); err = -EADDRNOTAVAIL; @@ -2246,19 +2217,17 @@ void ip_mc_drop_socket(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; + struct net *net = sock_net(sk); if (inet->mc_list == NULL) return; - if (!net_eq(sock_net(sk), &init_net)) - return; - rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; inet->mc_list = iml->next; - in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); + in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index d985bd613d2..743f011b9a8 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -409,3 +409,38 @@ out: } EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); + +void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_node *node; + int h; + + local_bh_disable(); + for (h = 0; h < (hashinfo->ehash_size); h++) { + struct inet_ehash_bucket *head = + inet_ehash_bucket(hashinfo, h); + rwlock_t *lock = inet_ehash_lockp(hashinfo, h); +restart: + write_lock(lock); + sk_for_each(sk, node, &head->twchain) { + + tw = inet_twsk(sk); + if (!net_eq(twsk_net(tw), net) || + tw->tw_family != family) + continue; + + atomic_inc(&tw->tw_refcnt); + write_unlock(lock); + inet_twsk_deschedule(tw, twdr); + inet_twsk_put(tw); + + goto restart; + } + write_unlock(lock); + } + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 1f1897a1a70..201b8ea3020 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -608,7 +608,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, } -int ip_vs_app_init(void) +int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index f8bdae47a77..44a6872dc24 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -965,7 +965,7 @@ static void ip_vs_conn_flush(void) } -int ip_vs_conn_init(void) +int __init ip_vs_conn_init(void) { int idx; diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9a5ace0b4dd..6379705a8dc 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -683,9 +683,22 @@ static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(stats, 0, (char *)&stats->lock - (char *)stats); - spin_unlock_bh(&stats->lock); + + stats->conns = 0; + stats->inpkts = 0; + stats->outpkts = 0; + stats->inbytes = 0; + stats->outbytes = 0; + + stats->cps = 0; + stats->inpps = 0; + stats->outpps = 0; + stats->inbps = 0; + stats->outbps = 0; + ip_vs_zero_estimator(stats); + + spin_unlock_bh(&stats->lock); } /* @@ -1589,7 +1602,7 @@ static struct ctl_table vs_vars[] = { { .ctl_name = 0 } }; -struct ctl_path net_vs_ctl_path[] = { +const struct ctl_path net_vs_ctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, { .procname = "vs", }, @@ -1784,7 +1797,9 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats; +struct ip_vs_stats ip_vs_stats = { + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), +}; #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) @@ -2306,7 +2321,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { }; -int ip_vs_control_init(void) +int __init ip_vs_control_init(void) { int ret; int idx; @@ -2333,8 +2348,6 @@ int ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_rtable[idx]); } - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); - spin_lock_init(&ip_vs_stats.lock); ip_vs_new_estimator(&ip_vs_stats); /* Hook the defense timer */ diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 8afc1503ed2..fa66824d264 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -233,6 +233,7 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .name = "dh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, @@ -242,7 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = static int __init ip_vs_dh_init(void) { - INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_dh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index bc04eedd6db..5a20f93bd7f 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/sysctl.h> +#include <linux/list.h> #include <net/ip_vs.h> @@ -44,28 +45,11 @@ */ -struct ip_vs_estimator -{ - struct ip_vs_estimator *next; - struct ip_vs_stats *stats; - - u32 last_conns; - u32 last_inpkts; - u32 last_outpkts; - u64 last_inbytes; - u64 last_outbytes; - - u32 cps; - u32 inpps; - u32 outpps; - u32 inbps; - u32 outbps; -}; - +static void estimation_timer(unsigned long arg); -static struct ip_vs_estimator *est_list = NULL; -static DEFINE_RWLOCK(est_lock); -static struct timer_list est_timer; +static LIST_HEAD(est_list); +static DEFINE_SPINLOCK(est_lock); +static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); static void estimation_timer(unsigned long arg) { @@ -76,9 +60,9 @@ static void estimation_timer(unsigned long arg) u64 n_inbytes, n_outbytes; u32 rate; - read_lock(&est_lock); - for (e = est_list; e; e = e->next) { - s = e->stats; + spin_lock(&est_lock); + list_for_each_entry(e, &est_list, list) { + s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); n_conns = s->conns; @@ -114,19 +98,16 @@ static void estimation_timer(unsigned long arg) s->outbps = (e->outbps+0xF)>>5; spin_unlock(&s->lock); } - read_unlock(&est_lock); + spin_unlock(&est_lock); mod_timer(&est_timer, jiffies + 2*HZ); } -int ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est; + struct ip_vs_estimator *est = &stats->est; - est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) - return -ENOMEM; + INIT_LIST_HEAD(&est->list); - est->stats = stats; est->last_conns = stats->conns; est->cps = stats->cps<<10; @@ -142,59 +123,40 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats) est->last_outbytes = stats->outbytes; est->outbps = stats->outbps<<5; - write_lock_bh(&est_lock); - est->next = est_list; - if (est->next == NULL) { - setup_timer(&est_timer, estimation_timer, 0); - est_timer.expires = jiffies + 2*HZ; - add_timer(&est_timer); - } - est_list = est; - write_unlock_bh(&est_lock); - return 0; + spin_lock_bh(&est_lock); + if (list_empty(&est_list)) + mod_timer(&est_timer, jiffies + 2 * HZ); + list_add(&est->list, &est_list); + spin_unlock_bh(&est_lock); } void ip_vs_kill_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est, **pest; - int killed = 0; - - write_lock_bh(&est_lock); - pest = &est_list; - while ((est=*pest) != NULL) { - if (est->stats != stats) { - pest = &est->next; - continue; - } - *pest = est->next; - kfree(est); - killed++; + struct ip_vs_estimator *est = &stats->est; + + spin_lock_bh(&est_lock); + list_del(&est->list); + while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { + spin_unlock_bh(&est_lock); + cpu_relax(); + spin_lock_bh(&est_lock); } - if (killed && est_list == NULL) - del_timer_sync(&est_timer); - write_unlock_bh(&est_lock); + spin_unlock_bh(&est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *e; - - write_lock_bh(&est_lock); - for (e = est_list; e; e = e->next) { - if (e->stats != stats) - continue; - - /* set counters zero */ - e->last_conns = 0; - e->last_inpkts = 0; - e->last_outpkts = 0; - e->last_inbytes = 0; - e->last_outbytes = 0; - e->cps = 0; - e->inpps = 0; - e->outpps = 0; - e->inbps = 0; - e->outbps = 0; - } - write_unlock_bh(&est_lock); + struct ip_vs_estimator *est = &stats->est; + + /* set counters zero, caller must hold the stats->lock lock */ + est->last_inbytes = 0; + est->last_outbytes = 0; + est->last_conns = 0; + est->last_inpkts = 0; + est->last_outpkts = 0; + est->cps = 0; + est->inpps = 0; + est->outpps = 0; + est->inbps = 0; + est->outbps = 0; } diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 0efa3db4b18..7a6a319f544 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -539,6 +539,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .name = "lblc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, .update_service = ip_vs_lblc_update_svc, @@ -550,7 +551,6 @@ static int __init ip_vs_lblc_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 8e3bbeb4513..c234e73968a 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -728,6 +728,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .name = "lblcr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, .update_service = ip_vs_lblcr_update_svc, @@ -739,7 +740,6 @@ static int __init ip_vs_lblcr_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index ac9f08e065d..ebcdbf75ac6 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -98,6 +98,7 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .name = "lc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), .init_service = ip_vs_lc_init_svc, .done_service = ip_vs_lc_done_svc, .update_service = ip_vs_lc_update_svc, @@ -107,7 +108,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { static int __init ip_vs_lc_init(void) { - INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; } diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index a46bf258d42..92f3a677003 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -136,6 +136,7 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .name = "nq", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), .init_service = ip_vs_nq_init_svc, .done_service = ip_vs_nq_done_svc, .update_service = ip_vs_nq_update_svc, @@ -145,7 +146,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = static int __init ip_vs_nq_init(void) { - INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_nq_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 876714f23d6..6099a88fc20 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -43,7 +43,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; /* * register an ipvs protocol */ -static int __used register_ip_vs_protocol(struct ip_vs_protocol *pp) +static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) { unsigned hash = IP_VS_PROTO_HASH(pp->protocol); @@ -190,7 +190,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, } -int ip_vs_protocol_init(void) +int __init ip_vs_protocol_init(void) { char protocols[64]; #define REGISTER_PROTOCOL(p) \ diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index c8db12d39e6..358110d17e5 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -94,6 +94,7 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .name = "rr", /* name */ .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, .done_service = ip_vs_rr_done_svc, .update_service = ip_vs_rr_update_svc, @@ -102,7 +103,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { static int __init ip_vs_rr_init(void) { - INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_rr_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c index b6476730985..a46ad9e3501 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/ipv4/ipvs/ip_vs_sched.c @@ -184,7 +184,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next != &scheduler->n_list) { + if (!list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); ip_vs_use_count_dec(); IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " @@ -229,7 +229,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next == &scheduler->n_list) { + if (list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " "is not in the list. failed\n", scheduler->name); diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 2a7d3135818..77663d84cbd 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -138,6 +138,7 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .name = "sed", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), .init_service = ip_vs_sed_init_svc, .done_service = ip_vs_sed_done_svc, .update_service = ip_vs_sed_update_svc, @@ -147,7 +148,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = static int __init ip_vs_sed_init(void) { - INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sed_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index b8fdfac6500..7b979e22805 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -230,6 +230,7 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .name = "sh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, @@ -239,7 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = static int __init ip_vs_sh_init(void) { - INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 45e9bd96c28..a652da2c320 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -904,9 +904,9 @@ int stop_sync_thread(int state) * progress of stopping the master sync daemon. */ - spin_lock(&ip_vs_sync_lock); + spin_lock_bh(&ip_vs_sync_lock); ip_vs_sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock(&ip_vs_sync_lock); + spin_unlock_bh(&ip_vs_sync_lock); kthread_stop(sync_master_thread); sync_master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 772c3cb4eca..9b0ef86bb1f 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -126,6 +126,7 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .name = "wlc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), .init_service = ip_vs_wlc_init_svc, .done_service = ip_vs_wlc_done_svc, .update_service = ip_vs_wlc_update_svc, @@ -135,7 +136,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = static int __init ip_vs_wlc_init(void) { - INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 1d6932d7dc9..0d86a79b87b 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -212,6 +212,7 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .name = "wrr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, @@ -220,7 +221,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { static int __init ip_vs_wrr_init(void) { - INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 49587a49722..462a22c9787 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type(dev, iph->daddr, info->dest) ^ - (info->flags & IPT_ADDRTYPE_INVERT_DEST); + !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 91537f11273..6c4f11f5144 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -73,9 +73,13 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - off = *rover; if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) - off = net_random(); + off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, + maniptype == IP_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; for (i = 0; i < range_size; i++, off++) { *portptr = htons(min + off % range_size); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 16fc6f454a3..6ee5354c9aa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2914,6 +2914,68 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } +static void rt_secret_reschedule(int old) +{ + struct net *net; + int new = ip_rt_secret_interval; + int diff = new - old; + + if (!diff) + return; + + rtnl_lock(); + for_each_net(net) { + int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + + if (!new) + continue; + + if (deleted) { + long time = net->ipv4.rt_secret_timer.expires - jiffies; + + if (time <= 0 || (time += diff) <= 0) + time = 0; + + net->ipv4.rt_secret_timer.expires = time; + } else + net->ipv4.rt_secret_timer.expires = new; + + net->ipv4.rt_secret_timer.expires += jiffies; + add_timer(&net->ipv4.rt_secret_timer); + } + rtnl_unlock(); +} + +static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old = ip_rt_secret_interval; + int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + + rt_secret_reschedule(old); + + return ret; +} + +static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, + int __user *name, + int nlen, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, + size_t newlen) +{ + int old = ip_rt_secret_interval; + int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, + newlen); + + rt_secret_reschedule(old); + + return ret; +} + static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, @@ -3048,20 +3110,29 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = &ipv4_sysctl_rt_secret_interval, + .strategy = &ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; -static __net_initdata struct ctl_path ipv4_route_path[] = { +static struct ctl_table empty[1]; + +static struct ctl_table ipv4_skeleton[] = +{ + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + .mode = 0555, .child = ipv4_route_table}, + { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + .mode = 0555, .child = empty}, + { } +}; + +static __net_initdata struct ctl_path ipv4_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, { }, }; - static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = NET_IPV4_ROUTE_FLUSH, @@ -3074,6 +3145,13 @@ static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = 0 }, }; +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; @@ -3126,10 +3204,12 @@ static __net_init int rt_secret_timer_init(struct net *net) net->ipv4.rt_secret_timer.data = (unsigned long)net; init_timer_deferrable(&net->ipv4.rt_secret_timer); - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } return 0; } @@ -3223,7 +3303,7 @@ int __init ip_rt_init(void) */ void __init ip_static_sysctl_init(void) { - register_sysctl_paths(ipv4_route_path, ipv4_route_table); + register_sysctl_paths(ipv4_path, ipv4_skeleton); } #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c1e934824..1b4fee20fc9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2376,6 +2376,7 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a00532de2a8..8165f5aa8c7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -468,7 +468,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; @@ -509,7 +510,8 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 383d17359d0..8e42fbbd576 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -989,7 +989,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) up->encap_rcv != NULL) { int ret; + bh_unlock_sock(sk); ret = (*up->encap_rcv)(sk, skb); + bh_lock_sock(sk); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, @@ -1092,7 +1094,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (skb1) { int ret = 0; - bh_lock_sock_nested(sk); + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) ret = udp_queue_rcv_skb(sk, skb1); else @@ -1194,7 +1196,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (sk != NULL) { int ret = 0; - bh_lock_sock_nested(sk); + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) ret = udp_queue_rcv_skb(sk, skb); else |