diff options
Diffstat (limited to 'net/ipv4')
70 files changed, 766 insertions, 484 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24eca23c2db..dc411335c14 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -5,8 +5,6 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -1481,14 +1479,15 @@ static int __init inet_init(void) * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) - ip_mr_init(); + if (ip_mr_init()) + printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n"); #endif /* * Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; + printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ipv4_proc_init(); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9b539fa9fe1..29df75a6bcc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1,7 +1,5 @@ /* linux/net/ipv4/arp.c * - * Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $ - * * Copyright (C) 1994 by Florian La Roche * * This module implements the Address Resolution Protocol ARP (RFC 826), @@ -1199,7 +1197,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 79a7ef6209f..2e667e2f90d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1,8 +1,6 @@ /* * NET3 IP device support routines. * - * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -170,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) goto out_kfree; + if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) + dev_disable_lro(dev); /* Reference in_dev->dev */ dev_hold(dev); /* Account for reference dev->ip_ptr (below) */ @@ -1013,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) memcpy(old, ifa->ifa_label, IFNAMSIZ); memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (named++ == 0) - continue; + goto skip; dot = strchr(old, ':'); if (dot == NULL) { sprintf(old, ":%d", named); @@ -1024,6 +1024,8 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) } else { strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); } +skip: + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } @@ -1241,6 +1243,8 @@ static void inet_forward_change(struct net *net) read_lock(&dev_base_lock); for_each_netdev(net, dev) { struct in_device *in_dev; + if (on) + dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) @@ -1248,8 +1252,6 @@ static void inet_forward_change(struct net *net) rcu_read_unlock(); } read_unlock(&dev_base_lock); - - rt_cache_flush(0); } static int devinet_conf_proc(ctl_table *ctl, int write, @@ -1335,10 +1337,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, if (write && *valp != val) { struct net *net = ctl->extra2; - if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) - inet_forward_change(net); - else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) - rt_cache_flush(0); + if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { + rtnl_lock(); + if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { + inet_forward_change(net); + } else if (*valp) { + struct ipv4_devconf *cnf = ctl->extra1; + struct in_device *idev = + container_of(cnf, struct in_device, cnf); + dev_disable_lro(idev->dev); + } + rtnl_unlock(); + rt_cache_flush(net, 0); + } } return ret; @@ -1351,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } @@ -1364,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, { int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, newval, newlen); + struct net *net = table->extra2; if (ret == 1) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0b2ac6a3d90..65c1503f8cc 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -5,8 +5,6 @@ * * IPv4 Forwarding Information Base: FIB frontend. * - * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -146,7 +144,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(-1); + rt_cache_flush(net, -1); } /* @@ -899,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); arp_ifdown(dev); } static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + struct net_device *dev = ifa->ifa_dev->dev; switch (event) { case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(ifa->ifa_dev->dev); + fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa); @@ -921,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. Disable IP. */ - fib_disable_ip(ifa->ifa_dev->dev, 1); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); } break; } @@ -951,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 2e2fc3376ac..c8cac6c7f88 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -5,8 +5,6 @@ * * IPv4 FIB: lookup engine and maintenance routines. * - * Version: $Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -474,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); return 0; @@ -534,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) if (new_f) fz->fz_nent++; - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -616,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) write_unlock_bh(&fib_hash_lock); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fn_free_alias(fa, f); if (kill_fn) { fn_free_node(f); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 1fb56876be5..6080d712082 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(4); /* flow */ } -static void fib4_rule_flush_cache(void) +static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(-1); + rt_cache_flush(ops->fro_net, -1); } static struct fib_rules_ops fib4_rules_ops_template = { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0d4d72827e4..ded2ae34eab 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -5,8 +5,6 @@ * * IPv4 Forwarding Information Base: semantics. * - * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e1600ad8fb0..f155a66d6eb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -22,8 +22,6 @@ * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 * - * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ - * * * Code from fib_hash has been reused which includes the following header: * @@ -1273,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1318,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1661,7 +1659,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 87397351dda..aa7cf46853b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -3,8 +3,6 @@ * * Alan Cox, <alan@redhat.com> * - * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2769dc4a4c8..68e84a933e9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,8 +8,6 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * - * Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $ - * * Authors: * Alan Cox <Alan.Cox@linux.org> * diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ec834480abe..5bbf0005151 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) rover = net_random() % remaining + low; do { - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, rover, + hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->ib_net == net && tb->port == rover) @@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, snum, + hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->ib_net == net && tb->port == snum) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index da97695e709..c10036e7a46 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1,8 +1,6 @@ /* * inet_diag.c Module for monitoring INET transport protocols sockets. * - * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2023d37b270..eca5899729e 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, static void __inet_put_port(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, + hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, + table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -192,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net, const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; if (!hlist_empty(head)) { const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -225,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net, /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); @@ -265,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct net *net = sock_net(sk); + unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -493,7 +496,7 @@ ok: goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ce16e9ac24c..06006a5ac8b 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, write_unlock(lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); @@ -81,7 +82,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; BUG_TRAP(icsk->icsk_bind_hash); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index af995198f64..a456ceeac3f 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -3,8 +3,6 @@ * * This source is covered by the GNU GPL, the same as all kernel sources. * - * Version: $Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $ - * * Authors: Andrey V. Savochkin <saw@msu.ru> */ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 4813c39b438..da14725916d 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -5,8 +5,6 @@ * * The IP forwarding functionality. * - * Version: $Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $ - * * Authors: see ip.c * * Fixes: @@ -58,6 +56,9 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options * opt = &(IPCB(skb)->opt); + if (skb_warn_if_lro(skb)) + goto drop; + if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) goto drop; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 37221f65915..fbd5804b5d8 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,8 +5,6 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $ - * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> * @@ -600,7 +598,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) #ifdef CONFIG_SYSCTL static int zero; -static struct ctl_table ip4_frags_ctl_table[] = { +static struct ctl_table ip4_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", @@ -626,6 +624,10 @@ static struct ctl_table ip4_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, + { } +}; + +static struct ctl_table ip4_frags_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", @@ -646,22 +648,20 @@ static struct ctl_table ip4_frags_ctl_table[] = { { } }; -static int ip4_frags_ctl_register(struct net *net) +static int ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip4_frags_ctl_table; + table = ip4_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; table[2].data = &net->ipv4.frags.timeout; - table[3].mode &= ~0222; - table[4].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); @@ -678,7 +678,7 @@ err_alloc: return -ENOMEM; } -static void ip4_frags_ctl_unregister(struct net *net) +static void ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -686,13 +686,22 @@ static void ip4_frags_ctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } + +static void ip4_frags_ctl_register(void) +{ + register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); +} #else -static inline int ip4_frags_ctl_register(struct net *net) +static inline int ip4_frags_ns_ctl_register(struct net *net) { return 0; } -static inline void ip4_frags_ctl_unregister(struct net *net) +static inline void ip4_frags_ns_ctl_unregister(struct net *net) +{ +} + +static inline void ip4_frags_ctl_register(void) { } #endif @@ -716,12 +725,12 @@ static int ipv4_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv4.frags); - return ip4_frags_ctl_register(net); + return ip4_frags_ns_ctl_register(net); } static void ipv4_frags_exit_net(struct net *net) { - ip4_frags_ctl_unregister(net); + ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); } @@ -732,6 +741,7 @@ static struct pernet_operations ip4_frags_ops = { void __init ipfrag_init(void) { + ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4342cba4ff8..2a61158ea72 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -473,6 +473,8 @@ static int ipgre_rcv(struct sk_buff *skb) read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr, key)) != NULL) { + struct net_device_stats *stats = &tunnel->dev->stats; + secpath_reset(skb); skb->protocol = *(__be16*)(h + 2); @@ -497,28 +499,28 @@ static int ipgre_rcv(struct sk_buff *skb) /* Looped back packet, drop it! */ if (skb->rtable->fl.iif == 0) goto drop; - tunnel->stat.multicast++; + stats->multicast++; skb->pkt_type = PACKET_BROADCAST; } #endif if (((flags&GRE_CSUM) && csum) || (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->stat.rx_crc_errors++; - tunnel->stat.rx_errors++; + stats->rx_crc_errors++; + stats->rx_errors++; goto drop; } if (tunnel->parms.i_flags&GRE_SEQ) { if (!(flags&GRE_SEQ) || (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->stat.rx_fifo_errors++; - tunnel->stat.rx_errors++; + stats->rx_fifo_errors++; + stats->rx_errors++; goto drop; } tunnel->i_seqno = seqno + 1; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + stats->rx_packets++; + stats->rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -540,7 +542,7 @@ drop_nolock: static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -554,7 +556,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -570,7 +572,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) /* NBMA tunnel */ if (skb->dst == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } @@ -621,7 +623,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_GRE }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error; } } @@ -629,7 +631,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -954,11 +956,6 @@ done: return err; } -static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); @@ -1084,7 +1081,6 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->get_stats = ipgre_tunnel_get_stats; dev->do_ioctl = ipgre_tunnel_ioctl; dev->change_mtu = ipgre_tunnel_change_mtu; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index ff77a4a7f9e..7c26428ea67 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -5,8 +5,6 @@ * * The Internet Protocol (IP) module. * - * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 33126ad2cfd..be3f18a7a40 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -5,8 +5,6 @@ * * The options processing module for ip.c * - * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $ - * * Authors: A.N.Kuznetsov * */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e527628f56c..f1278eecf56 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,8 +5,6 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e0514e82308..105d92a039b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,8 +5,6 @@ * * The IP to API glue. * - * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $ - * * Authors: see ip.c * * Fixes: diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ed45037ce9b..b88aa9afa42 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1,6 +1,4 @@ /* - * $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $ - * * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or * user-supplied information to configure own IP address and routes. * diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index af5cb53da5c..4c6d2caf920 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,8 +1,6 @@ /* * Linux NET3: IP/IP protocol decoder. * - * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $ - * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 * @@ -368,8 +366,8 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -392,7 +390,7 @@ static int ipip_rcv(struct sk_buff *skb) static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; @@ -405,7 +403,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -418,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ if ((rt = skb->rtable) == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } if ((dst = rt->rt_gateway) == 0) @@ -433,7 +431,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_IPIP }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } @@ -441,7 +439,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -451,7 +449,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -685,11 +683,6 @@ done: return err; } -static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -702,7 +695,6 @@ static void ipip_tunnel_setup(struct net_device *dev) { dev->uninit = ipip_tunnel_uninit; dev->hard_start_xmit = ipip_tunnel_xmit; - dev->get_stats = ipip_tunnel_get_stats; dev->do_ioctl = ipip_tunnel_ioctl; dev->change_mtu = ipip_tunnel_change_mtu; dev->destructor = free_netdev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 11700a4dcd9..438fab9c62a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -9,8 +9,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ - * * Fixes: * Michael Chastain : Incorrect size of copying. * Alan Cox : Added the cache manager code @@ -181,26 +179,20 @@ static int reg_vif_num = -1; static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats*)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -209,8 +201,7 @@ static struct net_device *ipmr_reg_vif(void) struct net_device *dev; struct in_device *in_dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", - reg_vif_setup); + dev = alloc_netdev(0, "pimreg", reg_vif_setup); if (dev == NULL) return NULL; @@ -1170,8 +1161,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out+=skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; @@ -1230,8 +1221,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_TUNNEL) { ip_encap(skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; + vif->dev->stats.tx_packets++; + vif->dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1487,8 +1478,8 @@ int pim_rcv_v1(struct sk_buff * skb) skb->pkt_type = PACKET_HOST; dst_release(skb->dst); skb->dst = NULL; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -1542,8 +1533,8 @@ static int pim_rcv(struct sk_buff * skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); @@ -1887,16 +1878,36 @@ static struct net_protocol pim_protocol = { * Setup for IP multicast routing */ -void __init ip_mr_init(void) +int __init ip_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + if (!mrt_cachep) + return -ENOMEM; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip_mr_notifier); + err = register_netdevice_notifier(&ip_mr_notifier); + if (err) + goto reg_notif_fail; +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + goto proc_cache_fail; +#endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip_mr_vif"); #endif + return err; } diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 535abe0c45e..1f1897a1a70 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -1,8 +1,6 @@ /* * ip_vs_app.c: Application module support for IPVS * - * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 65f1ba11275..f8bdae47a77 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 963981a9d50..bcf6276ba4b 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 94c5767c8e0..9a5ace0b4dd 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index dcf5d46aaa5..8afc1503ed2 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -1,8 +1,6 @@ /* * IPVS: Destination Hashing scheduling module * - * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * Inspired by the consistent hashing scheduler patch from diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index dfa0d713c80..bc04eedd6db 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -1,8 +1,6 @@ /* * ip_vs_est.c: simple rate estimator for IPVS * - * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 59aa166b767..c1c758e4f73 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -1,8 +1,6 @@ /* * ip_vs_ftp.c: IPVS ftp application module * - * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 3888642706a..0efa3db4b18 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -1,8 +1,6 @@ /* * IPVS: Locality-Based Least-Connection scheduling module * - * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index daa260eb21c..8e3bbeb4513 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -1,8 +1,6 @@ /* * IPVS: Locality-Based Least-Connection with Replication scheduler * - * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index d88fef90a64..ac9f08e065d 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -1,8 +1,6 @@ /* * IPVS: Least-Connection Scheduling module * - * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index bc2a9e5f2a7..a46bf258d42 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -1,8 +1,6 @@ /* * IPVS: Never Queue scheduling module * - * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 4b1c16cbb16..876714f23d6 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -1,8 +1,6 @@ /* * ip_vs_proto.c: transport protocol load balancing support for IPVS * - * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 4bf835e1d86..73e0ea87c1f 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS * - * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ - * * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 * Wensong Zhang <wensong@linuxvirtualserver.org> * diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index db6a6b7b1a0..21d70c8ffa5 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS * - * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ - * * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 * Wensong Zhang <wensong@linuxvirtualserver.org> * diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index b83dc14b0a4..d0ea467986a 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_tcp.c: TCP load balancing support for IPVS * - * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 75771cb3cd6..c6be5d56823 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -1,8 +1,6 @@ /* * ip_vs_proto_udp.c: UDP load balancing support for IPVS * - * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index 433f8a94792..c8db12d39e6 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -1,8 +1,6 @@ /* * IPVS: Round-Robin Scheduling module * - * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c index 121a32b1b75..b6476730985 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/ipv4/ipvs/ip_vs_sched.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index dd7c128f9db..2a7d3135818 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -1,8 +1,6 @@ /* * IPVS: Shortest Expected Delay scheduling module * - * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 1b25b00ef1e..b8fdfac6500 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -1,8 +1,6 @@ /* * IPVS: Source Hashing scheduling module * - * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@gnuchina.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index eff54efe035..2d4a86f7332 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -5,8 +5,6 @@ * high-performance and highly available server based on a * cluster of servers. * - * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * ip_vs_sync: sync connection info from master load balancer to backups diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 8a9d913261d..772c3cb4eca 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -1,8 +1,6 @@ /* * IPVS: Weighted Least-Connection Scheduling module * - * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 85c680add6d..1d6932d7dc9 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -1,8 +1,6 @@ /* * IPVS: Weighted Round-Robin Scheduling module * - * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index f63006caea0..9892d4aca42 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -1,8 +1,6 @@ /* * ip_vs_xmit.c: various packet transmitters for IPVS * - * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $ - * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2767841a8ce..f23e60c93ef 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -213,8 +213,7 @@ config IP_NF_TARGET_NETMAP help NETMAP is an implementation of static 1:1 NAT mapping of network addresses. It maps the network address part, while keeping the host - address part intact. It is similar to Fast NAT, except that - Netfilter's connection tracking doesn't work well with Fast NAT. + address part intact. To compile it as a module, choose M here. If unsure, say N. @@ -365,6 +364,18 @@ config IP_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on IP_NF_IPTABLES + depends on SECURITY + default m if NETFILTER_ADVANCED=n + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d9b92fbf557..3f31291f37c 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 26a37cedcf2..aa33a4a7a71 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 00000000000..2b472ac2263 --- /dev/null +++ b/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,180 @@ +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} initial_table __initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET, +}; + +static unsigned int +ipt_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv4.iptable_security); +} + +static unsigned int +ipt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv4.iptable_security); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Somebody is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk(KERN_INFO "iptable_security: ignoring short " + "SOCK_RAW packet.\n"); + return NF_ACCEPT; + } + return ipt_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv4.iptable_security); +} + +static struct nf_hook_ops ipt_ops[] __read_mostly = { + { + .hook = ipt_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SECURITY, + }, +}; + +static int __net_init iptable_security_net_init(struct net *net) +{ + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv4.iptable_security)) + return PTR_ERR(net->ipv4.iptable_security); + + return 0; +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_security); +} + +static struct pernet_operations iptable_security_net_ops = { + .init = iptable_security_net_init, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&iptable_security_net_ops); + return ret; +} + +static void __exit iptable_security_fini(void) +{ + nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + unregister_pernet_subsys(&iptable_security_net_ops); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 78ab19accac..97791048fa9 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 552169b41b1..eb5cee279c5 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -7,8 +7,6 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: $Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $ - * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 971ab9356e5..ea50da0649f 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -5,8 +5,6 @@ * * INET protocol dispatch tables. * - * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 37a1ecd9d60..925fdf18cf9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -5,8 +5,6 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * @@ -608,12 +606,11 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } -static int raw_destroy(struct sock *sk) +static void raw_destroy(struct sock *sk) { lock_sock(sk); ip_flush_pending_frames(sk); release_sock(sk); - return 0; } /* This gets rid of all the nasties in af_inet. -DaveM */ @@ -947,7 +944,7 @@ static int raw_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_printf(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " - "inode drops\n"); + "inode ref pointer drops\n"); else raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 96be336064f..79c1e74263a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,8 +5,6 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> @@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); -static struct timer_list rt_secret_timer; /* * Interface to generic destination cache. @@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void) static struct rt_hash_bucket *rt_hash_table __read_mostly; static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; -static atomic_t rt_genid __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ (__raw_get_cpu_var(rt_cache_stat).field++) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx) +static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, + int genid) { return jhash_3words((__force u32)(__be32)(daddr), (__force u32)(__be32)(saddr), - idx, atomic_read(&rt_genid)) + idx, genid) & rt_hash_mask; } +static inline int rt_genid(struct net *net) +{ + return atomic_read(&net->ipv4.rt_genid); +} + #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { struct seq_net_private p; @@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) struct rt_cache_iter_state *st = seq->private; if (*pos) return rt_cache_get_idx(seq, *pos - 1); - st->genid = atomic_read(&rt_genid); + st->genid = rt_genid(seq_file_net(seq)); return SEQ_START_TOKEN; } @@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); } +static inline int rt_is_expired(struct rtable *rth) +{ + return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); +} + /* * Perform a full scan of hash table and free all entries. * Can be called by a softirq or a process. @@ -692,6 +699,7 @@ static void rt_do_flush(int process_context) { unsigned int i; struct rtable *rth, *next; + struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { if (process_context && need_resched()) @@ -701,11 +709,39 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); +#ifdef CONFIG_NET_NS + { + struct rtable ** prev, * p; + + rth = rt_hash_table[i].chain; + + /* defer releasing the head of the list after spin_unlock */ + for (tail = rth; tail; tail = tail->u.dst.rt_next) + if (!rt_is_expired(tail)) + break; + if (rth != tail) + rt_hash_table[i].chain = tail; + + /* call rt_free on entries after the tail requiring flush */ + prev = &rt_hash_table[i].chain; + for (p = *prev; p; p = next) { + next = p->u.dst.rt_next; + if (!rt_is_expired(p)) { + prev = &p->u.dst.rt_next; + } else { + *prev = next; + rt_free(p); + } + } + } +#else rth = rt_hash_table[i].chain; rt_hash_table[i].chain = NULL; + tail = NULL; +#endif spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth; rth = next) { + for (; rth != tail; rth = next) { next = rth->u.dst.rt_next; rt_free(rth); } @@ -738,7 +774,7 @@ static void rt_check_expire(void) continue; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work) * many times (2^24) without giving recent rt_genid. * Jenkins hash is strong enough that litle changes of rt_genid are OK. */ -static void rt_cache_invalidate(void) +static void rt_cache_invalidate(struct net *net) { unsigned char shuffle; get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &rt_genid); + atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } /* * delay < 0 : invalidate cache (fast : entries will be deleted later) * delay >= 0 : invalidate & flush cache (can be long) */ -void rt_cache_flush(int delay) +void rt_cache_flush(struct net *net, int delay) { - rt_cache_invalidate(); + rt_cache_invalidate(net); if (delay >= 0) rt_do_flush(!in_softirq()); } @@ -803,10 +839,11 @@ void rt_cache_flush(int delay) /* * We change rt_genid and let gc do the cleanup */ -static void rt_secret_rebuild(unsigned long dummy) +static void rt_secret_rebuild(unsigned long __net) { - rt_cache_invalidate(); - mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); + struct net *net = (struct net *)__net; + rt_cache_invalidate(net); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } /* @@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops) rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid == atomic_read(&rt_genid) && + if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; @@ -964,7 +1001,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); while ((aux = *rthp) != NULL) { - if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { + if (aux == rt || rt_is_expired(aux)) { *rthp = aux->u.dst.rt_next; rt_free(aux); continue; @@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rthp=&rt_hash_table[hash].chain; @@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - rth->rt_genid != atomic_read(&rt_genid) || + rt_is_expired(rth) || !net_eq(dev_net(rth->u.dst.dev), net)) { rthp = &rth->u.dst.rt_next; continue; @@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; rt->u.dst.xfrm = NULL; - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; /* Gateway is different ... */ @@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, - rt->fl.oif); + rt->fl.oif, + rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " NIPQUAD_FMT "/%02x dropped\n", @@ -1446,7 +1485,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, for (k = 0; k < 2; k++) { for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1461,7 +1501,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->fl.iif != 0 || dst_metric_locked(&rth->u.dst, RTAX_MTU) || !net_eq(dev_net(rth->u.dst.dev), net) || - rth->rt_genid != atomic_read(&rt_genid)) + !rt_is_expired(rth)) continue; if (new_mtu < 68 || new_mtu >= old_mtu) { @@ -1696,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { @@ -1711,7 +1751,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash(daddr, saddr, dev->ifindex); + hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, &skb->rtable); e_nobufs: @@ -1837,7 +1877,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); rt_set_nexthop(rth, res, itag); @@ -1872,7 +1912,8 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); + hash = rt_hash(daddr, saddr, fl->iif, + rt_genid(dev_net(rth->u.dst.dev))); return rt_intern_hash(hash, rth, &skb->rtable); } @@ -1998,7 +2039,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2028,7 +2069,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif); + hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); err = rt_intern_hash(hash, rth, &skb->rtable); goto done; @@ -2079,7 +2120,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, net = dev_net(dev); tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif); + hash = rt_hash(daddr, saddr, iif, rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2091,7 +2132,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2219,7 +2260,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); @@ -2268,7 +2309,8 @@ static int ip_mkroute_output(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + rt_genid(dev_net(dev_out))); err = rt_intern_hash(hash, rth, rp); } @@ -2480,7 +2522,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2493,7 +2535,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2524,7 +2566,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { }; -static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) +static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) @@ -2548,7 +2590,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; @@ -2584,7 +2626,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) - err = ipv4_dst_blackhole(rp, flp); + err = ipv4_dst_blackhole(net, rp, flp); return err; } @@ -2803,7 +2845,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rt = rcu_dereference(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; - if (rt->rt_genid != atomic_read(&rt_genid)) + if (rt_is_expired(rt)) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, @@ -2827,19 +2869,25 @@ done: void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(0); + rt_cache_flush(dev_net(in_dev->dev), 0); } #ifdef CONFIG_SYSCTL -static int flush_delay; - -static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, +static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - rt_cache_flush(flush_delay); + int flush_delay; + ctl_table ctl; + struct net *net; + + memcpy(&ctl, __ctl, sizeof(ctl)); + ctl.data = &flush_delay; + proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); + + net = (struct net *)__ctl->extra1; + rt_cache_flush(net, flush_delay); return 0; } @@ -2855,25 +2903,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, size_t newlen) { int delay; + struct net *net; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay, (int __user *)newval)) return -EFAULT; - rt_cache_flush(delay); + net = (struct net *)table->extra1; + rt_cache_flush(net, delay); return 0; } ctl_table ipv4_route_table[] = { { - .ctl_name = NET_IPV4_ROUTE_FLUSH, - .procname = "flush", - .data = &flush_delay, - .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, - }, - { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, @@ -3011,8 +3052,97 @@ ctl_table ipv4_route_table[] = { }, { .ctl_name = 0 } }; + +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + + +static struct ctl_table ipv4_route_flush_table[] = { + { + .ctl_name = NET_IPV4_ROUTE_FLUSH, + .procname = "flush", + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = &ipv4_sysctl_rtcache_flush, + .strategy = &ipv4_sysctl_rtcache_flush_strategy, + }, + { .ctl_name = 0 }, +}; + +static __net_init int sysctl_route_net_init(struct net *net) +{ + struct ctl_table *tbl; + + tbl = ipv4_route_flush_table; + if (net != &init_net) { + tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } + tbl[0].extra1 = net; + + net->ipv4.route_hdr = + register_net_sysctl_table(net, ipv4_route_path, tbl); + if (net->ipv4.route_hdr == NULL) + goto err_reg; + return 0; + +err_reg: + if (tbl != ipv4_route_flush_table) + kfree(tbl); +err_dup: + return -ENOMEM; +} + +static __net_exit void sysctl_route_net_exit(struct net *net) +{ + struct ctl_table *tbl; + + tbl = net->ipv4.route_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.route_hdr); + BUG_ON(tbl == ipv4_route_flush_table); + kfree(tbl); +} + +static __net_initdata struct pernet_operations sysctl_route_ops = { + .init = sysctl_route_net_init, + .exit = sysctl_route_net_exit, +}; #endif + +static __net_init int rt_secret_timer_init(struct net *net) +{ + atomic_set(&net->ipv4.rt_genid, + (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7)))); + + net->ipv4.rt_secret_timer.function = rt_secret_rebuild; + net->ipv4.rt_secret_timer.data = (unsigned long)net; + init_timer_deferrable(&net->ipv4.rt_secret_timer); + + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + return 0; +} + +static __net_exit void rt_secret_timer_exit(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); +} + +static __net_initdata struct pernet_operations rt_secret_timer_ops = { + .init = rt_secret_timer_init, + .exit = rt_secret_timer_exit, +}; + + #ifdef CONFIG_NET_CLS_ROUTE struct ip_rt_acct *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ @@ -3031,9 +3161,6 @@ int __init ip_rt_init(void) { int rc = 0; - atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - #ifdef CONFIG_NET_CLS_ROUTE ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); if (!ip_rt_acct) @@ -3065,19 +3192,14 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - rt_secret_timer.function = rt_secret_rebuild; - rt_secret_timer.data = 0; - init_timer_deferrable(&rt_secret_timer); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&rt_secret_timer); + if (register_pernet_subsys(&rt_secret_timer_ops)) + printk(KERN_ERR "Unable to setup rt_secret_timer\n"); if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); @@ -3087,6 +3209,9 @@ int __init ip_rt_init(void) #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); +#ifdef CONFIG_SYSCTL + register_pernet_subsys(&sysctl_route_ops); +#endif return rc; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d182a2a2629..fdde2ae07e2 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -8,8 +8,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * - * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ */ #include <linux/tcp.h> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c437f804ee3..14ef202a225 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1,8 +1,6 @@ /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. * - * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $ - * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] */ @@ -795,7 +793,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1d723de1868..56a133c6145 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -2467,6 +2465,76 @@ static unsigned long tcp_md5sig_users; static struct tcp_md5sig_pool **tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); +int tcp_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, + int bplen, + struct tcphdr *th, unsigned int tcplen, + struct tcp_md5sig_pool *hp) +{ + struct scatterlist sg[4]; + __u16 data_len; + int block = 0; + __sum16 cksum; + struct hash_desc *desc = &hp->md5_desc; + int err; + unsigned int nbytes = 0; + + sg_init_table(sg, 4); + + /* 1. The TCP pseudo-header */ + sg_set_buf(&sg[block++], &hp->md5_blk, bplen); + nbytes += bplen; + + /* 2. The TCP header, excluding options, and assuming a + * checksum of zero + */ + cksum = th->check; + th->check = 0; + sg_set_buf(&sg[block++], th, sizeof(*th)); + nbytes += sizeof(*th); + + /* 3. The TCP segment data (if any) */ + data_len = tcplen - (th->doff << 2); + if (data_len > 0) { + u8 *data = (u8 *)th + (th->doff << 2); + sg_set_buf(&sg[block++], data, data_len); + nbytes += data_len; + } + + /* 4. an independently-specified key or password, known to both + * TCPs and presumably connection-specific + */ + sg_set_buf(&sg[block++], key->key, key->keylen); + nbytes += key->keylen; + + sg_mark_end(&sg[block - 1]); + + /* Now store the hash into the packet */ + err = crypto_hash_init(desc); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING "%s(): hash_init failed\n", __func__); + return -1; + } + err = crypto_hash_update(desc, sg, nbytes); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING "%s(): hash_update failed\n", __func__); + return -1; + } + err = crypto_hash_final(desc, md5_hash); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING "%s(): hash_final failed\n", __func__); + return -1; + } + + /* Reset header */ + th->check = cksum; + + return 0; +} +EXPORT_SYMBOL(tcp_calc_md5_hash); + static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) { int cpu; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 2fbcc7d1b1a..838d491dfda 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -1,8 +1,6 @@ /* * tcp_diag.c Module for monitoring TCP transport protocols sockets. * - * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cad73b7dfef..d6ea970a151 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -949,17 +947,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric, { struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { + int mib_idx; + tp->reordering = min(TCP_MAX_REORDERING, metric); /* This exciting event is worth to be remembered. 8) */ if (ts) - NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); + mib_idx = LINUX_MIB_TCPTSREORDER; else if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); + mib_idx = LINUX_MIB_TCPRENOREORDER; else if (tcp_is_fack(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); + mib_idx = LINUX_MIB_TCPFACKREORDER; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS_BH(mib_idx); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -1458,18 +1460,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, if (!tcp_is_sackblock_valid(tp, dup_sack, sp[used_sacks].start_seq, sp[used_sacks].end_seq)) { + int mib_idx; + if (dup_sack) { if (!tp->undo_marker) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD; } else { /* Don't count olds caused by ACK reordering */ if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && !after(sp[used_sacks].end_seq, tp->snd_una)) continue; - NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + mib_idx = LINUX_MIB_TCPSACKDISCARD; } + + NET_INC_STATS_BH(mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -2382,15 +2388,19 @@ static int tcp_try_undo_recovery(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_may_undo(tp)) { + int mib_idx; + /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); tcp_undo_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); + mib_idx = LINUX_MIB_TCPLOSSUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); + mib_idx = LINUX_MIB_TCPFULLUNDO; + + NET_INC_STATS_BH(mib_idx); tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { @@ -2562,7 +2572,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0; + int fast_rexmit = 0, mib_idx; if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@ -2685,9 +2695,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* Otherwise enter Recovery state */ if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); + mib_idx = LINUX_MIB_TCPRENORECOVERY; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); + mib_idx = LINUX_MIB_TCPSACKRECOVERY; + + NET_INC_STATS_BH(mib_idx); tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; @@ -3450,6 +3462,43 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 1; } +#ifdef CONFIG_TCP_MD5SIG +/* + * Parse MD5 Signature option + */ +u8 *tcp_parse_md5sig_option(struct tcphdr *th) +{ + int length = (th->doff << 2) - sizeof (*th); + u8 *ptr = (u8*)(th + 1); + + /* If the TCP option is too short, we can short cut */ + if (length < TCPOLEN_MD5SIG) + return NULL; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch(opcode) { + case TCPOPT_EOL: + return NULL; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2 || opsize > length) + return NULL; + if (opcode == TCPOPT_MD5SIG) + return ptr; + } + ptr += opsize - 2; + length -= opsize; + } + return NULL; +} +#endif + static inline void tcp_store_ts_recent(struct tcp_sock *tp) { tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; @@ -3665,10 +3714,14 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + int mib_idx; + if (before(seq, tp->rcv_nxt)) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); + mib_idx = LINUX_MIB_TCPDSACKOLDSENT; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); + mib_idx = LINUX_MIB_TCPDSACKOFOSENT; + + NET_INC_STATS_BH(mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; @@ -5422,6 +5475,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_reordering); EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); EXPORT_SYMBOL(tcp_parse_options); +#ifdef CONFIG_TCP_MD5SIG +EXPORT_SYMBOL(tcp_parse_md5sig_option); +#endif EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ffe869ac1bc..4300bcf2cea 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ - * * IPv4 specific functions * * @@ -91,8 +89,13 @@ static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr); static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, - unsigned int tcplen); + struct tcphdr *th, unsigned int tcplen); +#else +static inline +struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +{ + return NULL; +} #endif struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { @@ -582,8 +585,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) key, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); + &rep.th, arg.iov[0].iov_len); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, @@ -602,9 +604,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, - struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts) +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 ts, int oif, + struct tcp_md5sig_key *key) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -616,10 +618,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, ]; } rep; struct ip_reply_arg arg; -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; - struct tcp_md5sig_key tw_key; -#endif memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -645,23 +643,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, rep.th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG - /* - * The SKB holds an imcoming packet, but may not have a valid ->sk - * pointer. This is especially the case when we're dealing with a - * TIME_WAIT ack, because the sk structure is long gone, and only - * the tcp_timewait_sock remains. So the md5 key is stashed in that - * structure, and we use it in preference. I believe that (twsk || - * skb->sk) holds true, but we program defensively. - */ - if (!twsk && skb->sk) { - key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); - } else if (twsk && twsk->tw_md5_keylen) { - tw_key.key = twsk->tw_md5_key; - tw_key.keylen = twsk->tw_md5_keylen; - key = &tw_key; - } else - key = NULL; - if (key) { int offset = (ts) ? 3 : 0; @@ -676,16 +657,15 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, key, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, - &rep.th, IPPROTO_TCP, - arg.iov[0].iov_len); + &rep.th, arg.iov[0].iov_len); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - if (twsk) - arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; + if (oif) + arg.bound_dev_if = oif; ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); @@ -698,9 +678,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); + tcptw->tw_ts_recent, + tw->tw_bound_dev_if, + tcp_twsk_md5_key(tcptw) + ); inet_twsk_put(tw); } @@ -708,9 +691,11 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) { - tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, + tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, - req->ts_recent); + req->ts_recent, + 0, + tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr)); } /* @@ -1002,18 +987,12 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, __be32 saddr, __be32 daddr, - struct tcphdr *th, int protocol, + struct tcphdr *th, unsigned int tcplen) { - struct scatterlist sg[4]; - __u16 data_len; - int block = 0; - __sum16 old_checksum; struct tcp_md5sig_pool *hp; struct tcp4_pseudohdr *bp; - struct hash_desc *desc; int err; - unsigned int nbytes = 0; /* * Okay, so RFC2385 is turned on for this connection, @@ -1025,63 +1004,25 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, goto clear_hash_noput; bp = &hp->md5_blk.ip4; - desc = &hp->md5_desc; /* - * 1. the TCP pseudo-header (in the order: source IP address, + * The TCP pseudo-header (in the order: source IP address, * destination IP address, zero-padded protocol number, and * segment length) */ bp->saddr = saddr; bp->daddr = daddr; bp->pad = 0; - bp->protocol = protocol; + bp->protocol = IPPROTO_TCP; bp->len = htons(tcplen); - sg_init_table(sg, 4); - - sg_set_buf(&sg[block++], bp, sizeof(*bp)); - nbytes += sizeof(*bp); - - /* 2. the TCP header, excluding options, and assuming a - * checksum of zero/ - */ - old_checksum = th->check; - th->check = 0; - sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); - nbytes += sizeof(struct tcphdr); - - /* 3. the TCP segment data (if any) */ - data_len = tcplen - (th->doff << 2); - if (data_len > 0) { - unsigned char *data = (unsigned char *)th + (th->doff << 2); - sg_set_buf(&sg[block++], data, data_len); - nbytes += data_len; - } - - /* 4. an independently-specified key or password, known to both - * TCPs and presumably connection-specific - */ - sg_set_buf(&sg[block++], key->key, key->keylen); - nbytes += key->keylen; - - sg_mark_end(&sg[block - 1]); - - /* Now store the Hash into the packet */ - err = crypto_hash_init(desc); - if (err) - goto clear_hash; - err = crypto_hash_update(desc, sg, nbytes); - if (err) - goto clear_hash; - err = crypto_hash_final(desc, md5_hash); + err = tcp_calc_md5_hash(md5_hash, key, sizeof(*bp), + th, tcplen, hp); if (err) goto clear_hash; - /* Reset header, and free up the crypto */ + /* Free up the crypto pool */ tcp_put_md5sig_pool(); - th->check = old_checksum; - out: return 0; clear_hash: @@ -1095,7 +1036,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct tcphdr *th, int protocol, + struct tcphdr *th, unsigned int tcplen) { __be32 saddr, daddr; @@ -1111,7 +1052,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, } return tcp_v4_do_calc_md5_hash(md5_hash, key, saddr, daddr, - th, protocol, tcplen); + th, tcplen); } EXPORT_SYMBOL(tcp_v4_calc_md5_hash); @@ -1130,52 +1071,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff << 2) - sizeof(struct tcphdr); int genhash; - unsigned char *ptr; unsigned char newhash[16]; hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + hash_location = tcp_parse_md5sig_option(th); - /* - * If the TCP option length is less than the TCP_MD5SIG - * option length, then we can shortcut - */ - if (length < TCPOLEN_MD5SIG) { - if (hash_expected) - return 1; - else - return 0; - } - - /* Okay, we can't shortcut - we have to grub through the options */ - ptr = (unsigned char *)(th + 1); - while (length > 0) { - int opcode = *ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - goto done_opts; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2) - goto done_opts; - if (opsize > length) - goto done_opts; - - if (opcode == TCPOPT_MD5SIG) { - hash_location = ptr; - goto done_opts; - } - } - ptr += opsize-2; - length -= opsize; - } -done_opts: /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) return 0; @@ -1202,8 +1103,7 @@ done_opts: genhash = tcp_v4_do_calc_md5_hash(newhash, hash_expected, iph->saddr, iph->daddr, - th, sk->sk_protocol, - skb->len); + th, skb->len); if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { @@ -1871,7 +1771,7 @@ static int tcp_v4_init_sock(struct sock *sk) return 0; } -int tcp_v4_destroy_sock(struct sock *sk) +void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1915,8 +1815,6 @@ int tcp_v4_destroy_sock(struct sock *sk) } atomic_dec(&tcp_sockets_allocated); - - return 0; } EXPORT_SYMBOL(tcp_v4_destroy_sock); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8245247a6ce..ea68a478fad 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ad993ecb481..edef2afe905 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -607,7 +605,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, md5, sk, NULL, NULL, tcp_hdr(skb), - sk->sk_protocol, skb->len); } #endif @@ -1988,14 +1985,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (sacked & TCPCB_LOST) { if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { + int mib_idx; + if (tcp_retransmit_skb(sk, skb)) { tp->retransmit_skb_hint = NULL; return; } if (icsk->icsk_ca_state != TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); + mib_idx = LINUX_MIB_TCPFASTRETRANS; else - NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); + mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; + NET_INC_STATS_BH(mib_idx); if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -2266,7 +2266,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tp->af_specific->calc_md5_hash(md5_hash_location, md5, NULL, dst, req, - tcp_hdr(skb), sk->sk_protocol, + tcp_hdr(skb), skb->len); } #endif diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 63ed9d6830e..6a480d1fd8f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -5,8 +5,6 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> @@ -328,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { + int mib_idx; + if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); + mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); + mib_idx = LINUX_MIB_TCPSACKFAILURES; } else { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); + mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); + mib_idx = LINUX_MIB_TCPRENOFAILURES; } } else if (icsk->icsk_ca_state == TCP_CA_Loss) { - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); + mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else { - NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); + mib_idx = LINUX_MIB_TCPTIMEOUTS; } + NET_INC_STATS_BH(mib_idx); } if (tcp_use_frto(sk)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 56fcda3694b..7187121e922 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,8 +5,6 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> @@ -136,7 +134,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct sock *sk; struct hlist_node *node; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) + sk_for_each(sk, node, &udptable[udp_hashfn(net, num)]) if (net_eq(sock_net(sk), net) && sk->sk_hash == num) return 1; return 0; @@ -176,7 +174,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, for (i = 0; i < UDP_HTABLE_SIZE; i++) { int size = 0; - head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[udp_hashfn(net, rover)]; if (hlist_empty(head)) goto gotit; @@ -213,7 +211,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, gotit: snum = rover; } else { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[udp_hashfn(net, snum)]; sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && @@ -229,7 +227,7 @@ gotit: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[udp_hashfn(net, snum)]; sk_add_node(sk, head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } @@ -266,7 +264,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int badness = -1; read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && @@ -528,7 +526,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -727,7 +726,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -890,7 +890,8 @@ try_again: goto out_free; if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -919,7 +920,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (noblock) @@ -990,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) ret = (*up->encap_rcv)(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } @@ -1042,15 +1044,18 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + if (rc == -ENOMEM) { + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + atomic_inc(&sk->sk_drops); + } goto drop; } return 0; drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -1061,7 +1066,7 @@ drop: * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ -static int __udp4_lib_mcast_deliver(struct sk_buff *skb, +static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) @@ -1070,7 +1075,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, int dif; read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1158,6 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; + struct net *net = dev_net(skb->dev); /* * Validate the packet. @@ -1180,9 +1186,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], goto csum_error; if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); + return __udp4_lib_mcast_deliver(net, skb, uh, + saddr, daddr, udptable); - sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, + sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest, inet_iif(skb), udptable); if (sk != NULL) { @@ -1211,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1245,7 +1252,7 @@ csum_error: ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1255,12 +1262,11 @@ int udp_rcv(struct sk_buff *skb) return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); } -int udp_destroy_sock(struct sock *sk) +void udp_destroy_sock(struct sock *sk) { lock_sock(sk); udp_flush_pending_frames(sk); release_sock(sk); - return 0; } /* @@ -1453,7 +1459,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, is_lite); __skb_unlink(skb, rcvq); kfree_skb(skb); } @@ -1629,12 +1636,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->sport); seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, len); + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops), len); } int udp4_seq_show(struct seq_file *seq, void *v) @@ -1643,7 +1651,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-127s\n", " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); + "inode ref pointer drops"); else { struct udp_iter_state *state = seq->private; int len; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 7288bf7977f..2e9bad2fa1b 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -26,7 +26,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern int udp_destroy_sock(struct sock *sk); +extern void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS extern int udp4_seq_show(struct seq_file *seq, void *v); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 72ce26b6c4d..4ad16b6d513 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -1,8 +1,6 @@ /* * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). * - * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ - * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: |