summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c87
-rw-r--r--net/ipv4/arp.c9
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c35
-rw-r--r--net/ipv4/fib_frontend.c19
-rw-r--r--net/ipv4/fib_hash.c8
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c47
-rw-r--r--net/ipv4/icmp.c31
-rw-r--r--net/ipv4/igmp.c28
-rw-r--r--net/ipv4/inet_connection_sock.c13
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_fragment.c16
-rw-r--r--net/ipv4/inet_hashtables.c23
-rw-r--r--net/ipv4/inet_lro.c3
-rw-r--r--net/ipv4/inet_timewait_sock.c21
-rw-r--r--net/ipv4/inetpeer.c2
-rw-r--r--net/ipv4/ip_forward.c11
-rw-r--r--net/ipv4/ip_fragment.c63
-rw-r--r--net/ipv4/ip_gre.c32
-rw-r--r--net/ipv4/ip_input.c40
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c35
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ipconfig.c6
-rw-r--r--net/ipv4/ipip.c24
-rw-r--r--net/ipv4/ipmr.c125
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c5
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_nq.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_rr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sched.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sed.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c433
-rw-r--r--net/ipv4/ipvs/ip_vs_wlc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_wrr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/netfilter/Kconfig15
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/ip_queue.c5
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/iptable_security.c180
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c2
-rw-r--r--net/ipv4/proc.c113
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/raw.c12
-rw-r--r--net/ipv4/route.c262
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c115
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_input.c236
-rw-r--r--net/ipv4/tcp_ipv4.c330
-rw-r--r--net/ipv4/tcp_minisocks.c12
-rw-r--r--net/ipv4/tcp_output.c466
-rw-r--r--net/ipv4/tcp_probe.c2
-rw-r--r--net/ipv4/tcp_timer.c27
-rw-r--r--net/ipv4/udp.c76
-rw-r--r--net/ipv4/udp_impl.h2
-rw-r--r--net/ipv4/udplite.c3
77 files changed, 1673 insertions, 1388 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24eca23c2db..dd919d84285 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,8 +5,6 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
@@ -112,12 +110,11 @@
#include <net/ipip.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
-DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
-
extern void ip_mc_drop_socket(struct sock *sk);
/* The inetsw table contains everything that inet_create needs to
@@ -1341,50 +1338,70 @@ static struct net_protocol icmp_protocol = {
.netns_ok = 1,
};
-static int __init init_ipv4_mibs(void)
+static __net_init int ipv4_mib_init_net(struct net *net)
{
- if (snmp_mib_init((void **)net_statistics,
- sizeof(struct linux_mib)) < 0)
- goto err_net_mib;
- if (snmp_mib_init((void **)ip_statistics,
- sizeof(struct ipstats_mib)) < 0)
- goto err_ip_mib;
- if (snmp_mib_init((void **)icmp_statistics,
- sizeof(struct icmp_mib)) < 0)
- goto err_icmp_mib;
- if (snmp_mib_init((void **)icmpmsg_statistics,
- sizeof(struct icmpmsg_mib)) < 0)
- goto err_icmpmsg_mib;
- if (snmp_mib_init((void **)tcp_statistics,
+ if (snmp_mib_init((void **)net->mib.tcp_statistics,
sizeof(struct tcp_mib)) < 0)
goto err_tcp_mib;
- if (snmp_mib_init((void **)udp_statistics,
+ if (snmp_mib_init((void **)net->mib.ip_statistics,
+ sizeof(struct ipstats_mib)) < 0)
+ goto err_ip_mib;
+ if (snmp_mib_init((void **)net->mib.net_statistics,
+ sizeof(struct linux_mib)) < 0)
+ goto err_net_mib;
+ if (snmp_mib_init((void **)net->mib.udp_statistics,
sizeof(struct udp_mib)) < 0)
goto err_udp_mib;
- if (snmp_mib_init((void **)udplite_statistics,
+ if (snmp_mib_init((void **)net->mib.udplite_statistics,
sizeof(struct udp_mib)) < 0)
goto err_udplite_mib;
+ if (snmp_mib_init((void **)net->mib.icmp_statistics,
+ sizeof(struct icmp_mib)) < 0)
+ goto err_icmp_mib;
+ if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
+ sizeof(struct icmpmsg_mib)) < 0)
+ goto err_icmpmsg_mib;
- tcp_mib_init();
-
+ tcp_mib_init(net);
return 0;
-err_udplite_mib:
- snmp_mib_free((void **)udp_statistics);
-err_udp_mib:
- snmp_mib_free((void **)tcp_statistics);
-err_tcp_mib:
- snmp_mib_free((void **)icmpmsg_statistics);
err_icmpmsg_mib:
- snmp_mib_free((void **)icmp_statistics);
+ snmp_mib_free((void **)net->mib.icmp_statistics);
err_icmp_mib:
- snmp_mib_free((void **)ip_statistics);
-err_ip_mib:
- snmp_mib_free((void **)net_statistics);
+ snmp_mib_free((void **)net->mib.udplite_statistics);
+err_udplite_mib:
+ snmp_mib_free((void **)net->mib.udp_statistics);
+err_udp_mib:
+ snmp_mib_free((void **)net->mib.net_statistics);
err_net_mib:
+ snmp_mib_free((void **)net->mib.ip_statistics);
+err_ip_mib:
+ snmp_mib_free((void **)net->mib.tcp_statistics);
+err_tcp_mib:
return -ENOMEM;
}
+static __net_exit void ipv4_mib_exit_net(struct net *net)
+{
+ snmp_mib_free((void **)net->mib.icmpmsg_statistics);
+ snmp_mib_free((void **)net->mib.icmp_statistics);
+ snmp_mib_free((void **)net->mib.udplite_statistics);
+ snmp_mib_free((void **)net->mib.udp_statistics);
+ snmp_mib_free((void **)net->mib.net_statistics);
+ snmp_mib_free((void **)net->mib.ip_statistics);
+ snmp_mib_free((void **)net->mib.tcp_statistics);
+}
+
+static __net_initdata struct pernet_operations ipv4_mib_ops = {
+ .init = ipv4_mib_init_net,
+ .exit = ipv4_mib_exit_net,
+};
+
+static int __init init_ipv4_mibs(void)
+{
+ return register_pernet_subsys(&ipv4_mib_ops);
+}
+
static int ipv4_proc_init(void);
/*
@@ -1481,14 +1498,15 @@ static int __init inet_init(void)
* Initialise the multicast router
*/
#if defined(CONFIG_IP_MROUTE)
- ip_mr_init();
+ if (ip_mr_init())
+ printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif
/*
* Initialise per-cpu ipv4 mibs
*/
if (init_ipv4_mibs())
- printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
+ printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
ipv4_proc_init();
@@ -1560,5 +1578,4 @@ EXPORT_SYMBOL(inet_sock_destruct);
EXPORT_SYMBOL(inet_stream_connect);
EXPORT_SYMBOL(inet_stream_ops);
EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(net_statistics);
EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9b539fa9fe1..b043eda60b0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,7 +1,5 @@
/* linux/net/ipv4/arp.c
*
- * Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $
- *
* Copyright (C) 1994 by Florian La Roche
*
* This module implements the Address Resolution Protocol ARP (RFC 826),
@@ -423,11 +421,12 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
struct rtable *rt;
int flag = 0;
/*unsigned long now; */
+ struct net *net = dev_net(dev);
- if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
+ if (ip_route_output_key(net, &rt, &fl) < 0)
return 1;
if (rt->u.dst.dev != dev) {
- NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
+ NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
flag = 1;
}
ip_rt_put(rt);
@@ -1199,7 +1198,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&arp_tbl, dev);
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(dev), 0);
break;
default:
break;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0c0c73f368c..5e6c5a0f3fd 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -52,7 +52,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->sport, usin->sin_port, sk, 1);
if (err) {
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return err;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 79a7ef6209f..2e667e2f90d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,8 +1,6 @@
/*
* NET3 IP device support routines.
*
- * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -170,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
in_dev->dev = dev;
if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
goto out_kfree;
+ if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
+ dev_disable_lro(dev);
/* Reference in_dev->dev */
dev_hold(dev);
/* Account for reference dev->ip_ptr (below) */
@@ -1013,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
memcpy(old, ifa->ifa_label, IFNAMSIZ);
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
if (named++ == 0)
- continue;
+ goto skip;
dot = strchr(old, ':');
if (dot == NULL) {
sprintf(old, ":%d", named);
@@ -1024,6 +1024,8 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
} else {
strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
}
+skip:
+ rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
}
}
@@ -1241,6 +1243,8 @@ static void inet_forward_change(struct net *net)
read_lock(&dev_base_lock);
for_each_netdev(net, dev) {
struct in_device *in_dev;
+ if (on)
+ dev_disable_lro(dev);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev)
@@ -1248,8 +1252,6 @@ static void inet_forward_change(struct net *net)
rcu_read_unlock();
}
read_unlock(&dev_base_lock);
-
- rt_cache_flush(0);
}
static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1335,10 +1337,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
if (write && *valp != val) {
struct net *net = ctl->extra2;
- if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
- inet_forward_change(net);
- else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
- rt_cache_flush(0);
+ if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
+ rtnl_lock();
+ if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
+ inet_forward_change(net);
+ } else if (*valp) {
+ struct ipv4_devconf *cnf = ctl->extra1;
+ struct in_device *idev =
+ container_of(cnf, struct in_device, cnf);
+ dev_disable_lro(idev->dev);
+ }
+ rtnl_unlock();
+ rt_cache_flush(net, 0);
+ }
}
return ret;
@@ -1351,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
int *valp = ctl->data;
int val = *valp;
int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ struct net *net = ctl->extra2;
if (write && *valp != val)
- rt_cache_flush(0);
+ rt_cache_flush(net, 0);
return ret;
}
@@ -1364,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
{
int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
newval, newlen);
+ struct net *net = table->extra2;
if (ret == 1)
- rt_cache_flush(0);
+ rt_cache_flush(net, 0);
return ret;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0b2ac6a3d90..65c1503f8cc 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,8 +5,6 @@
*
* IPv4 Forwarding Information Base: FIB frontend.
*
- * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
@@ -146,7 +144,7 @@ static void fib_flush(struct net *net)
}
if (flushed)
- rt_cache_flush(-1);
+ rt_cache_flush(net, -1);
}
/*
@@ -899,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force)
{
if (fib_sync_down_dev(dev, force))
fib_flush(dev_net(dev));
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(dev), 0);
arp_ifdown(dev);
}
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
switch (event) {
case NETDEV_UP:
fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- fib_sync_up(ifa->ifa_dev->dev);
+ fib_sync_up(dev);
#endif
- rt_cache_flush(-1);
+ rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa);
@@ -921,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
Disable IP.
*/
- fib_disable_ip(ifa->ifa_dev->dev, 1);
+ fib_disable_ip(dev, 1);
} else {
- rt_cache_flush(-1);
+ rt_cache_flush(dev_net(dev), -1);
}
break;
}
@@ -951,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
- rt_cache_flush(-1);
+ rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
fib_disable_ip(dev, 0);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(dev), 0);
break;
}
return NOTIFY_DONE;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 2e2fc3376ac..c8cac6c7f88 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -5,8 +5,6 @@
*
* IPv4 FIB: lookup engine and maintenance routines.
*
- * Version: $Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
@@ -474,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
&cfg->fc_nlinfo, NLM_F_REPLACE);
return 0;
@@ -534,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
if (new_f)
fz->fz_nent++;
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -616,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
write_unlock_bh(&fib_hash_lock);
if (fa->fa_state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
fn_free_alias(fa, f);
if (kill_fn) {
fn_free_node(f);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1fb56876be5..6080d712082 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(4); /* flow */
}
-static void fib4_rule_flush_cache(void)
+static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
{
- rt_cache_flush(-1);
+ rt_cache_flush(ops->fro_net, -1);
}
static struct fib_rules_ops fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0d4d72827e4..ded2ae34eab 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -5,8 +5,6 @@
*
* IPv4 Forwarding Information Base: semantics.
*
- * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4b02d14e7ab..5cb72786a8a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -22,8 +22,6 @@
* IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
* IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
*
- * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
- *
*
* Code from fib_hash has been reused which includes the following header:
*
@@ -1273,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
@@ -1318,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
succeeded:
@@ -1359,17 +1357,17 @@ static int check_leaf(struct trie *t, struct leaf *l,
t->stats.semantic_match_miss++;
#endif
if (err <= 0)
- return plen;
+ return err;
}
- return -1;
+ return 1;
}
static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
struct fib_result *res)
{
struct trie *t = (struct trie *) tb->tb_data;
- int plen, ret = 0;
+ int ret;
struct node *n;
struct tnode *pn;
int pos, bits;
@@ -1393,10 +1391,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
/* Just a leaf? */
if (IS_LEAF(n)) {
- plen = check_leaf(t, (struct leaf *)n, key, flp, res);
- if (plen < 0)
- goto failed;
- ret = 0;
+ ret = check_leaf(t, (struct leaf *)n, key, flp, res);
goto found;
}
@@ -1421,11 +1416,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
}
if (IS_LEAF(n)) {
- plen = check_leaf(t, (struct leaf *)n, key, flp, res);
- if (plen < 0)
+ ret = check_leaf(t, (struct leaf *)n, key, flp, res);
+ if (ret > 0)
goto backtrace;
-
- ret = 0;
goto found;
}
@@ -1666,7 +1659,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
trie_leaf_remove(t, l);
if (fa->fa_state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -2258,25 +2251,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
static int fib_triestat_seq_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net;
-
- net = get_proc_net(inode);
- if (net == NULL)
- return -ENXIO;
- err = single_open(file, fib_triestat_seq_show, net);
- if (err < 0) {
- put_net(net);
- return err;
- }
- return 0;
-}
-
-static int fib_triestat_seq_release(struct inode *ino, struct file *f)
-{
- struct seq_file *seq = f->private_data;
- put_net(seq->private);
- return single_release(ino, f);
+ return single_open_net(inode, file, fib_triestat_seq_show);
}
static const struct file_operations fib_triestat_fops = {
@@ -2284,7 +2259,7 @@ static const struct file_operations fib_triestat_fops = {
.open = fib_triestat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = fib_triestat_seq_release,
+ .release = single_release_net,
};
static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 87397351dda..860558633b2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,8 +3,6 @@
*
* Alan Cox, <alan@redhat.com>
*
- * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -113,12 +111,6 @@ struct icmp_bxm {
unsigned char optbuf[40];
};
-/*
- * Statistics
- */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
-DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly;
-
/* An array of errno for error messages from dest unreach. */
/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
@@ -298,10 +290,10 @@ out:
/*
* Maintain the counters used in the SNMP statistics for outgoing ICMP
*/
-void icmp_out_count(unsigned char type)
+void icmp_out_count(struct net *net, unsigned char type)
{
- ICMPMSGOUT_INC_STATS(type);
- ICMP_INC_STATS(ICMP_MIB_OUTMSGS);
+ ICMPMSGOUT_INC_STATS(net, type);
+ ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
}
/*
@@ -765,7 +757,7 @@ static void icmp_unreach(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
goto out;
}
@@ -805,7 +797,7 @@ static void icmp_redirect(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
goto out;
}
@@ -876,7 +868,7 @@ static void icmp_timestamp(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS);
goto out;
}
@@ -975,6 +967,7 @@ int icmp_rcv(struct sk_buff *skb)
{
struct icmphdr *icmph;
struct rtable *rt = skb->rtable;
+ struct net *net = dev_net(rt->u.dst.dev);
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
int nh;
@@ -995,7 +988,7 @@ int icmp_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -1013,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
icmph = icmp_hdr(skb);
- ICMPMSGIN_INC_STATS_BH(icmph->type);
+ ICMPMSGIN_INC_STATS_BH(net, icmph->type);
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
@@ -1029,9 +1022,6 @@ int icmp_rcv(struct sk_buff *skb)
*/
if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
- struct net *net;
-
- net = dev_net(rt->u.dst.dev);
/*
* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
* silently ignored (we let user decide with a sysctl).
@@ -1057,7 +1047,7 @@ drop:
kfree_skb(skb);
return 0;
error:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
goto drop;
}
@@ -1217,5 +1207,4 @@ int __init icmp_init(void)
EXPORT_SYMBOL(icmp_err_convert);
EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(icmp_statistics);
EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2769dc4a4c8..6203ece5360 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,8 +8,6 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
- * Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $
- *
* Authors:
* Alan Cox <Alan.Cox@linux.org>
*
@@ -1198,7 +1196,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
ASSERT_RTNL();
- if (dev_net(in_dev->dev) != &init_net)
+ if (!net_eq(dev_net(in_dev->dev), &init_net))
return;
for (im=in_dev->mc_list; im; im=im->next) {
@@ -1280,7 +1278,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
ASSERT_RTNL();
- if (dev_net(in_dev->dev) != &init_net)
+ if (!net_eq(dev_net(in_dev->dev), &init_net))
return;
for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
@@ -1310,7 +1308,7 @@ void ip_mc_down(struct in_device *in_dev)
ASSERT_RTNL();
- if (dev_net(in_dev->dev) != &init_net)
+ if (!net_eq(dev_net(in_dev->dev), &init_net))
return;
for (i=in_dev->mc_list; i; i=i->next)
@@ -1333,7 +1331,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
{
ASSERT_RTNL();
- if (dev_net(in_dev->dev) != &init_net)
+ if (!net_eq(dev_net(in_dev->dev), &init_net))
return;
in_dev->mc_tomb = NULL;
@@ -1359,7 +1357,7 @@ void ip_mc_up(struct in_device *in_dev)
ASSERT_RTNL();
- if (dev_net(in_dev->dev) != &init_net)
+ if (!net_eq(dev_net(in_dev->dev), &init_net))
return;
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
@@ -1378,7 +1376,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
ASSERT_RTNL();
- if (dev_net(in_dev->dev) != &init_net)
+ if (!net_eq(dev_net(in_dev->dev), &init_net))
return;
/* Deactivate timers */
@@ -1762,7 +1760,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
if (!ipv4_is_multicast(addr))
return -EINVAL;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return -EPROTONOSUPPORT;
rtnl_lock();
@@ -1833,7 +1831,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
u32 ifindex;
int ret = -EADDRNOTAVAIL;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return -EPROTONOSUPPORT;
rtnl_lock();
@@ -1881,7 +1879,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
if (!ipv4_is_multicast(addr))
return -EINVAL;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return -EPROTONOSUPPORT;
rtnl_lock();
@@ -2017,7 +2015,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
msf->imsf_fmode != MCAST_EXCLUDE)
return -EINVAL;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return -EPROTONOSUPPORT;
rtnl_lock();
@@ -2100,7 +2098,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return -EPROTONOSUPPORT;
rtnl_lock();
@@ -2165,7 +2163,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return -EPROTONOSUPPORT;
rtnl_lock();
@@ -2252,7 +2250,7 @@ void ip_mc_drop_socket(struct sock *sk)
if (inet->mc_list == NULL)
return;
- if (sock_net(sk) != &init_net)
+ if (!net_eq(sock_net(sk), &init_net))
return;
rtnl_lock();
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ec834480abe..bb81c958b74 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
rover = net_random() % remaining + low;
do {
- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+ head = &hashinfo->bhash[inet_bhashfn(net, rover,
+ hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->ib_net == net && tb->port == rover)
@@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
*/
snum = rover;
} else {
- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+ head = &hashinfo->bhash[inet_bhashfn(net, snum,
+ hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->ib_net == net && tb->port == snum)
@@ -336,15 +338,16 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
.uli_u = { .ports =
{ .sport = inet_sk(sk)->sport,
.dport = ireq->rmt_port } } };
+ struct net *net = sock_net(sk);
security_req_classify_flow(req, &fl);
- if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
ip_rt_put(rt);
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
return &rt->u.dst;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index da97695e709..c10036e7a46 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1,8 +1,6 @@
/*
* inet_diag.c Module for monitoring INET transport protocols sockets.
*
- * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4ed429bd595..0546a0bc97e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor);
static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f,
- unsigned int hash, void *arg)
+ void *arg)
{
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
struct hlist_node *n;
#endif
+ unsigned int hash;
write_lock(&f->lock);
+ /*
+ * While we stayed w/o the lock other CPU could update
+ * the rnd seed, so we need to re-calculate the hash
+ * chain. Fortunatelly the qp_in can be used to get one.
+ */
+ hash = f->hashfn(qp_in);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
* such entry could be created on other cpu, while we
@@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f, void *arg, unsigned int hash)
+ struct inet_frags *f, void *arg)
{
struct inet_frag_queue *q;
@@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
if (q == NULL)
return NULL;
- return inet_frag_intern(nf, q, f, hash, arg);
+ return inet_frag_intern(nf, q, f, arg);
}
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
@@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frag_queue *q;
struct hlist_node *n;
- read_lock(&f->lock);
hlist_for_each_entry(q, n, &f->hash[hash], list) {
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
@@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
}
read_unlock(&f->lock);
- return inet_frag_create(nf, f, key, hash);
+ return inet_frag_create(nf, f, key);
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2023d37b270..115f53722d2 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
static void __inet_put_port(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+ const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num,
+ hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port);
void __inet_inherit_port(struct sock *sk, struct sock *child)
{
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+ const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num,
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -192,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net,
const struct hlist_head *head;
read_lock(&hashinfo->lhash_lock);
- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+ head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
if (!hlist_empty(head)) {
const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
@@ -225,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net,
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
@@ -265,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+ struct net *net = sock_net(sk);
+ unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
@@ -310,11 +312,11 @@ unique:
if (twp) {
*twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
@@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
spin_lock(&head->lock);
/* Does not bother with rcv_saddr checks,
@@ -493,7 +496,7 @@ ok:
goto out;
}
- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 4a4d49fca1f..cfd034a2b96 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
out2: /* send aggregated SKBs to stack */
lro_flush(lro_mgr, lro_desc);
-out: /* Original SKB has to be posted to stack */
- skb->ip_summed = lro_mgr->ip_summed;
+out:
return 1;
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ce16e9ac24c..75c2def8f9a 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
write_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
__hlist_del(&tw->tw_bind_node);
@@ -81,7 +82,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
BUG_TRAP(icsk->icsk_bind_hash);
@@ -158,6 +160,9 @@ rescan:
__inet_twsk_del_dead_node(tw);
spin_unlock(&twdr->death_lock);
__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+#endif
inet_twsk_put(tw);
killed++;
spin_lock(&twdr->death_lock);
@@ -176,8 +181,9 @@ rescan:
}
twdr->tw_count -= killed;
- NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
-
+#ifndef CONFIG_NET_NS
+ NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
+#endif
return ret;
}
@@ -370,6 +376,9 @@ void inet_twdr_twcal_tick(unsigned long data)
&twdr->twcal_row[slot]) {
__inet_twsk_del_dead_node(tw);
__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+#endif
inet_twsk_put(tw);
killed++;
}
@@ -393,7 +402,9 @@ void inet_twdr_twcal_tick(unsigned long data)
out:
if ((twdr->tw_count -= killed) == 0)
del_timer(&twdr->tw_timer);
- NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
+#ifndef CONFIG_NET_NS
+ NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
+#endif
spin_unlock(&twdr->death_lock);
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index af995198f64..a456ceeac3f 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -3,8 +3,6 @@
*
* This source is covered by the GNU GPL, the same as all kernel sources.
*
- * Version: $Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $
- *
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 4813c39b438..450016b89a1 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -5,8 +5,6 @@
*
* The IP forwarding functionality.
*
- * Version: $Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $
- *
* Authors: see ip.c
*
* Fixes:
@@ -44,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -58,6 +56,9 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options * opt = &(IPCB(skb)->opt);
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
goto drop;
@@ -87,7 +88,7 @@ int ip_forward(struct sk_buff *skb)
if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
(ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->u.dst)));
goto drop;
@@ -122,7 +123,7 @@ sr_failed:
too_many_hops:
/* Tell the sender its packet died... */
- IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd6ce6ac635..38d38f05801 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,8 +5,6 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
- *
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
*
@@ -180,7 +178,7 @@ static void ip_evictor(struct net *net)
evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
if (evicted)
- IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
+ IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
}
/*
@@ -189,8 +187,10 @@ static void ip_evictor(struct net *net)
static void ip_expire(unsigned long arg)
{
struct ipq *qp;
+ struct net *net;
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ net = container_of(qp->q.net, struct net, ipv4.frags);
spin_lock(&qp->q.lock);
@@ -199,14 +199,12 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
struct sk_buff *head = qp->q.fragments;
- struct net *net;
- net = container_of(qp->q.net, struct net, ipv4.frags);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -229,6 +227,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
arg.iph = iph;
arg.user = user;
+
+ read_lock(&ip4_frags.lock);
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -261,7 +261,10 @@ static inline int ip_frag_too_far(struct ipq *qp)
rc = qp->q.fragments && (end - start) > max;
if (rc) {
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ struct net *net;
+
+ net = container_of(qp->q.net, struct net, ipv4.frags);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
}
return rc;
@@ -545,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
return 0;
@@ -560,7 +563,7 @@ out_oversize:
"Oversized IP packet from " NIPQUAD_FMT ".\n",
NIPQUAD(qp->saddr));
out_fail:
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS);
return err;
}
@@ -570,9 +573,9 @@ int ip_defrag(struct sk_buff *skb, u32 user)
struct ipq *qp;
struct net *net;
- IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
-
net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+
/* Start by cleaning up the memory. */
if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
ip_evictor(net);
@@ -590,7 +593,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
return ret;
}
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -ENOMEM;
}
@@ -598,7 +601,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
#ifdef CONFIG_SYSCTL
static int zero;
-static struct ctl_table ip4_frags_ctl_table[] = {
+static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
.procname = "ipfrag_high_thresh",
@@ -624,6 +627,10 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies
},
+ { }
+};
+
+static struct ctl_table ip4_frags_ctl_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
.procname = "ipfrag_secret_interval",
@@ -644,22 +651,20 @@ static struct ctl_table ip4_frags_ctl_table[] = {
{ }
};
-static int ip4_frags_ctl_register(struct net *net)
+static int ip4_frags_ns_ctl_register(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
- table = ip4_frags_ctl_table;
+ table = ip4_frags_ns_ctl_table;
if (net != &init_net) {
- table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+ table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
if (table == NULL)
goto err_alloc;
table[0].data = &net->ipv4.frags.high_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
table[2].data = &net->ipv4.frags.timeout;
- table[3].mode &= ~0222;
- table[4].mode &= ~0222;
}
hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
@@ -676,7 +681,7 @@ err_alloc:
return -ENOMEM;
}
-static void ip4_frags_ctl_unregister(struct net *net)
+static void ip4_frags_ns_ctl_unregister(struct net *net)
{
struct ctl_table *table;
@@ -684,13 +689,22 @@ static void ip4_frags_ctl_unregister(struct net *net)
unregister_net_sysctl_table(net->ipv4.frags_hdr);
kfree(table);
}
+
+static void ip4_frags_ctl_register(void)
+{
+ register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+}
#else
-static inline int ip4_frags_ctl_register(struct net *net)
+static inline int ip4_frags_ns_ctl_register(struct net *net)
{
return 0;
}
-static inline void ip4_frags_ctl_unregister(struct net *net)
+static inline void ip4_frags_ns_ctl_unregister(struct net *net)
+{
+}
+
+static inline void ip4_frags_ctl_register(void)
{
}
#endif
@@ -714,12 +728,12 @@ static int ipv4_frags_init_net(struct net *net)
inet_frags_init_net(&net->ipv4.frags);
- return ip4_frags_ctl_register(net);
+ return ip4_frags_ns_ctl_register(net);
}
static void ipv4_frags_exit_net(struct net *net)
{
- ip4_frags_ctl_unregister(net);
+ ip4_frags_ns_ctl_unregister(net);
inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
}
@@ -730,6 +744,7 @@ static struct pernet_operations ip4_frags_ops = {
void __init ipfrag_init(void)
{
+ ip4_frags_ctl_register();
register_pernet_subsys(&ip4_frags_ops);
ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4342cba4ff8..2a61158ea72 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -473,6 +473,8 @@ static int ipgre_rcv(struct sk_buff *skb)
read_lock(&ipgre_lock);
if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
iph->saddr, iph->daddr, key)) != NULL) {
+ struct net_device_stats *stats = &tunnel->dev->stats;
+
secpath_reset(skb);
skb->protocol = *(__be16*)(h + 2);
@@ -497,28 +499,28 @@ static int ipgre_rcv(struct sk_buff *skb)
/* Looped back packet, drop it! */
if (skb->rtable->fl.iif == 0)
goto drop;
- tunnel->stat.multicast++;
+ stats->multicast++;
skb->pkt_type = PACKET_BROADCAST;
}
#endif
if (((flags&GRE_CSUM) && csum) ||
(!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
- tunnel->stat.rx_crc_errors++;
- tunnel->stat.rx_errors++;
+ stats->rx_crc_errors++;
+ stats->rx_errors++;
goto drop;
}
if (tunnel->parms.i_flags&GRE_SEQ) {
if (!(flags&GRE_SEQ) ||
(tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
- tunnel->stat.rx_fifo_errors++;
- tunnel->stat.rx_errors++;
+ stats->rx_fifo_errors++;
+ stats->rx_errors++;
goto drop;
}
tunnel->i_seqno = seqno + 1;
}
- tunnel->stat.rx_packets++;
- tunnel->stat.rx_bytes += skb->len;
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
skb->dev = tunnel->dev;
dst_release(skb->dst);
skb->dst = NULL;
@@ -540,7 +542,7 @@ drop_nolock:
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->stat;
+ struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
u8 tos;
@@ -554,7 +556,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
int mtu;
if (tunnel->recursion++) {
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -570,7 +572,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
/* NBMA tunnel */
if (skb->dst == NULL) {
- tunnel->stat.tx_fifo_errors++;
+ stats->tx_fifo_errors++;
goto tx_error;
}
@@ -621,7 +623,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.tos = RT_TOS(tos) } },
.proto = IPPROTO_GRE };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- tunnel->stat.tx_carrier_errors++;
+ stats->tx_carrier_errors++;
goto tx_error;
}
}
@@ -629,7 +631,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (tdev == dev) {
ip_rt_put(rt);
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -954,11 +956,6 @@ done:
return err;
}
-static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
-{
- return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1084,7 +1081,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->uninit = ipgre_tunnel_uninit;
dev->destructor = free_netdev;
dev->hard_start_xmit = ipgre_tunnel_xmit;
- dev->get_stats = ipgre_tunnel_get_stats;
dev->do_ioctl = ipgre_tunnel_ioctl;
dev->change_mtu = ipgre_tunnel_change_mtu;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ff77a4a7f9e..e0bed56c51f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,8 +5,6 @@
*
* The Internet Protocol (IP) module.
*
- * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@super.org>
@@ -147,12 +145,6 @@
#include <linux/netlink.h>
/*
- * SNMP management statistics
- */
-
-DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
-
-/*
* Process Router Attention IP option
*/
int ip_call_ra_chain(struct sk_buff *skb)
@@ -232,16 +224,16 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
protocol = -ret;
goto resubmit;
}
- IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
} else {
if (!raw) {
if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_PROT_UNREACH, 0);
}
} else
- IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
kfree_skb(skb);
}
}
@@ -283,7 +275,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
--ANK (980813)
*/
if (skb_cow(skb, skb_headroom(skb))) {
- IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -292,7 +284,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
if (ip_options_compile(dev_net(dev), opt, skb)) {
- IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
goto drop;
}
@@ -336,9 +328,11 @@ static int ip_rcv_finish(struct sk_buff *skb)
skb->dev);
if (unlikely(err)) {
if (err == -EHOSTUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP_INC_STATS_BH(dev_net(skb->dev),
+ IPSTATS_MIB_INADDRERRORS);
else if (err == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+ IP_INC_STATS_BH(dev_net(skb->dev),
+ IPSTATS_MIB_INNOROUTES);
goto drop;
}
}
@@ -359,9 +353,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
rt = skb->rtable;
if (rt->rt_type == RTN_MULTICAST)
- IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
- IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS);
return dst_input(skb);
@@ -384,10 +378,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;
- IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
- IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto out;
}
@@ -420,7 +414,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
len = ntohs(iph->tot_len);
if (skb->len < len) {
- IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
@@ -430,7 +424,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
if (pskb_trim_rcsum(skb, len)) {
- IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -441,11 +435,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
ip_rcv_finish);
inhdr_error:
- IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
kfree_skb(skb);
out:
return NET_RX_DROP;
}
-
-EXPORT_SYMBOL(ip_statistics);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 33126ad2cfd..be3f18a7a40 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -5,8 +5,6 @@
*
* The options processing module for ip.c
*
- * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
- *
* Authors: A.N.Kuznetsov
*
*/
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e527628f56c..465544f6281 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,8 +5,6 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@super.org>
@@ -184,9 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
unsigned int hh_len = LL_RESERVED_SPACE(dev);
if (rt->rt_type == RTN_MULTICAST)
- IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
- IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -246,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -300,7 +298,7 @@ int ip_output(struct sk_buff *skb)
{
struct net_device *dev = skb->dst->dev;
- IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -391,7 +389,7 @@ packet_routed:
return ip_local_out(skb);
no_route:
- IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
@@ -453,7 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
iph = ip_hdr(skb);
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
@@ -544,7 +542,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
err = output(skb);
if (!err)
- IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
@@ -554,7 +552,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
}
if (err == 0) {
- IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
return 0;
}
@@ -563,7 +561,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
kfree_skb(frag);
frag = skb;
}
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
@@ -675,15 +673,15 @@ slow_path:
if (err)
goto fail;
- IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
}
kfree_skb(skb);
- IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
return err;
fail:
kfree_skb(skb);
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
@@ -1049,7 +1047,7 @@ alloc_new_skb:
error:
inet->cork.length -= length;
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -1191,7 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
error:
inet->cork.length -= size;
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -1213,6 +1211,7 @@ int ip_push_pending_frames(struct sock *sk)
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
struct rtable *rt = (struct rtable *)inet->cork.dst;
struct iphdr *iph;
@@ -1282,7 +1281,7 @@ int ip_push_pending_frames(struct sock *sk)
skb->dst = dst_clone(&rt->u.dst);
if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(((struct icmphdr *)
+ icmp_out_count(net, ((struct icmphdr *)
skb_transport_header(skb))->type);
/* Netfilter gets whole the not fragmented skb. */
@@ -1299,7 +1298,7 @@ out:
return err;
error:
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
goto out;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e0514e82308..105d92a039b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,8 +5,6 @@
*
* The IP to API glue.
*
- * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $
- *
* Authors: see ip.c
*
* Fixes:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ed45037ce9b..42065fff46c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,6 +1,4 @@
/*
- * $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $
- *
* Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
* user-supplied information to configure own IP address and routes.
*
@@ -434,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
unsigned char *sha, *tha; /* s for "source", t for "target" */
struct ic_device *d;
- if (dev_net(dev) != &init_net)
+ if (!net_eq(dev_net(dev), &init_net))
goto drop;
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -854,7 +852,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
struct ic_device *d;
int len, ext_len;
- if (dev_net(dev) != &init_net)
+ if (!net_eq(dev_net(dev), &init_net))
goto drop;
/* Perform verifications before taking the lock. */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index af5cb53da5c..4c6d2caf920 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,8 +1,6 @@
/*
* Linux NET3: IP/IP protocol decoder.
*
- * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
- *
* Authors:
* Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
*
@@ -368,8 +366,8 @@ static int ipip_rcv(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
- tunnel->stat.rx_packets++;
- tunnel->stat.rx_bytes += skb->len;
+ tunnel->dev->stats.rx_packets++;
+ tunnel->dev->stats.rx_bytes += skb->len;
skb->dev = tunnel->dev;
dst_release(skb->dst);
skb->dst = NULL;
@@ -392,7 +390,7 @@ static int ipip_rcv(struct sk_buff *skb)
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->stat;
+ struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
@@ -405,7 +403,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
int mtu;
if (tunnel->recursion++) {
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -418,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (!dst) {
/* NBMA tunnel */
if ((rt = skb->rtable) == NULL) {
- tunnel->stat.tx_fifo_errors++;
+ stats->tx_fifo_errors++;
goto tx_error;
}
if ((dst = rt->rt_gateway) == 0)
@@ -433,7 +431,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.tos = RT_TOS(tos) } },
.proto = IPPROTO_IPIP };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- tunnel->stat.tx_carrier_errors++;
+ stats->tx_carrier_errors++;
goto tx_error_icmp;
}
}
@@ -441,7 +439,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (tdev == dev) {
ip_rt_put(rt);
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -451,7 +449,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
if (mtu < 68) {
- tunnel->stat.collisions++;
+ stats->collisions++;
ip_rt_put(rt);
goto tx_error;
}
@@ -685,11 +683,6 @@ done:
return err;
}
-static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
-{
- return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -702,7 +695,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
{
dev->uninit = ipip_tunnel_uninit;
dev->hard_start_xmit = ipip_tunnel_xmit;
- dev->get_stats = ipip_tunnel_get_stats;
dev->do_ioctl = ipip_tunnel_ioctl;
dev->change_mtu = ipip_tunnel_change_mtu;
dev->destructor = free_netdev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 11700a4dcd9..c519b8d30ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,8 +9,6 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
- *
* Fixes:
* Michael Chastain : Incorrect size of copying.
* Alan Cox : Added the cache manager code
@@ -120,6 +118,31 @@ static struct timer_list ipmr_expire_timer;
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
+static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
+{
+ dev_close(dev);
+
+ dev = __dev_get_by_name(&init_net, "tunl0");
+ if (dev) {
+ struct ifreq ifr;
+ mm_segment_t oldfs;
+ struct ip_tunnel_parm p;
+
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = v->vifc_rmt_addr.s_addr;
+ p.iph.saddr = v->vifc_lcl_addr.s_addr;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPIP;
+ sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+ ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS);
+ dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
+ set_fs(oldfs);
+ }
+}
+
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
@@ -161,6 +184,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
if (dev_open(dev))
goto failure;
+ dev_hold(dev);
}
}
return dev;
@@ -181,26 +205,20 @@ static int reg_vif_num = -1;
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
read_lock(&mrt_lock);
- ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
return 0;
}
-static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
-{
- return (struct net_device_stats*)netdev_priv(dev);
-}
-
static void reg_vif_setup(struct net_device *dev)
{
dev->type = ARPHRD_PIMREG;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
dev->hard_start_xmit = reg_vif_xmit;
- dev->get_stats = reg_vif_get_stats;
dev->destructor = free_netdev;
}
@@ -209,8 +227,7 @@ static struct net_device *ipmr_reg_vif(void)
struct net_device *dev;
struct in_device *in_dev;
- dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
- reg_vif_setup);
+ dev = alloc_netdev(0, "pimreg", reg_vif_setup);
if (dev == NULL)
return NULL;
@@ -234,6 +251,8 @@ static struct net_device *ipmr_reg_vif(void)
if (dev_open(dev))
goto failure;
+ dev_hold(dev);
+
return dev;
failure:
@@ -248,9 +267,10 @@ failure:
/*
* Delete a VIF entry
+ * @notify: Set to 1, if the caller is a notifier_call
*/
-static int vif_delete(int vifi)
+static int vif_delete(int vifi, int notify)
{
struct vif_device *v;
struct net_device *dev;
@@ -293,7 +313,7 @@ static int vif_delete(int vifi)
ip_rt_multicast_event(in_dev);
}
- if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+ if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
unregister_netdevice(dev);
dev_put(dev);
@@ -398,6 +418,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
struct vif_device *v = &vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
+ int err;
/* Is vif busy ? */
if (VIF_EXISTS(vifi))
@@ -415,18 +436,34 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
dev = ipmr_reg_vif();
if (!dev)
return -ENOBUFS;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ unregister_netdevice(dev);
+ dev_put(dev);
+ return err;
+ }
break;
#endif
case VIFF_TUNNEL:
dev = ipmr_new_tunnel(vifc);
if (!dev)
return -ENOBUFS;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ ipmr_del_tunnel(dev, vifc);
+ dev_put(dev);
+ return err;
+ }
break;
case 0:
dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
if (!dev)
return -EADDRNOTAVAIL;
- dev_put(dev);
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ dev_put(dev);
+ return err;
+ }
break;
default:
return -EINVAL;
@@ -435,7 +472,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
return -EADDRNOTAVAIL;
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
- dev_set_allmulti(dev, +1);
ip_rt_multicast_event(in_dev);
/*
@@ -458,7 +494,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
- dev_hold(dev);
v->dev=dev;
#ifdef CONFIG_IP_PIMSM
if (v->flags&VIFF_REGISTER)
@@ -805,7 +840,7 @@ static void mroute_clean_tables(struct sock *sk)
*/
for (i=0; i<maxvif; i++) {
if (!(vif_table[i].flags&VIFF_STATIC))
- vif_delete(i);
+ vif_delete(i, 0);
}
/*
@@ -918,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
if (optname==MRT_ADD_VIF) {
ret = vif_add(&vif, sk==mroute_socket);
} else {
- ret = vif_delete(vif.vifc_vifi);
+ ret = vif_delete(vif.vifc_vifi, 0);
}
rtnl_unlock();
return ret;
@@ -1089,7 +1124,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
struct vif_device *v;
int ct;
- if (dev_net(dev) != &init_net)
+ if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
if (event != NETDEV_UNREGISTER)
@@ -1097,7 +1132,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
v=&vif_table[0];
for (ct=0;ct<maxvif;ct++,v++) {
if (v->dev==dev)
- vif_delete(ct);
+ vif_delete(ct, 1);
}
return NOTIFY_DONE;
}
@@ -1143,7 +1178,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -1170,8 +1205,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out+=skb->len;
- ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
+ vif->dev->stats.tx_bytes += skb->len;
+ vif->dev->stats.tx_packets++;
ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
kfree_skb(skb);
return;
@@ -1206,7 +1241,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
to blackhole.
*/
- IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt);
goto out_free;
}
@@ -1230,8 +1265,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
- ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
- ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
+ vif->dev->stats.tx_packets++;
+ vif->dev->stats.tx_bytes += skb->len;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1487,8 +1522,8 @@ int pim_rcv_v1(struct sk_buff * skb)
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
skb->dst = NULL;
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
+ reg_dev->stats.rx_bytes += skb->len;
+ reg_dev->stats.rx_packets++;
nf_reset(skb);
netif_rx(skb);
dev_put(reg_dev);
@@ -1542,8 +1577,8 @@ static int pim_rcv(struct sk_buff * skb)
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
+ reg_dev->stats.rx_bytes += skb->len;
+ reg_dev->stats.rx_packets++;
skb->dst = NULL;
nf_reset(skb);
netif_rx(skb);
@@ -1887,16 +1922,36 @@ static struct net_protocol pim_protocol = {
* Setup for IP multicast routing
*/
-void __init ip_mr_init(void)
+int __init ip_mr_init(void)
{
+ int err;
+
mrt_cachep = kmem_cache_create("ip_mrt_cache",
sizeof(struct mfc_cache),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+ if (!mrt_cachep)
+ return -ENOMEM;
+
setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
- register_netdevice_notifier(&ip_mr_notifier);
+ err = register_netdevice_notifier(&ip_mr_notifier);
+ if (err)
+ goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
- proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
- proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
+ err = -ENOMEM;
+ if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
+ goto proc_vif_fail;
+ if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
+ goto proc_cache_fail;
#endif
+ return 0;
+reg_notif_fail:
+ kmem_cache_destroy(mrt_cachep);
+#ifdef CONFIG_PROC_FS
+proc_vif_fail:
+ unregister_netdevice_notifier(&ip_mr_notifier);
+proc_cache_fail:
+ proc_net_remove(&init_net, "ip_mr_vif");
+#endif
+ return err;
}
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 535abe0c45e..1f1897a1a70 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -1,8 +1,6 @@
/*
* ip_vs_app.c: Application module support for IPVS
*
- * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 65f1ba11275..f8bdae47a77 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 963981a9d50..a7879eafc3b 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
@@ -993,7 +991,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
== sysctl_ip_vs_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
- (cp->state == IP_VS_TCP_S_CLOSE)))))
+ (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+ (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
cp->old_state = cp->state;
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 94c5767c8e0..9a5ace0b4dd 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index dcf5d46aaa5..8afc1503ed2 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -1,8 +1,6 @@
/*
* IPVS: Destination Hashing scheduling module
*
- * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* Inspired by the consistent hashing scheduler patch from
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index dfa0d713c80..bc04eedd6db 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -1,8 +1,6 @@
/*
* ip_vs_est.c: simple rate estimator for IPVS
*
- * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 59aa166b767..c1c758e4f73 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_ftp.c: IPVS ftp application module
*
- * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* Changes:
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 3888642706a..0efa3db4b18 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -1,8 +1,6 @@
/*
* IPVS: Locality-Based Least-Connection scheduling module
*
- * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index daa260eb21c..8e3bbeb4513 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -1,8 +1,6 @@
/*
* IPVS: Locality-Based Least-Connection with Replication scheduler
*
- * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index d88fef90a64..ac9f08e065d 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -1,8 +1,6 @@
/*
* IPVS: Least-Connection Scheduling module
*
- * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index bc2a9e5f2a7..a46bf258d42 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -1,8 +1,6 @@
/*
* IPVS: Never Queue scheduling module
*
- * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 4b1c16cbb16..876714f23d6 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto.c: transport protocol load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 4bf835e1d86..73e0ea87c1f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index db6a6b7b1a0..21d70c8ffa5 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index b83dc14b0a4..d0ea467986a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_tcp.c: TCP load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 75771cb3cd6..c6be5d56823 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_udp.c: UDP load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 433f8a94792..c8db12d39e6 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -1,8 +1,6 @@
/*
* IPVS: Round-Robin Scheduling module
*
- * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 121a32b1b75..b6476730985 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index dd7c128f9db..2a7d3135818 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -1,8 +1,6 @@
/*
* IPVS: Shortest Expected Delay scheduling module
*
- * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 1b25b00ef1e..b8fdfac6500 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -1,8 +1,6 @@
/*
* IPVS: Source Hashing scheduling module
*
- * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index eff54efe035..45e9bd96c28 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* ip_vs_sync: sync connection info from master load balancer to backups
@@ -29,10 +27,12 @@
#include <linux/in.h>
#include <linux/igmp.h> /* for ip_mc_join_group */
#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
#include <net/ip.h>
#include <net/sock.h>
-#include <asm/uaccess.h> /* for get_fs and set_fs */
#include <net/ip_vs.h>
@@ -68,8 +68,8 @@ struct ip_vs_sync_conn_options {
};
struct ip_vs_sync_thread_data {
- struct completion *startup;
- int state;
+ struct socket *sock;
+ char *buf;
};
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
@@ -140,18 +140,19 @@ volatile int ip_vs_backup_syncid = 0;
char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
/* multicast addr */
-static struct sockaddr_in mcast_addr;
+static struct sockaddr_in mcast_addr = {
+ .sin_family = AF_INET,
+ .sin_port = __constant_htons(IP_VS_SYNC_PORT),
+ .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
+};
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
-{
- spin_lock(&ip_vs_sync_lock);
- list_add_tail(&sb->list, &ip_vs_sync_queue);
- spin_unlock(&ip_vs_sync_lock);
-}
-
-static inline struct ip_vs_sync_buff * sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
struct ip_vs_sync_buff *sb;
@@ -195,6 +196,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb);
}
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+ spin_lock(&ip_vs_sync_lock);
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ip_vs_sync_queue);
+ else
+ ip_vs_sync_buff_release(sb);
+ spin_unlock(&ip_vs_sync_lock);
+}
+
/*
* Get the current sync buffer if it has been created for more
* than the specified time or the specified time is zero.
@@ -574,14 +585,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
static struct socket * make_send_sock(void)
{
struct socket *sock;
+ int result;
/* First create a socket */
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (result < 0) {
IP_VS_ERR("Error during creation of socket; terminating\n");
- return NULL;
+ return ERR_PTR(result);
}
- if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
+ result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error setting outbound mcast interface\n");
goto error;
}
@@ -589,14 +603,15 @@ static struct socket * make_send_sock(void)
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
+ result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error binding address of the mcast interface\n");
goto error;
}
- if (sock->ops->connect(sock,
- (struct sockaddr*)&mcast_addr,
- sizeof(struct sockaddr), 0) < 0) {
+ result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+ sizeof(struct sockaddr), 0);
+ if (result < 0) {
IP_VS_ERR("Error connecting to the multicast addr\n");
goto error;
}
@@ -605,7 +620,7 @@ static struct socket * make_send_sock(void)
error:
sock_release(sock);
- return NULL;
+ return ERR_PTR(result);
}
@@ -615,27 +630,30 @@ static struct socket * make_send_sock(void)
static struct socket * make_receive_sock(void)
{
struct socket *sock;
+ int result;
/* First create a socket */
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (result < 0) {
IP_VS_ERR("Error during creation of socket; terminating\n");
- return NULL;
+ return ERR_PTR(result);
}
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = 1;
- if (sock->ops->bind(sock,
- (struct sockaddr*)&mcast_addr,
- sizeof(struct sockaddr)) < 0) {
+ result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+ sizeof(struct sockaddr));
+ if (result < 0) {
IP_VS_ERR("Error binding to the multicast addr\n");
goto error;
}
/* join the multicast group */
- if (join_mcast_group(sock->sk,
- (struct in_addr*)&mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn) < 0) {
+ result = join_mcast_group(sock->sk,
+ (struct in_addr *) &mcast_addr.sin_addr,
+ ip_vs_backup_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error joining to the multicast group\n");
goto error;
}
@@ -644,7 +662,7 @@ static struct socket * make_receive_sock(void)
error:
sock_release(sock);
- return NULL;
+ return ERR_PTR(result);
}
@@ -702,44 +720,29 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
}
-static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
-static pid_t sync_master_pid = 0;
-static pid_t sync_backup_pid = 0;
-
-static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
-static int stop_master_sync = 0;
-static int stop_backup_sync = 0;
-
-static void sync_master_loop(void)
+static int sync_thread_master(void *data)
{
- struct socket *sock;
+ struct ip_vs_sync_thread_data *tinfo = data;
struct ip_vs_sync_buff *sb;
- /* create the sending multicast socket */
- sock = make_send_sock();
- if (!sock)
- return;
-
IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
ip_vs_master_mcast_ifn, ip_vs_master_syncid);
- for (;;) {
- while ((sb=sb_dequeue())) {
- ip_vs_send_sync_msg(sock, sb->mesg);
+ while (!kthread_should_stop()) {
+ while ((sb = sb_dequeue())) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
/* check if entries stay in curr_sb for 2 seconds */
- if ((sb = get_curr_sync_buff(2*HZ))) {
- ip_vs_send_sync_msg(sock, sb->mesg);
+ sb = get_curr_sync_buff(2 * HZ);
+ if (sb) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
- if (stop_master_sync)
- break;
-
- msleep_interruptible(1000);
+ schedule_timeout_interruptible(HZ);
}
/* clean up the sync_buff queue */
@@ -753,267 +756,175 @@ static void sync_master_loop(void)
}
/* release the sending multicast socket */
- sock_release(sock);
+ sock_release(tinfo->sock);
+ kfree(tinfo);
+
+ return 0;
}
-static void sync_backup_loop(void)
+static int sync_thread_backup(void *data)
{
- struct socket *sock;
- char *buf;
+ struct ip_vs_sync_thread_data *tinfo = data;
int len;
- if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
- IP_VS_ERR("sync_backup_loop: kmalloc error\n");
- return;
- }
-
- /* create the receiving multicast socket */
- sock = make_receive_sock();
- if (!sock)
- goto out;
-
IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
- for (;;) {
- /* do you have data now? */
- while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
- if ((len =
- ip_vs_receive(sock, buf,
- sync_recv_mesg_maxlen)) <= 0) {
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+ !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+ || kthread_should_stop());
+
+ /* do we have data now? */
+ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+ len = ip_vs_receive(tinfo->sock, tinfo->buf,
+ sync_recv_mesg_maxlen);
+ if (len <= 0) {
IP_VS_ERR("receiving message error\n");
break;
}
- /* disable bottom half, because it accessed the data
+
+ /* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(buf, len);
+ ip_vs_process_message(tinfo->buf, len);
local_bh_enable();
}
-
- if (stop_backup_sync)
- break;
-
- msleep_interruptible(1000);
}
/* release the sending multicast socket */
- sock_release(sock);
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ kfree(tinfo);
- out:
- kfree(buf);
+ return 0;
}
-static void set_sync_pid(int sync_state, pid_t sync_pid)
-{
- if (sync_state == IP_VS_STATE_MASTER)
- sync_master_pid = sync_pid;
- else if (sync_state == IP_VS_STATE_BACKUP)
- sync_backup_pid = sync_pid;
-}
-
-static void set_stop_sync(int sync_state, int set)
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
{
- if (sync_state == IP_VS_STATE_MASTER)
- stop_master_sync = set;
- else if (sync_state == IP_VS_STATE_BACKUP)
- stop_backup_sync = set;
- else {
- stop_master_sync = set;
- stop_backup_sync = set;
- }
-}
+ struct ip_vs_sync_thread_data *tinfo;
+ struct task_struct **realtask, *task;
+ struct socket *sock;
+ char *name, *buf = NULL;
+ int (*threadfn)(void *data);
+ int result = -ENOMEM;
-static int sync_thread(void *startup)
-{
- DECLARE_WAITQUEUE(wait, current);
- mm_segment_t oldmm;
- int state;
- const char *name;
- struct ip_vs_sync_thread_data *tinfo = startup;
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+ IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+ sizeof(struct ip_vs_sync_conn));
- /* increase the module use count */
- ip_vs_use_count_inc();
+ if (state == IP_VS_STATE_MASTER) {
+ if (sync_master_thread)
+ return -EEXIST;
- if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
- state = IP_VS_STATE_MASTER;
+ strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_master_mcast_ifn));
+ ip_vs_master_syncid = syncid;
+ realtask = &sync_master_thread;
name = "ipvs_syncmaster";
- } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
- state = IP_VS_STATE_BACKUP;
+ threadfn = sync_thread_master;
+ sock = make_send_sock();
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (sync_backup_thread)
+ return -EEXIST;
+
+ strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_backup_mcast_ifn));
+ ip_vs_backup_syncid = syncid;
+ realtask = &sync_backup_thread;
name = "ipvs_syncbackup";
+ threadfn = sync_thread_backup;
+ sock = make_receive_sock();
} else {
- IP_VS_BUG();
- ip_vs_use_count_dec();
return -EINVAL;
}
- daemonize(name);
-
- oldmm = get_fs();
- set_fs(KERNEL_DS);
-
- /* Block all signals */
- spin_lock_irq(&current->sighand->siglock);
- siginitsetinv(&current->blocked, 0);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ if (IS_ERR(sock)) {
+ result = PTR_ERR(sock);
+ goto out;
+ }
- /* set the maximum length of sync message */
set_sync_mesg_maxlen(state);
+ if (state == IP_VS_STATE_BACKUP) {
+ buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ if (!buf)
+ goto outsocket;
+ }
- /* set up multicast address */
- mcast_addr.sin_family = AF_INET;
- mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
- mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
-
- add_wait_queue(&sync_wait, &wait);
-
- set_sync_pid(state, task_pid_nr(current));
- complete(tinfo->startup);
-
- /*
- * once we call the completion queue above, we should
- * null out that reference, since its allocated on the
- * stack of the creating kernel thread
- */
- tinfo->startup = NULL;
-
- /* processing master/backup loop here */
- if (state == IP_VS_STATE_MASTER)
- sync_master_loop();
- else if (state == IP_VS_STATE_BACKUP)
- sync_backup_loop();
- else IP_VS_BUG();
-
- remove_wait_queue(&sync_wait, &wait);
-
- /* thread exits */
-
- /*
- * If we weren't explicitly stopped, then we
- * exited in error, and should undo our state
- */
- if ((!stop_master_sync) && (!stop_backup_sync))
- ip_vs_sync_state -= tinfo->state;
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+ if (!tinfo)
+ goto outbuf;
- set_sync_pid(state, 0);
- IP_VS_INFO("sync thread stopped!\n");
+ tinfo->sock = sock;
+ tinfo->buf = buf;
- set_fs(oldmm);
+ task = kthread_run(threadfn, tinfo, name);
+ if (IS_ERR(task)) {
+ result = PTR_ERR(task);
+ goto outtinfo;
+ }
- /* decrease the module use count */
- ip_vs_use_count_dec();
+ /* mark as active */
+ *realtask = task;
+ ip_vs_sync_state |= state;
- set_stop_sync(state, 0);
- wake_up(&stop_sync_wait);
+ /* increase the module use count */
+ ip_vs_use_count_inc();
- /*
- * we need to free the structure that was allocated
- * for us in start_sync_thread
- */
- kfree(tinfo);
return 0;
-}
-
-
-static int fork_sync_thread(void *startup)
-{
- pid_t pid;
-
- /* fork the sync thread here, then the parent process of the
- sync thread is the init process after this thread exits. */
- repeat:
- if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
- IP_VS_ERR("could not create sync_thread due to %d... "
- "retrying.\n", pid);
- msleep_interruptible(1000);
- goto repeat;
- }
- return 0;
+outtinfo:
+ kfree(tinfo);
+outbuf:
+ kfree(buf);
+outsocket:
+ sock_release(sock);
+out:
+ return result;
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int stop_sync_thread(int state)
{
- DECLARE_COMPLETION_ONSTACK(startup);
- pid_t pid;
- struct ip_vs_sync_thread_data *tinfo;
-
- if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
- (state == IP_VS_STATE_BACKUP && sync_backup_pid))
- return -EEXIST;
-
- /*
- * Note that tinfo will be freed in sync_thread on exit
- */
- tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL);
- if (!tinfo)
- return -ENOMEM;
-
IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
- ip_vs_sync_state |= state;
if (state == IP_VS_STATE_MASTER) {
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
- } else {
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- }
-
- tinfo->state = state;
- tinfo->startup = &startup;
-
- repeat:
- if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) {
- IP_VS_ERR("could not create fork_sync_thread due to %d... "
- "retrying.\n", pid);
- msleep_interruptible(1000);
- goto repeat;
- }
-
- wait_for_completion(&startup);
-
- return 0;
-}
+ if (!sync_master_thread)
+ return -ESRCH;
+ IP_VS_INFO("stopping master sync thread %d ...\n",
+ task_pid_nr(sync_master_thread));
-int stop_sync_thread(int state)
-{
- DECLARE_WAITQUEUE(wait, current);
+ /*
+ * The lock synchronizes with sb_queue_tail(), so that we don't
+ * add sync buffers to the queue, when we are already in
+ * progress of stopping the master sync daemon.
+ */
- if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
- (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
- return -ESRCH;
+ spin_lock(&ip_vs_sync_lock);
+ ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock(&ip_vs_sync_lock);
+ kthread_stop(sync_master_thread);
+ sync_master_thread = NULL;
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (!sync_backup_thread)
+ return -ESRCH;
+
+ IP_VS_INFO("stopping backup sync thread %d ...\n",
+ task_pid_nr(sync_backup_thread));
+
+ ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(sync_backup_thread);
+ sync_backup_thread = NULL;
+ } else {
+ return -EINVAL;
+ }
- IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- IP_VS_INFO("stopping sync thread %d ...\n",
- (state == IP_VS_STATE_MASTER) ?
- sync_master_pid : sync_backup_pid);
-
- __set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&stop_sync_wait, &wait);
- set_stop_sync(state, 1);
- ip_vs_sync_state -= state;
- wake_up(&sync_wait);
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&stop_sync_wait, &wait);
-
- /* Note: no need to reap the sync thread, because its parent
- process is the init process */
-
- if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
- (state == IP_VS_STATE_BACKUP && stop_backup_sync))
- IP_VS_BUG();
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
return 0;
}
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 8a9d913261d..772c3cb4eca 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -1,8 +1,6 @@
/*
* IPVS: Weighted Least-Connection Scheduling module
*
- * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 85c680add6d..1d6932d7dc9 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -1,8 +1,6 @@
/*
* IPVS: Weighted Round-Robin Scheduling module
*
- * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index f63006caea0..9892d4aca42 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -1,8 +1,6 @@
/*
* ip_vs_xmit.c: various packet transmitters for IPVS
*
- * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 2767841a8ce..f23e60c93ef 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -213,8 +213,7 @@ config IP_NF_TARGET_NETMAP
help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
- address part intact. It is similar to Fast NAT, except that
- Netfilter's connection tracking doesn't work well with Fast NAT.
+ address part intact.
To compile it as a module, choose M here. If unsure, say N.
@@ -365,6 +364,18 @@ config IP_NF_RAW
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+# security table for MAC policy
+config IP_NF_SECURITY
+ tristate "Security table"
+ depends on IP_NF_IPTABLES
+ depends on SECURITY
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d9b92fbf557..3f31291f37c 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
# matches
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 26a37cedcf2..432ce9d1c11 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
- data_len = 0;
break;
case IPQ_COPY_PACKET:
@@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
return skb;
nlmsg_failure:
- if (skb)
- kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip_queue: error creating packet message\n");
return NULL;
@@ -480,7 +477,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
{
struct net_device *dev = ptr;
- if (dev_net(dev) != &init_net)
+ if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
/* Drop any packets associated with the downed device */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 84c26dd27d8..0841aefaa50 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this,
{
const struct net_device *dev = ptr;
- if (dev_net(dev) != &init_net)
+ if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
if (event == NETDEV_DOWN) {
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 00000000000..2b472ac2263
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,180 @@
+/*
+ * "security" table
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("iptables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT)
+
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+} initial_table __initdata = {
+ .repl = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .num_entries = 4,
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ .hook_entry = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ .underflow = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+static struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(security_table.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+static unsigned int
+ipt_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_in_net(in, out)->ipv4.iptable_security);
+}
+
+static unsigned int
+ipt_forward_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_forward_net(in, out)->ipv4.iptable_security);
+}
+
+static unsigned int
+ipt_local_out_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* Somebody is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
+ if (net_ratelimit())
+ printk(KERN_INFO "iptable_security: ignoring short "
+ "SOCK_RAW packet.\n");
+ return NF_ACCEPT;
+ }
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv4.iptable_security);
+}
+
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
+ {
+ .hook = ipt_local_in_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+ {
+ .hook = ipt_forward_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+ {
+ .hook = ipt_local_out_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+};
+
+static int __net_init iptable_security_net_init(struct net *net)
+{
+ net->ipv4.iptable_security =
+ ipt_register_table(net, &security_table, &initial_table.repl);
+
+ if (IS_ERR(net->ipv4.iptable_security))
+ return PTR_ERR(net->ipv4.iptable_security);
+
+ return 0;
+}
+
+static void __net_exit iptable_security_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_security);
+}
+
+static struct pernet_operations iptable_security_net_ops = {
+ .init = iptable_security_net_init,
+ .exit = iptable_security_net_exit,
+};
+
+static int __init iptable_security_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_security_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ if (ret < 0)
+ goto cleanup_table;
+
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ return ret;
+}
+
+static void __exit iptable_security_fini(void)
+{
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ unregister_pernet_subsys(&iptable_security_net_ops);
+}
+
+module_init(iptable_security_init);
+module_exit(iptable_security_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 78ab19accac..97791048fa9 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct,
means this will only run once even if count hits zero twice
(theoretically possible with SMP) */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
+ if (atomic_dec_and_test(&ct->proto.icmp.count))
+ nf_ct_kill_acct(ct, ctinfo, skb);
} else {
atomic_inc(&ct->proto.icmp.count);
nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 82e4c0e286b..65e470bc612 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,
sctp_sctphdr_t *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
- u32 crc32;
+ __be32 crc32;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
@@ -61,7 +61,7 @@ sctp_manip_pkt(struct sk_buff *skb,
crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
crc32);
crc32 = sctp_end_cksum(crc32);
- hdr->checksum = htonl(crc32);
+ hdr->checksum = crc32;
return true;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 7750c97fde7..ffeaffc3fff 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
unsigned int *len)
{
unsigned long subid;
- unsigned int size;
unsigned long *optr;
+ size_t size;
size = eoc - ctx->pointer + 1;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 552169b41b1..834356ea99d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,8 +7,6 @@
* PROC file system. It is mainly used for debugging and
* statistics.
*
- * Version: $Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $
- *
* Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
* Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de>
@@ -73,32 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
static int sockstat_seq_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, sockstat_seq_show, net);
- if (err < 0)
- goto err_open;
-
- return 0;
-
-err_open:
- put_net(net);
-err_net:
- return err;
-}
-
-static int sockstat_seq_release(struct inode *inode, struct file *file)
-{
- struct net *net = ((struct seq_file *)file->private_data)->private;
-
- put_net(net);
- return single_release(inode, file);
+ return single_open_net(inode, file, sockstat_seq_show);
}
static const struct file_operations sockstat_seq_fops = {
@@ -106,7 +79,7 @@ static const struct file_operations sockstat_seq_fops = {
.open = sockstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = sockstat_seq_release,
+ .release = single_release_net,
};
/* snmp items */
@@ -268,11 +241,12 @@ static void icmpmsg_put(struct seq_file *seq)
int j, i, count;
static int out[PERLINE];
+ struct net *net = seq->private;
count = 0;
for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
- if (snmp_fold_field((void **) icmpmsg_statistics, i))
+ if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
out[count++] = i;
if (count < PERLINE)
continue;
@@ -284,7 +258,7 @@ static void icmpmsg_put(struct seq_file *seq)
seq_printf(seq, "\nIcmpMsg: ");
for (j = 0; j < PERLINE; ++j)
seq_printf(seq, " %lu",
- snmp_fold_field((void **) icmpmsg_statistics,
+ snmp_fold_field((void **) net->mib.icmpmsg_statistics,
out[j]));
seq_putc(seq, '\n');
}
@@ -296,7 +270,7 @@ static void icmpmsg_put(struct seq_file *seq)
seq_printf(seq, "\nIcmpMsg:");
for (j = 0; j < count; ++j)
seq_printf(seq, " %lu", snmp_fold_field((void **)
- icmpmsg_statistics, out[j]));
+ net->mib.icmpmsg_statistics, out[j]));
}
#undef PERLINE
@@ -305,6 +279,7 @@ static void icmpmsg_put(struct seq_file *seq)
static void icmp_put(struct seq_file *seq)
{
int i;
+ struct net *net = seq->private;
seq_puts(seq, "\nIcmp: InMsgs InErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -313,18 +288,18 @@ static void icmp_put(struct seq_file *seq)
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu",
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS),
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS));
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **) icmpmsg_statistics,
+ snmp_fold_field((void **) net->mib.icmpmsg_statistics,
icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS),
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS));
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **) icmpmsg_statistics,
+ snmp_fold_field((void **) net->mib.icmpmsg_statistics,
icmpmibmap[i].index | 0x100));
}
@@ -334,6 +309,7 @@ static void icmp_put(struct seq_file *seq)
static int snmp_seq_show(struct seq_file *seq, void *v)
{
int i;
+ struct net *net = seq->private;
seq_puts(seq, "Ip: Forwarding DefaultTTL");
@@ -341,12 +317,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
- IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
+ IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
sysctl_ip_default_ttl);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)ip_statistics,
+ snmp_fold_field((void **)net->mib.ip_statistics,
snmp4_ipstats_list[i].entry));
icmp_put(seq); /* RFC 2011 compatibility */
@@ -361,11 +337,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld",
- snmp_fold_field((void **)tcp_statistics,
+ snmp_fold_field((void **)net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
else
seq_printf(seq, " %lu",
- snmp_fold_field((void **)tcp_statistics,
+ snmp_fold_field((void **)net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
}
@@ -376,7 +352,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)udp_statistics,
+ snmp_fold_field((void **)net->mib.udp_statistics,
snmp4_udp_list[i].entry));
/* the UDP and UDP-Lite MIBs are the same */
@@ -387,7 +363,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)udplite_statistics,
+ snmp_fold_field((void **)net->mib.udplite_statistics,
snmp4_udp_list[i].entry));
seq_putc(seq, '\n');
@@ -396,7 +372,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
static int snmp_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, snmp_seq_show, NULL);
+ return single_open_net(inode, file, snmp_seq_show);
}
static const struct file_operations snmp_seq_fops = {
@@ -404,7 +380,7 @@ static const struct file_operations snmp_seq_fops = {
.open = snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
@@ -415,6 +391,7 @@ static const struct file_operations snmp_seq_fops = {
static int netstat_seq_show(struct seq_file *seq, void *v)
{
int i;
+ struct net *net = seq->private;
seq_puts(seq, "TcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
@@ -423,7 +400,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nTcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)net_statistics,
+ snmp_fold_field((void **)net->mib.net_statistics,
snmp4_net_list[i].entry));
seq_puts(seq, "\nIpExt:");
@@ -433,7 +410,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nIpExt:");
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)ip_statistics,
+ snmp_fold_field((void **)net->mib.ip_statistics,
snmp4_ipextstats_list[i].entry));
seq_putc(seq, '\n');
@@ -442,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
static int netstat_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, netstat_seq_show, NULL);
+ return single_open_net(inode, file, netstat_seq_show);
}
static const struct file_operations netstat_seq_fops = {
@@ -450,18 +427,32 @@ static const struct file_operations netstat_seq_fops = {
.open = netstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
static __net_init int ip_proc_init_net(struct net *net)
{
if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
- return -ENOMEM;
+ goto out_sockstat;
+ if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops))
+ goto out_netstat;
+ if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops))
+ goto out_snmp;
+
return 0;
+
+out_snmp:
+ proc_net_remove(net, "netstat");
+out_netstat:
+ proc_net_remove(net, "sockstat");
+out_sockstat:
+ return -ENOMEM;
}
static __net_exit void ip_proc_exit_net(struct net *net)
{
+ proc_net_remove(net, "snmp");
+ proc_net_remove(net, "netstat");
proc_net_remove(net, "sockstat");
}
@@ -472,24 +463,6 @@ static __net_initdata struct pernet_operations ip_proc_ops = {
int __init ip_misc_proc_init(void)
{
- int rc = 0;
-
- if (register_pernet_subsys(&ip_proc_ops))
- goto out_pernet;
-
- if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
- goto out_netstat;
-
- if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
- goto out_snmp;
-out:
- return rc;
-out_snmp:
- proc_net_remove(&init_net, "netstat");
-out_netstat:
- unregister_pernet_subsys(&ip_proc_ops);
-out_pernet:
- rc = -ENOMEM;
- goto out;
+ return register_pernet_subsys(&ip_proc_ops);
}
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 971ab9356e5..ea50da0649f 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -5,8 +5,6 @@
*
* INET protocol dispatch tables.
*
- * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 37a1ecd9d60..cd975743bcd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,8 +5,6 @@
*
* RAW - implementation of IP "raw" sockets.
*
- * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
@@ -322,6 +320,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
struct iphdr *iph;
struct sk_buff *skb;
unsigned int iphlen;
@@ -370,7 +369,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(((struct icmphdr *)
+ icmp_out_count(net, ((struct icmphdr *)
skb_transport_header(skb))->type);
err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
@@ -386,7 +385,7 @@ error_fault:
err = -EFAULT;
kfree_skb(skb);
error:
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -608,12 +607,11 @@ static void raw_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-static int raw_destroy(struct sock *sk)
+static void raw_destroy(struct sock *sk)
{
lock_sock(sk);
ip_flush_pending_frames(sk);
release_sock(sk);
- return 0;
}
/* This gets rid of all the nasties in af_inet. -DaveM */
@@ -947,7 +945,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_printf(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
- "inode drops\n");
+ "inode ref pointer drops\n");
else
raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 96be336064f..e4ab0ac94f9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,8 +5,6 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
@@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static void rt_worker_func(struct work_struct *work);
static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
/*
* Interface to generic destination cache.
@@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void)
static struct rt_hash_bucket *rt_hash_table __read_mostly;
static unsigned rt_hash_mask __read_mostly;
static unsigned int rt_hash_log __read_mostly;
-static atomic_t rt_genid __read_mostly;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) \
(__raw_get_cpu_var(rt_cache_stat).field++)
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+ int genid)
{
return jhash_3words((__force u32)(__be32)(daddr),
(__force u32)(__be32)(saddr),
- idx, atomic_read(&rt_genid))
+ idx, genid)
& rt_hash_mask;
}
+static inline int rt_genid(struct net *net)
+{
+ return atomic_read(&net->ipv4.rt_genid);
+}
+
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
struct seq_net_private p;
@@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
struct rt_cache_iter_state *st = seq->private;
if (*pos)
return rt_cache_get_idx(seq, *pos - 1);
- st->genid = atomic_read(&rt_genid);
+ st->genid = rt_genid(seq_file_net(seq));
return SEQ_START_TOKEN;
}
@@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
}
+static inline int rt_is_expired(struct rtable *rth)
+{
+ return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+}
+
/*
* Perform a full scan of hash table and free all entries.
* Can be called by a softirq or a process.
@@ -692,6 +699,7 @@ static void rt_do_flush(int process_context)
{
unsigned int i;
struct rtable *rth, *next;
+ struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
@@ -701,11 +709,39 @@ static void rt_do_flush(int process_context)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
+#ifdef CONFIG_NET_NS
+ {
+ struct rtable ** prev, * p;
+
+ rth = rt_hash_table[i].chain;
+
+ /* defer releasing the head of the list after spin_unlock */
+ for (tail = rth; tail; tail = tail->u.dst.rt_next)
+ if (!rt_is_expired(tail))
+ break;
+ if (rth != tail)
+ rt_hash_table[i].chain = tail;
+
+ /* call rt_free on entries after the tail requiring flush */
+ prev = &rt_hash_table[i].chain;
+ for (p = *prev; p; p = next) {
+ next = p->u.dst.rt_next;
+ if (!rt_is_expired(p)) {
+ prev = &p->u.dst.rt_next;
+ } else {
+ *prev = next;
+ rt_free(p);
+ }
+ }
+ }
+#else
rth = rt_hash_table[i].chain;
rt_hash_table[i].chain = NULL;
+ tail = NULL;
+#endif
spin_unlock_bh(rt_hash_lock_addr(i));
- for (; rth; rth = next) {
+ for (; rth != tail; rth = next) {
next = rth->u.dst.rt_next;
rt_free(rth);
}
@@ -738,7 +774,7 @@ static void rt_check_expire(void)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid != atomic_read(&rt_genid)) {
+ if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
continue;
@@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work)
* many times (2^24) without giving recent rt_genid.
* Jenkins hash is strong enough that litle changes of rt_genid are OK.
*/
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
{
unsigned char shuffle;
get_random_bytes(&shuffle, sizeof(shuffle));
- atomic_add(shuffle + 1U, &rt_genid);
+ atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
/*
* delay < 0 : invalidate cache (fast : entries will be deleted later)
* delay >= 0 : invalidate & flush cache (can be long)
*/
-void rt_cache_flush(int delay)
+void rt_cache_flush(struct net *net, int delay)
{
- rt_cache_invalidate();
+ rt_cache_invalidate(net);
if (delay >= 0)
rt_do_flush(!in_softirq());
}
@@ -803,10 +839,11 @@ void rt_cache_flush(int delay)
/*
* We change rt_genid and let gc do the cleanup
*/
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
{
- rt_cache_invalidate();
- mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+ struct net *net = (struct net *)__net;
+ rt_cache_invalidate(net);
+ mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
}
/*
@@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid == atomic_read(&rt_genid) &&
+ if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
rthp = &rth->u.dst.rt_next;
@@ -964,7 +1001,7 @@ restart:
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid != atomic_read(&rt_genid)) {
+ if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
continue;
@@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
while ((aux = *rthp) != NULL) {
- if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+ if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->u.dst.rt_next;
rt_free(aux);
continue;
@@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
for (i = 0; i < 2; i++) {
for (k = 0; k < 2; k++) {
- unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+ rt_genid(net));
rthp=&rt_hash_table[hash].chain;
@@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rth->fl.fl4_src != skeys[i] ||
rth->fl.oif != ikeys[k] ||
rth->fl.iif != 0 ||
- rth->rt_genid != atomic_read(&rt_genid) ||
+ rt_is_expired(rth) ||
!net_eq(dev_net(rth->u.dst.dev), net)) {
rthp = &rth->u.dst.rt_next;
continue;
@@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rt->u.dst.neighbour = NULL;
rt->u.dst.hh = NULL;
rt->u.dst.xfrm = NULL;
- rt->rt_genid = atomic_read(&rt_genid);
+ rt->rt_genid = rt_genid(net);
rt->rt_flags |= RTCF_REDIRECTED;
/* Gateway is different ... */
@@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
rt->u.dst.expires) {
unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
- rt->fl.oif);
+ rt->fl.oif,
+ rt_genid(dev_net(dst->dev)));
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
NIPQUAD_FMT "/%02x dropped\n",
@@ -1390,7 +1429,8 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
- IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+ IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
code = ICMP_PKT_FILTERED;
@@ -1446,7 +1486,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
for (k = 0; k < 2; k++) {
for (i = 0; i < 2; i++) {
- unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+ rt_genid(net));
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1461,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
rth->fl.iif != 0 ||
dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
!net_eq(dev_net(rth->u.dst.dev), net) ||
- rth->rt_genid != atomic_read(&rt_genid))
+ !rt_is_expired(rth))
continue;
if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1696,7 +1737,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.oif = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
if (our) {
@@ -1711,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
RT_CACHE_STAT_INC(in_slow_mc);
in_dev_put(in_dev);
- hash = rt_hash(daddr, saddr, dev->ifindex);
+ hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
return rt_intern_hash(hash, rth, &skb->rtable);
e_nobufs:
@@ -1837,7 +1878,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->u.dst.input = ip_forward;
rth->u.dst.output = ip_output;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
rt_set_nexthop(rth, res, itag);
@@ -1872,7 +1913,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
return err;
/* put it into the cache */
- hash = rt_hash(daddr, saddr, fl->iif);
+ hash = rt_hash(daddr, saddr, fl->iif,
+ rt_genid(dev_net(rth->u.dst.dev)));
return rt_intern_hash(hash, rth, &skb->rtable);
}
@@ -1998,7 +2040,7 @@ local_input:
goto e_nobufs;
rth->u.dst.output= ip_rt_bug;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(net);
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
@@ -2028,7 +2070,7 @@ local_input:
rth->rt_flags &= ~RTCF_LOCAL;
}
rth->rt_type = res.type;
- hash = rt_hash(daddr, saddr, fl.iif);
+ hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
err = rt_intern_hash(hash, rth, &skb->rtable);
goto done;
@@ -2079,7 +2121,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
net = dev_net(dev);
tos &= IPTOS_RT_MASK;
- hash = rt_hash(daddr, saddr, iif);
+ hash = rt_hash(daddr, saddr, iif, rt_genid(net));
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2091,7 +2133,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
(rth->fl.fl4_tos ^ tos)) == 0 &&
rth->fl.mark == skb->mark &&
net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ !rt_is_expired(rth)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
@@ -2219,7 +2261,7 @@ static int __mkroute_output(struct rtable **result,
rth->rt_spec_dst= fl->fl4_src;
rth->u.dst.output=ip_output;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(dev_out));
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2268,7 +2310,8 @@ static int ip_mkroute_output(struct rtable **rp,
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
unsigned hash;
if (err == 0) {
- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+ hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+ rt_genid(dev_net(dev_out)));
err = rt_intern_hash(hash, rth, rp);
}
@@ -2480,7 +2523,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
unsigned hash;
struct rtable *rth;
- hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+ hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
rcu_read_lock_bh();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2493,7 +2536,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ !rt_is_expired(rth)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
@@ -2524,7 +2567,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
};
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
+static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
{
struct rtable *ort = *rp;
struct rtable *rt = (struct rtable *)
@@ -2548,7 +2591,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
rt->idev = ort->idev;
if (rt->idev)
in_dev_hold(rt->idev);
- rt->rt_genid = atomic_read(&rt_genid);
+ rt->rt_genid = rt_genid(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_dst = ort->rt_dst;
@@ -2584,7 +2627,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
flags ? XFRM_LOOKUP_WAIT : 0);
if (err == -EREMOTE)
- err = ipv4_dst_blackhole(rp, flp);
+ err = ipv4_dst_blackhole(net, rp, flp);
return err;
}
@@ -2803,7 +2846,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
continue;
- if (rt->rt_genid != atomic_read(&rt_genid))
+ if (rt_is_expired(rt))
continue;
skb->dst = dst_clone(&rt->u.dst);
if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -2827,19 +2870,25 @@ done:
void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(in_dev->dev), 0);
}
#ifdef CONFIG_SYSCTL
-static int flush_delay;
-
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
if (write) {
- proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- rt_cache_flush(flush_delay);
+ int flush_delay;
+ ctl_table ctl;
+ struct net *net;
+
+ memcpy(&ctl, __ctl, sizeof(ctl));
+ ctl.data = &flush_delay;
+ proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+
+ net = (struct net *)__ctl->extra1;
+ rt_cache_flush(net, flush_delay);
return 0;
}
@@ -2855,25 +2904,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
size_t newlen)
{
int delay;
+ struct net *net;
if (newlen != sizeof(int))
return -EINVAL;
if (get_user(delay, (int __user *)newval))
return -EFAULT;
- rt_cache_flush(delay);
+ net = (struct net *)table->extra1;
+ rt_cache_flush(net, delay);
return 0;
}
ctl_table ipv4_route_table[] = {
{
- .ctl_name = NET_IPV4_ROUTE_FLUSH,
- .procname = "flush",
- .data = &flush_delay,
- .maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = &ipv4_sysctl_rtcache_flush,
- .strategy = &ipv4_sysctl_rtcache_flush_strategy,
- },
- {
.ctl_name = NET_IPV4_ROUTE_GC_THRESH,
.procname = "gc_thresh",
.data = &ipv4_dst_ops.gc_thresh,
@@ -3011,8 +3053,97 @@ ctl_table ipv4_route_table[] = {
},
{ .ctl_name = 0 }
};
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "ipv4", .ctl_name = NET_IPV4, },
+ { .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+ { },
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {
+ {
+ .ctl_name = NET_IPV4_ROUTE_FLUSH,
+ .procname = "flush",
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = &ipv4_sysctl_rtcache_flush,
+ .strategy = &ipv4_sysctl_rtcache_flush_strategy,
+ },
+ { .ctl_name = 0 },
+};
+
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = ipv4_route_flush_table;
+ if (net != &init_net) {
+ tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ }
+ tbl[0].extra1 = net;
+
+ net->ipv4.route_hdr =
+ register_net_sysctl_table(net, ipv4_route_path, tbl);
+ if (net->ipv4.route_hdr == NULL)
+ goto err_reg;
+ return 0;
+
+err_reg:
+ if (tbl != ipv4_route_flush_table)
+ kfree(tbl);
+err_dup:
+ return -ENOMEM;
+}
+
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = net->ipv4.route_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv4.route_hdr);
+ BUG_ON(tbl == ipv4_route_flush_table);
+ kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {
+ .init = sysctl_route_net_init,
+ .exit = sysctl_route_net_exit,
+};
#endif
+
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+ atomic_set(&net->ipv4.rt_genid,
+ (int) ((num_physpages ^ (num_physpages>>8)) ^
+ (jiffies ^ (jiffies >> 7))));
+
+ net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
+ net->ipv4.rt_secret_timer.data = (unsigned long)net;
+ init_timer_deferrable(&net->ipv4.rt_secret_timer);
+
+ net->ipv4.rt_secret_timer.expires =
+ jiffies + net_random() % ip_rt_secret_interval +
+ ip_rt_secret_interval;
+ add_timer(&net->ipv4.rt_secret_timer);
+ return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)
+{
+ del_timer_sync(&net->ipv4.rt_secret_timer);
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {
+ .init = rt_secret_timer_init,
+ .exit = rt_secret_timer_exit,
+};
+
+
#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *ip_rt_acct __read_mostly;
#endif /* CONFIG_NET_CLS_ROUTE */
@@ -3031,9 +3162,6 @@ int __init ip_rt_init(void)
{
int rc = 0;
- atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
#ifdef CONFIG_NET_CLS_ROUTE
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
if (!ip_rt_acct)
@@ -3065,19 +3193,14 @@ int __init ip_rt_init(void)
devinet_init();
ip_fib_init();
- rt_secret_timer.function = rt_secret_rebuild;
- rt_secret_timer.data = 0;
- init_timer_deferrable(&rt_secret_timer);
-
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&rt_secret_timer);
+ if (register_pernet_subsys(&rt_secret_timer_ops))
+ printk(KERN_ERR "Unable to setup rt_secret_timer\n");
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
@@ -3087,6 +3210,9 @@ int __init ip_rt_init(void)
#endif
rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&sysctl_route_ops);
+#endif
return rc;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d182a2a2629..51bc24d3b8a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -8,8 +8,6 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- *
- * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
*/
#include <linux/tcp.h>
@@ -175,7 +173,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
;
*mssp = msstab[mssind] + 1;
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
th->source, th->dest, ntohl(th->seq),
@@ -271,11 +269,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
(mss = cookie_check(skb, cookie)) == 0) {
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c437f804ee3..14ef202a225 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,8 +1,6 @@
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
- * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $
- *
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
*/
@@ -795,7 +793,8 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_icmp_ratelimit,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_ms_jiffies,
+ .strategy = &sysctl_ms_jiffies
},
{
.ctl_name = NET_IPV4_ICMP_RATEMASK,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fc54a48fde1..0b491bf03db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -255,11 +253,14 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
+#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/random.h>
#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/crypto.h>
@@ -276,8 +277,6 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
-
atomic_t tcp_orphan_count = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -315,10 +314,10 @@ int tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL(tcp_memory_pressure);
-void tcp_enter_memory_pressure(void)
+void tcp_enter_memory_pressure(struct sock *sk)
{
if (!tcp_memory_pressure) {
- NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
tcp_memory_pressure = 1;
}
}
@@ -343,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
- by poll logic and correct handling of state changes
- made by another threads is impossible in any case.
+ * by poll logic and correct handling of state changes
+ * made by other threads is impossible in any case.
*/
mask = 0;
@@ -370,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
* if and only if shutdown has been made in both directions.
* Actually, it is interesting to look how Solaris and DUX
- * solve this dilemma. I would prefer, if PULLHUP were maskable,
+ * solve this dilemma. I would prefer, if POLLHUP were maskable,
* then we could set it on SND_SHUTDOWN. BTW examples given
* in Stevens' books assume exactly this behaviour, it explains
- * why PULLHUP is incompatible with POLLOUT. --ANK
+ * why POLLHUP is incompatible with POLLOUT. --ANK
*
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
@@ -648,7 +647,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
}
__kfree_skb(skb);
} else {
- sk->sk_prot->enter_memory_pressure();
+ sk->sk_prot->enter_memory_pressure(sk);
sk_stream_moderate_sndbuf(sk);
}
return NULL;
@@ -1152,7 +1151,7 @@ static void tcp_prequeue_process(struct sock *sk)
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
- NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);
+ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
/* RX process wants to run with disabled BHs, though it is not
* necessary */
@@ -1206,7 +1205,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
return -ENOTCONN;
while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
if (offset < skb->len) {
- size_t used, len;
+ int used;
+ size_t len;
len = skb->len - offset;
/* Stop reading if we hit a patch of urgent data */
@@ -1473,7 +1473,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* __ Restore normal policy in scheduler __ */
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+ NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
len -= chunk;
copied += chunk;
}
@@ -1484,7 +1484,7 @@ do_prequeue:
tcp_prequeue_process(sk);
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
}
@@ -1599,7 +1599,7 @@ skip_copy:
tcp_prequeue_process(sk);
if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
}
@@ -1666,12 +1666,12 @@ void tcp_set_state(struct sock *sk, int state)
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
- TCP_INC_STATS(TCP_MIB_CURRESTAB);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
break;
case TCP_CLOSE:
if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
- TCP_INC_STATS(TCP_MIB_ESTABRESETS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
sk->sk_prot->unhash(sk);
if (inet_csk(sk)->icsk_bind_hash &&
@@ -1680,7 +1680,7 @@ void tcp_set_state(struct sock *sk, int state)
/* fall through */
default:
if (oldstate==TCP_ESTABLISHED)
- TCP_DEC_STATS(TCP_MIB_CURRESTAB);
+ TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
}
/* Change state AFTER socket is unhashed to avoid closed
@@ -1791,13 +1791,13 @@ void tcp_close(struct sock *sk, long timeout)
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_KERNEL);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
} else if (tcp_close_state(sk)) {
/* We FIN if the application ate all the data before
* zapping the connection.
@@ -1869,7 +1869,8 @@ adjudge_to_death:
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPABORTONLINGER);
} else {
const int tmo = tcp_fin_time(sk);
@@ -1891,7 +1892,8 @@ adjudge_to_death:
"sockets\n");
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPABORTONMEMORY);
}
}
@@ -2586,12 +2588,69 @@ void __tcp_put_md5sig_pool(void)
}
EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+
+int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
+ struct tcphdr *th)
+{
+ struct scatterlist sg;
+ int err;
+
+ __sum16 old_checksum = th->check;
+ th->check = 0;
+ /* options aren't included in the hash */
+ sg_init_one(&sg, th, sizeof(struct tcphdr));
+ err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
+ th->check = old_checksum;
+ return err;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_header);
+
+int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+ struct sk_buff *skb, unsigned header_len)
+{
+ struct scatterlist sg;
+ const struct tcphdr *tp = tcp_hdr(skb);
+ struct hash_desc *desc = &hp->md5_desc;
+ unsigned i;
+ const unsigned head_data_len = skb_headlen(skb) > header_len ?
+ skb_headlen(skb) - header_len : 0;
+ const struct skb_shared_info *shi = skb_shinfo(skb);
+
+ sg_init_table(&sg, 1);
+
+ sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
+ if (crypto_hash_update(desc, &sg, head_data_len))
+ return 1;
+
+ for (i = 0; i < shi->nr_frags; ++i) {
+ const struct skb_frag_struct *f = &shi->frags[i];
+ sg_set_page(&sg, f->page, f->size, f->page_offset);
+ if (crypto_hash_update(desc, &sg, f->size))
+ return 1;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_skb_data);
+
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, key->key, key->keylen);
+ return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_key);
+
#endif
void tcp_done(struct sock *sk)
{
if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
@@ -2620,7 +2679,7 @@ __setup("thash_entries=", set_thash_entries);
void __init tcp_init(void)
{
struct sk_buff *skb = NULL;
- unsigned long limit;
+ unsigned long nr_pages, limit;
int order, i, max_share;
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -2689,8 +2748,9 @@ void __init tcp_init(void)
* is up to 1/2 at 256 MB, decreasing toward zero with the amount of
* memory, with a floor of 128 pages.
*/
- limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
- limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ nr_pages = totalram_pages - totalhigh_pages;
+ limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+ limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
limit = max(limit, 128UL);
sysctl_tcp_mem[0] = limit / 4 * 3;
sysctl_tcp_mem[1] = limit;
@@ -2727,4 +2787,3 @@ EXPORT_SYMBOL(tcp_splice_read);
EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown);
-EXPORT_SYMBOL(tcp_statistics);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 2fbcc7d1b1a..838d491dfda 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,8 +1,6 @@
/*
* tcp_diag.c Module for monitoring TCP transport protocols sockets.
*
- * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cad73b7dfef..1f5e6049883 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -604,7 +602,7 @@ static u32 tcp_rto_min(struct sock *sk)
u32 rto_min = TCP_RTO_MIN;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
- rto_min = dst_metric(dst, RTAX_RTO_MIN);
+ rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
return rto_min;
}
@@ -731,6 +729,7 @@ void tcp_update_metrics(struct sock *sk)
if (dst && (dst->flags & DST_HOST)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
int m;
+ unsigned long rtt;
if (icsk->icsk_backoff || !tp->srtt) {
/* This session failed to estimate rtt. Why?
@@ -742,7 +741,8 @@ void tcp_update_metrics(struct sock *sk)
return;
}
- m = dst_metric(dst, RTAX_RTT) - tp->srtt;
+ rtt = dst_metric_rtt(dst, RTAX_RTT);
+ m = rtt - tp->srtt;
/* If newly calculated rtt larger than stored one,
* store new one. Otherwise, use EWMA. Remember,
@@ -750,12 +750,13 @@ void tcp_update_metrics(struct sock *sk)
*/
if (!(dst_metric_locked(dst, RTAX_RTT))) {
if (m <= 0)
- dst->metrics[RTAX_RTT - 1] = tp->srtt;
+ set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
else
- dst->metrics[RTAX_RTT - 1] -= (m >> 3);
+ set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
}
if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
+ unsigned long var;
if (m < 0)
m = -m;
@@ -764,11 +765,13 @@ void tcp_update_metrics(struct sock *sk)
if (m < tp->mdev)
m = tp->mdev;
- if (m >= dst_metric(dst, RTAX_RTTVAR))
- dst->metrics[RTAX_RTTVAR - 1] = m;
+ var = dst_metric_rtt(dst, RTAX_RTTVAR);
+ if (m >= var)
+ var = m;
else
- dst->metrics[RTAX_RTTVAR-1] -=
- (dst_metric(dst, RTAX_RTTVAR) - m)>>2;
+ var -= (var - m) >> 2;
+
+ set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
}
if (tp->snd_ssthresh >= 0xFFFF) {
@@ -899,7 +902,7 @@ static void tcp_init_metrics(struct sock *sk)
if (dst_metric(dst, RTAX_RTT) == 0)
goto reset;
- if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+ if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
goto reset;
/* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,12 +919,12 @@ static void tcp_init_metrics(struct sock *sk)
* to low value, and then abruptly stops to do it and starts to delay
* ACKs, wait for troubles.
*/
- if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
- tp->srtt = dst_metric(dst, RTAX_RTT);
+ if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
+ tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
tp->rtt_seq = tp->snd_nxt;
}
- if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
- tp->mdev = dst_metric(dst, RTAX_RTTVAR);
+ if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
+ tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
@@ -949,17 +952,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
{
struct tcp_sock *tp = tcp_sk(sk);
if (metric > tp->reordering) {
+ int mib_idx;
+
tp->reordering = min(TCP_MAX_REORDERING, metric);
/* This exciting event is worth to be remembered. 8) */
if (ts)
- NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
+ mib_idx = LINUX_MIB_TCPTSREORDER;
else if (tcp_is_reno(tp))
- NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
+ mib_idx = LINUX_MIB_TCPRENOREORDER;
else if (tcp_is_fack(tp))
- NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
+ mib_idx = LINUX_MIB_TCPFACKREORDER;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
+ mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1155,7 +1162,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
}
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
if (before(ack_seq, new_low_seq))
new_low_seq = ack_seq;
@@ -1167,10 +1174,11 @@ static void tcp_mark_lost_retrans(struct sock *sk)
tp->lost_retrans_low = new_low_seq;
}
-static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp, int num_sacks,
u32 prior_snd_una)
{
+ struct tcp_sock *tp = tcp_sk(sk);
u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
int dup_sack = 0;
@@ -1178,7 +1186,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
dup_sack = 1;
tcp_dsack_seen(tp);
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
} else if (num_sacks > 1) {
u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1187,7 +1195,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
!before(start_seq_0, start_seq_1)) {
dup_sack = 1;
tcp_dsack_seen(tp);
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPDSACKOFORECV);
}
}
@@ -1414,10 +1423,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
unsigned char *ptr = (skb_transport_header(ack_skb) +
TCP_SKB_CB(ack_skb)->sacked);
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
- struct tcp_sack_block sp[4];
+ struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
struct sk_buff *skb;
- int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
+ int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
int used_sacks;
int reord = tp->packets_out;
int flag = 0;
@@ -1432,7 +1441,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
tcp_highest_sack_reset(sk);
}
- found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
+ found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
flag |= FLAG_DSACKING_ACK;
@@ -1458,18 +1467,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
if (!tcp_is_sackblock_valid(tp, dup_sack,
sp[used_sacks].start_seq,
sp[used_sacks].end_seq)) {
+ int mib_idx;
+
if (dup_sack) {
if (!tp->undo_marker)
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
+ mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
+ mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
} else {
/* Don't count olds caused by ACK reordering */
if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
!after(sp[used_sacks].end_seq, tp->snd_una))
continue;
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
+ mib_idx = LINUX_MIB_TCPSACKDISCARD;
}
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
if (i == 0)
first_sack_index = -1;
continue;
@@ -1962,7 +1975,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
{
if (flag & FLAG_SACK_RENEGING) {
struct inet_connection_sock *icsk = inet_csk(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tcp_enter_loss(sk, 1);
icsk->icsk_retransmits++;
@@ -2382,15 +2395,19 @@ static int tcp_try_undo_recovery(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_may_undo(tp)) {
+ int mib_idx;
+
/* Happy end! We did not retransmit anything
* or our original transmission succeeded.
*/
DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
tcp_undo_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+ mib_idx = LINUX_MIB_TCPLOSSUNDO;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
+ mib_idx = LINUX_MIB_TCPFULLUNDO;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->undo_marker = 0;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
@@ -2413,7 +2430,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
DBGUNDO(sk, "D-SACK");
tcp_undo_cwr(sk, 1);
tp->undo_marker = 0;
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
}
}
@@ -2436,7 +2453,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
DBGUNDO(sk, "Hoe");
tcp_undo_cwr(sk, 0);
- NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
/* So... Do not make Hoe's retransmit yet.
* If the first packet was delayed, the rest
@@ -2465,7 +2482,7 @@ static int tcp_try_undo_loss(struct sock *sk)
DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
tcp_undo_cwr(sk, 1);
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
if (tcp_is_sack(tp))
@@ -2562,7 +2579,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- int fast_rexmit = 0;
+ int fast_rexmit = 0, mib_idx;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
tp->sacked_out = 0;
@@ -2584,7 +2601,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
}
/* D. Check consistency of the current state. */
@@ -2685,9 +2702,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
/* Otherwise enter Recovery state */
if (tcp_is_reno(tp))
- NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
+ mib_idx = LINUX_MIB_TCPRENORECOVERY;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);
+ mib_idx = LINUX_MIB_TCPSACKRECOVERY;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->high_seq = tp->snd_nxt;
tp->prior_ssthresh = 0;
@@ -3198,7 +3217,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
}
tp->frto_counter = 0;
tp->undo_marker = 0;
- NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
}
return 0;
}
@@ -3251,12 +3270,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tcp_ca_event(sk, CA_EVENT_FAST_ACK);
- NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
@@ -3450,6 +3469,43 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
return 1;
}
+#ifdef CONFIG_TCP_MD5SIG
+/*
+ * Parse MD5 Signature option
+ */
+u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+{
+ int length = (th->doff << 2) - sizeof (*th);
+ u8 *ptr = (u8*)(th + 1);
+
+ /* If the TCP option is too short, we can short cut */
+ if (length < TCPOLEN_MD5SIG)
+ return NULL;
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch(opcode) {
+ case TCPOPT_EOL:
+ return NULL;
+ case TCPOPT_NOP:
+ length--;
+ continue;
+ default:
+ opsize = *ptr++;
+ if (opsize < 2 || opsize > length)
+ return NULL;
+ if (opcode == TCPOPT_MD5SIG)
+ return ptr;
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ return NULL;
+}
+#endif
+
static inline void tcp_store_ts_recent(struct tcp_sock *tp)
{
tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
@@ -3662,26 +3718,33 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
return 0;
}
-static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ int mib_idx;
+
if (before(seq, tp->rcv_nxt))
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
+ mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
+ mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->rx_opt.dsack = 1;
tp->duplicate_sack[0].start_seq = seq;
tp->duplicate_sack[0].end_seq = end_seq;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
}
}
-static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!tp->rx_opt.dsack)
- tcp_dsack_set(tp, seq, end_seq);
+ tcp_dsack_set(sk, seq, end_seq);
else
tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}
@@ -3692,7 +3755,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -3700,7 +3763,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
end_seq = tp->rcv_nxt;
- tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
}
}
@@ -3727,9 +3790,8 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
* Decrease num_sacks.
*/
tp->rx_opt.num_sacks--;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
- tp->rx_opt.dsack,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
+ tp->rx_opt.dsack;
for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
sp[i] = sp[i + 1];
continue;
@@ -3779,7 +3841,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
*
* If the sack array is full, forget about the last one.
*/
- if (this_sack >= 4) {
+ if (this_sack >= TCP_NUM_SACKS) {
this_sack--;
tp->rx_opt.num_sacks--;
sp--;
@@ -3792,8 +3854,7 @@ new_sack:
sp->start_seq = seq;
sp->end_seq = end_seq;
tp->rx_opt.num_sacks++;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
}
/* RCV.NXT advances, some SACKs should be eaten. */
@@ -3830,9 +3891,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
}
if (num_sacks != tp->rx_opt.num_sacks) {
tp->rx_opt.num_sacks = num_sacks;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
- tp->rx_opt.dsack,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
+ tp->rx_opt.dsack;
}
}
@@ -3853,7 +3913,7 @@ static void tcp_ofo_queue(struct sock *sk)
__u32 dsack = dsack_high;
if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
dsack_high = TCP_SKB_CB(skb)->end_seq;
- tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
@@ -3911,8 +3971,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
- tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
}
/* Queue data for delivery to the user.
@@ -3981,8 +4040,8 @@ queue_and_out:
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an immediate ack. */
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
- tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
out_of_window:
tcp_enter_quickack_mode(sk);
@@ -4004,7 +4063,7 @@ drop:
tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq);
- tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
/* If window is closed, drop tail of packet. But after
* remembering D-SACK for its head made in previous line.
@@ -4069,12 +4128,12 @@ drop:
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
__kfree_skb(skb);
- tcp_dsack_set(tp, seq, end_seq);
+ tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
}
if (after(seq, TCP_SKB_CB(skb1)->seq)) {
/* Partial overlap. */
- tcp_dsack_set(tp, seq,
+ tcp_dsack_set(sk, seq,
TCP_SKB_CB(skb1)->end_seq);
} else {
skb1 = skb1->prev;
@@ -4087,12 +4146,12 @@ drop:
(struct sk_buff *)&tp->out_of_order_queue &&
after(end_seq, TCP_SKB_CB(skb1)->seq)) {
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
end_seq);
break;
}
__skb_unlink(skb1, &tp->out_of_order_queue);
- tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
__kfree_skb(skb1);
}
@@ -4123,7 +4182,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
struct sk_buff *next = skb->next;
__skb_unlink(skb, list);
__kfree_skb(skb);
- NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
continue;
}
@@ -4191,7 +4250,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
struct sk_buff *next = skb->next;
__skb_unlink(skb, list);
__kfree_skb(skb);
- NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
if (skb == tail ||
tcp_hdr(skb)->syn ||
@@ -4254,7 +4313,7 @@ static int tcp_prune_ofo_queue(struct sock *sk)
int res = 0;
if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
__skb_queue_purge(&tp->out_of_order_queue);
/* Reset SACK state. A conforming SACK implementation will
@@ -4283,7 +4342,7 @@ static int tcp_prune_queue(struct sock *sk)
SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
- NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
@@ -4312,7 +4371,7 @@ static int tcp_prune_queue(struct sock *sk)
* drop receive data on the floor. It will get retransmitted
* and hopefully then we'll have sufficient space.
*/
- NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
/* Massive buffer overcommit. */
tp->pred_flags = 0;
@@ -4742,7 +4801,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_data_snd_check(sk);
return 0;
} else { /* Header too small */
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
@@ -4779,7 +4838,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
}
if (copied_early)
tcp_cleanup_rbuf(sk, skb->len);
@@ -4802,7 +4861,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
- NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
__skb_pull(skb, tcp_header_len);
@@ -4846,7 +4905,7 @@ slow_path:
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
tcp_send_dupack(sk, skb);
goto discard;
}
@@ -4881,8 +4940,8 @@ slow_path:
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
tcp_reset(sk);
return 1;
}
@@ -4904,7 +4963,7 @@ step5:
return 0;
csum_error:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
discard:
__kfree_skb(skb);
@@ -4938,7 +4997,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
@@ -5222,7 +5281,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
tcp_send_dupack(sk, skb);
goto discard;
}
@@ -5251,7 +5310,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* Check for a SYN in window.
*/
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
tcp_reset(sk);
return 1;
}
@@ -5333,7 +5392,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
(TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
tcp_done(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
}
@@ -5393,7 +5452,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (sk->sk_shutdown & RCV_SHUTDOWN) {
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk);
return 1;
}
@@ -5422,6 +5481,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
EXPORT_SYMBOL(tcp_parse_options);
+#ifdef CONFIG_TCP_MD5SIG
+EXPORT_SYMBOL(tcp_parse_md5sig_option);
+#endif
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 12695be2c25..a82df630756 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
- *
* IPv4 specific functions
*
*
@@ -89,10 +87,14 @@ int sysctl_tcp_low_latency __read_mostly;
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
__be32 addr);
-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- __be32 saddr, __be32 daddr,
- struct tcphdr *th, int protocol,
- unsigned int tcplen);
+static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+ __be32 daddr, __be32 saddr, struct tcphdr *th);
+#else
+static inline
+struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+{
+ return NULL;
+}
#endif
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -172,7 +174,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->sport, usin->sin_port, sk, 1);
if (tmp < 0) {
if (tmp == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return tmp;
}
@@ -340,16 +342,17 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
struct sock *sk;
__u32 seq;
int err;
+ struct net *net = dev_net(skb->dev);
if (skb->len < (iph->ihl << 2) + 8) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
- sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
+ sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
iph->saddr, th->source, inet_iif(skb));
if (!sk) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == TCP_TIME_WAIT) {
@@ -362,7 +365,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
@@ -371,7 +374,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
!between(seq, tp->snd_una, tp->snd_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -418,7 +421,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
BUG_TRAP(!req->sk);
if (seq != tcp_rsk(req)->snt_isn) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -540,6 +543,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct net *net;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -578,12 +582,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
- key,
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- &rep.th, IPPROTO_TCP,
- arg.iov[0].iov_len);
+ tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
+ key, ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -591,20 +592,21 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
+ net = dev_net(skb->dst->dev);
+ ip_send_reply(net->ipv4.tcp_sock, skb,
&arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
- struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 ts)
+static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+ u32 win, u32 ts, int oif,
+ struct tcp_md5sig_key *key)
{
struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -616,10 +618,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
];
} rep;
struct ip_reply_arg arg;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
- struct tcp_md5sig_key tw_key;
-#endif
+ struct net *net = dev_net(skb->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -645,23 +644,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
rep.th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
- /*
- * The SKB holds an imcoming packet, but may not have a valid ->sk
- * pointer. This is especially the case when we're dealing with a
- * TIME_WAIT ack, because the sk structure is long gone, and only
- * the tcp_timewait_sock remains. So the md5 key is stashed in that
- * structure, and we use it in preference. I believe that (twsk ||
- * skb->sk) holds true, but we program defensively.
- */
- if (!twsk && skb->sk) {
- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
- } else if (twsk && twsk->tw_md5_keylen) {
- tw_key.key = twsk->tw_md5_key;
- tw_key.keylen = twsk->tw_md5_keylen;
- key = &tw_key;
- } else
- key = NULL;
-
if (key) {
int offset = (ts) ? 3 : 0;
@@ -672,25 +654,22 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len/4;
- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
- key,
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- &rep.th, IPPROTO_TCP,
- arg.iov[0].iov_len);
+ tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
+ key, ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- if (twsk)
- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
+ if (oif)
+ arg.bound_dev_if = oif;
- ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
+ ip_send_reply(net->ipv4.tcp_sock, skb,
&arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -698,9 +677,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcptw->tw_ts_recent);
+ tcptw->tw_ts_recent,
+ tw->tw_bound_dev_if,
+ tcp_twsk_md5_key(tcptw)
+ );
inet_twsk_put(tw);
}
@@ -708,9 +690,11 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
+ tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
- req->ts_recent);
+ req->ts_recent,
+ 0,
+ tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr));
}
/*
@@ -1000,32 +984,13 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
newkey, cmd.tcpm_keylen);
}
-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- __be32 saddr, __be32 daddr,
- struct tcphdr *th, int protocol,
- unsigned int tcplen)
+static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+ __be32 daddr, __be32 saddr, int nbytes)
{
- struct scatterlist sg[4];
- __u16 data_len;
- int block = 0;
- __sum16 old_checksum;
- struct tcp_md5sig_pool *hp;
struct tcp4_pseudohdr *bp;
- struct hash_desc *desc;
- int err;
- unsigned int nbytes = 0;
-
- /*
- * Okay, so RFC2385 is turned on for this connection,
- * so we need to generate the MD5 hash for the packet now.
- */
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
+ struct scatterlist sg;
bp = &hp->md5_blk.ip4;
- desc = &hp->md5_desc;
/*
* 1. the TCP pseudo-header (in the order: source IP address,
@@ -1035,86 +1000,96 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
bp->saddr = saddr;
bp->daddr = daddr;
bp->pad = 0;
- bp->protocol = protocol;
- bp->len = htons(tcplen);
-
- sg_init_table(sg, 4);
-
- sg_set_buf(&sg[block++], bp, sizeof(*bp));
- nbytes += sizeof(*bp);
-
- /* 2. the TCP header, excluding options, and assuming a
- * checksum of zero/
- */
- old_checksum = th->check;
- th->check = 0;
- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
- nbytes += sizeof(struct tcphdr);
+ bp->protocol = IPPROTO_TCP;
+ bp->len = cpu_to_be16(nbytes);
- /* 3. the TCP segment data (if any) */
- data_len = tcplen - (th->doff << 2);
- if (data_len > 0) {
- unsigned char *data = (unsigned char *)th + (th->doff << 2);
- sg_set_buf(&sg[block++], data, data_len);
- nbytes += data_len;
- }
+ sg_init_one(&sg, bp, sizeof(*bp));
+ return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
- /* 4. an independently-specified key or password, known to both
- * TCPs and presumably connection-specific
- */
- sg_set_buf(&sg[block++], key->key, key->keylen);
- nbytes += key->keylen;
+static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+ __be32 daddr, __be32 saddr, struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
- sg_mark_end(&sg[block - 1]);
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
- /* Now store the Hash into the packet */
- err = crypto_hash_init(desc);
- if (err)
+ if (crypto_hash_init(desc))
goto clear_hash;
- err = crypto_hash_update(desc, sg, nbytes);
- if (err)
+ if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
goto clear_hash;
- err = crypto_hash_final(desc, md5_hash);
- if (err)
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
goto clear_hash;
- /* Reset header, and free up the crypto */
tcp_put_md5sig_pool();
- th->check = old_checksum;
-
-out:
return 0;
+
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
- goto out;
+ return 1;
}
-int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- struct sock *sk,
- struct dst_entry *dst,
- struct request_sock *req,
- struct tcphdr *th, int protocol,
- unsigned int tcplen)
+int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+ struct sock *sk, struct request_sock *req,
+ struct sk_buff *skb)
{
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
+ struct tcphdr *th = tcp_hdr(skb);
__be32 saddr, daddr;
if (sk) {
saddr = inet_sk(sk)->saddr;
daddr = inet_sk(sk)->daddr;
+ } else if (req) {
+ saddr = inet_rsk(req)->loc_addr;
+ daddr = inet_rsk(req)->rmt_addr;
} else {
- struct rtable *rt = (struct rtable *)dst;
- BUG_ON(!rt);
- saddr = rt->rt_src;
- daddr = rt->rt_dst;
+ const struct iphdr *iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
}
- return tcp_v4_do_calc_md5_hash(md5_hash, key,
- saddr, daddr,
- th, protocol, tcplen);
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
+
+ if (crypto_hash_init(desc))
+ goto clear_hash;
+
+ if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
}
-EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
+EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
@@ -1130,52 +1105,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
struct tcp_md5sig_key *hash_expected;
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
- int length = (th->doff << 2) - sizeof(struct tcphdr);
int genhash;
- unsigned char *ptr;
unsigned char newhash[16];
hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
- /*
- * If the TCP option length is less than the TCP_MD5SIG
- * option length, then we can shortcut
- */
- if (length < TCPOLEN_MD5SIG) {
- if (hash_expected)
- return 1;
- else
- return 0;
- }
-
- /* Okay, we can't shortcut - we have to grub through the options */
- ptr = (unsigned char *)(th + 1);
- while (length > 0) {
- int opcode = *ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- goto done_opts;
- case TCPOPT_NOP:
- length--;
- continue;
- default:
- opsize = *ptr++;
- if (opsize < 2)
- goto done_opts;
- if (opsize > length)
- goto done_opts;
-
- if (opcode == TCPOPT_MD5SIG) {
- hash_location = ptr;
- goto done_opts;
- }
- }
- ptr += opsize-2;
- length -= opsize;
- }
-done_opts:
/* We've parsed the options - do we have a hash? */
if (!hash_expected && !hash_location)
return 0;
@@ -1199,11 +1134,9 @@ done_opts:
/* Okay, so this is hash_expected and hash_location -
* so we need to calculate the checksum.
*/
- genhash = tcp_v4_do_calc_md5_hash(newhash,
- hash_expected,
- iph->saddr, iph->daddr,
- th, sk->sk_protocol,
- skb->len);
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
if (net_ratelimit()) {
@@ -1347,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
}
@@ -1452,6 +1385,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (newkey != NULL)
tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
newkey, key->keylen);
+ newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
#endif
@@ -1461,9 +1395,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
exit_overflow:
- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
@@ -1590,7 +1524,7 @@ discard:
return 0;
csum_err:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
@@ -1604,12 +1538,13 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct tcphdr *th;
struct sock *sk;
int ret;
+ struct net *net = dev_net(skb->dev);
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
/* Count it even if it's bad */
- TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1638,7 +1573,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
+ sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
th->source, iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1685,7 +1620,7 @@ no_tcp_socket:
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(NULL, skb);
}
@@ -1706,7 +1641,7 @@ do_time_wait:
}
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -1814,7 +1749,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_calc_md5_hash,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_add = tcp_v4_md5_add_func,
.md5_parse = tcp_v4_parse_md5_keys,
};
@@ -1871,7 +1806,7 @@ static int tcp_v4_init_sock(struct sock *sk)
return 0;
}
-int tcp_v4_destroy_sock(struct sock *sk)
+void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1915,8 +1850,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
}
atomic_dec(&tcp_sockets_allocated);
-
- return 0;
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1959,8 +1892,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
req = req->dl_next;
while (1) {
while (req) {
- if (req->rsk_ops->family == st->family &&
- net_eq(sock_net(req->sk), net)) {
+ if (req->rsk_ops->family == st->family) {
cur = req;
goto out;
}
@@ -2291,7 +2223,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
}
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
- "%08X %5d %8d %lu %d %p %u %u %u %u %d%n",
+ "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
@@ -2303,8 +2235,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
icsk->icsk_probes_out,
sock_i_ino(sk),
atomic_read(&sk->sk_refcnt), sk,
- icsk->icsk_rto,
- icsk->icsk_ack.ato,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8245247a6ce..204c4216266 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -246,7 +244,7 @@ kill:
}
if (paws_reject)
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
if (!th->rst) {
/* In this case we must reset the TIMEWAIT timer.
@@ -482,7 +480,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
- TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
return newsk;
}
@@ -613,7 +611,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(skb, req);
if (paws_reject)
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
return NULL;
}
@@ -632,7 +630,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
}
@@ -697,7 +695,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
}
embryonic_reset:
- NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_reset(sk, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ecb481..1fa683c0ba9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -347,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
TCP_SKB_CB(skb)->end_seq = seq;
}
-static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
- __u32 tstamp, __u8 **md5_hash)
-{
- if (tp->rx_opt.tstamp_ok) {
+#define OPTION_SACK_ADVERTISE (1 << 0)
+#define OPTION_TS (1 << 1)
+#define OPTION_MD5 (1 << 2)
+
+struct tcp_out_options {
+ u8 options; /* bit field of OPTION_* */
+ u8 ws; /* window scale, 0 to disable */
+ u8 num_sack_blocks; /* number of SACK blocks to include */
+ u16 mss; /* 0 to disable */
+ __u32 tsval, tsecr; /* need to include OPTION_TS */
+};
+
+static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+ const struct tcp_out_options *opts,
+ __u8 **md5_hash) {
+ if (unlikely(OPTION_MD5 & opts->options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ *md5_hash = (__u8 *)ptr;
+ ptr += 4;
+ } else {
+ *md5_hash = NULL;
+ }
+
+ if (likely(OPTION_TS & opts->options)) {
+ if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+ *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ } else {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ }
+ *ptr++ = htonl(opts->tsval);
+ *ptr++ = htonl(opts->tsecr);
+ }
+
+ if (unlikely(opts->mss)) {
+ *ptr++ = htonl((TCPOPT_MSS << 24) |
+ (TCPOLEN_MSS << 16) |
+ opts->mss);
+ }
+
+ if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
+ !(OPTION_TS & opts->options))) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tstamp);
- *ptr++ = htonl(tp->rx_opt.ts_recent);
+ (TCPOPT_SACK_PERM << 8) |
+ TCPOLEN_SACK_PERM);
+ }
+
+ if (unlikely(opts->ws)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ opts->ws);
}
- if (tp->rx_opt.eff_sacks) {
- struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+
+ if (unlikely(opts->num_sack_blocks)) {
+ struct tcp_sack_block *sp = tp->rx_opt.dsack ?
+ tp->duplicate_sack : tp->selective_acks;
int this_sack;
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK << 8) |
- (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+ (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
TCPOLEN_SACK_PERBLOCK)));
- for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+ for (this_sack = 0; this_sack < opts->num_sack_blocks;
+ ++this_sack) {
*ptr++ = htonl(sp[this_sack].start_seq);
*ptr++ = htonl(sp[this_sack].end_seq);
}
@@ -378,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
tp->rx_opt.eff_sacks--;
}
}
+}
+
+static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct tcp_md5sig_key **md5) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned size = 0;
+
#ifdef CONFIG_TCP_MD5SIG
- if (md5_hash) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- *md5_hash = (__u8 *)ptr;
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
}
+#else
+ *md5 = NULL;
#endif
+
+ /* We always get an MSS option. The option bytes which will be seen in
+ * normal data packets should timestamps be used, must be in the MSS
+ * advertised. But we subtract them from tp->mss_cache so that
+ * calculations in tcp_sendmsg are simpler etc. So account for this
+ * fact here if necessary. If we don't do this correctly, as a
+ * receiver we won't recognize data packets as being full sized when we
+ * should, and thus we won't abide by the delayed ACK rules correctly.
+ * SACKs don't matter, we never delay an ACK when we have any of those
+ * going out. */
+ opts->mss = tcp_advertise_mss(sk);
+ size += TCPOLEN_MSS_ALIGNED;
+
+ if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = TCP_SKB_CB(skb)->when;
+ opts->tsecr = tp->rx_opt.ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(sysctl_tcp_window_scaling)) {
+ opts->ws = tp->rx_opt.rcv_wscale;
+ size += TCPOLEN_WSCALE_ALIGNED;
+ }
+ if (likely(sysctl_tcp_sack)) {
+ opts->options |= OPTION_SACK_ADVERTISE;
+ if (unlikely(!OPTION_TS & opts->options))
+ size += TCPOLEN_SACKPERM_ALIGNED;
+ }
+
+ return size;
}
-/* Construct a tcp options header for a SYN or SYN_ACK packet.
- * If this is every changed make sure to change the definition of
- * MAX_SYN_SIZE to match the new maximum number of options that you
- * can generate.
- *
- * Note - that with the RFC2385 TCP option, we make room for the
- * 16 byte MD5 hash. This will be filled in later, so the pointer for the
- * location to be filled is passed back up.
- */
-static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
- int offer_wscale, int wscale, __u32 tstamp,
- __u32 ts_recent, __u8 **md5_hash)
-{
- /* We always get an MSS option.
- * The option bytes which will be seen in normal data
- * packets should timestamps be used, must be in the MSS
- * advertised. But we subtract them from tp->mss_cache so
- * that calculations in tcp_sendmsg are simpler etc.
- * So account for this fact here if necessary. If we
- * don't do this correctly, as a receiver we won't
- * recognize data packets as being full sized when we
- * should, and thus we won't abide by the delayed ACK
- * rules correctly.
- * SACKs don't matter, we never delay an ACK when we
- * have any of those going out.
- */
- *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
- if (ts) {
- if (sack)
- *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
- (TCPOLEN_SACK_PERM << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- else
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tstamp); /* TSVAL */
- *ptr++ = htonl(ts_recent); /* TSECR */
- } else if (sack)
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_SACK_PERM << 8) |
- TCPOLEN_SACK_PERM);
- if (offer_wscale)
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_WINDOW << 16) |
- (TCPOLEN_WINDOW << 8) |
- (wscale));
+static unsigned tcp_synack_options(struct sock *sk,
+ struct request_sock *req,
+ unsigned mss, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct tcp_md5sig_key **md5) {
+ unsigned size = 0;
+ struct inet_request_sock *ireq = inet_rsk(req);
+ char doing_ts;
+
#ifdef CONFIG_TCP_MD5SIG
- /*
- * If MD5 is enabled, then we set the option, and include the size
- * (always 18). The actual MD5 hash is added just before the
- * packet is sent.
- */
- if (md5_hash) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- *md5_hash = (__u8 *)ptr;
+ *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
}
+#else
+ *md5 = NULL;
#endif
+
+ /* we can't fit any SACK blocks in a packet with MD5 + TS
+ options. There was discussion about disabling SACK rather than TS in
+ order to fit in better with old, buggy kernels, but that was deemed
+ to be unnecessary. */
+ doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);
+
+ opts->mss = mss;
+ size += TCPOLEN_MSS_ALIGNED;
+
+ if (likely(ireq->wscale_ok)) {
+ opts->ws = ireq->rcv_wscale;
+ size += TCPOLEN_WSCALE_ALIGNED;
+ }
+ if (likely(doing_ts)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = TCP_SKB_CB(skb)->when;
+ opts->tsecr = req->ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(ireq->sack_ok)) {
+ opts->options |= OPTION_SACK_ADVERTISE;
+ if (unlikely(!doing_ts))
+ size += TCPOLEN_SACKPERM_ALIGNED;
+ }
+
+ return size;
+}
+
+static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct tcp_md5sig_key **md5) {
+ struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned size = 0;
+
+#ifdef CONFIG_TCP_MD5SIG
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (unlikely(*md5)) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
+ }
+#else
+ *md5 = NULL;
+#endif
+
+ if (likely(tp->rx_opt.tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = tcb ? tcb->when : 0;
+ opts->tsecr = tp->rx_opt.ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+
+ if (unlikely(tp->rx_opt.eff_sacks)) {
+ const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
+ opts->num_sack_blocks =
+ min_t(unsigned, tp->rx_opt.eff_sacks,
+ (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+ TCPOLEN_SACK_PERBLOCK);
+ size += TCPOLEN_SACK_BASE_ALIGNED +
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+ }
+
+ return size;
}
/* This routine actually transmits TCP packets queued in by
@@ -473,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct inet_sock *inet;
struct tcp_sock *tp;
struct tcp_skb_cb *tcb;
- int tcp_header_size;
-#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_out_options opts;
+ unsigned tcp_options_size, tcp_header_size;
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
-#endif
struct tcphdr *th;
- int sysctl_flags;
int err;
BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -502,50 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
inet = inet_sk(sk);
tp = tcp_sk(sk);
tcb = TCP_SKB_CB(skb);
- tcp_header_size = tp->tcp_header_len;
-
-#define SYSCTL_FLAG_TSTAMPS 0x1
-#define SYSCTL_FLAG_WSCALE 0x2
-#define SYSCTL_FLAG_SACK 0x4
+ memset(&opts, 0, sizeof(opts));
- sysctl_flags = 0;
- if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
- tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if (sysctl_tcp_timestamps) {
- tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
- }
- if (sysctl_tcp_window_scaling) {
- tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_WSCALE;
- }
- if (sysctl_tcp_sack) {
- sysctl_flags |= SYSCTL_FLAG_SACK;
- if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
- tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
- }
- } else if (unlikely(tp->rx_opt.eff_sacks)) {
- /* A SACK is 2 pad bytes, a 2 byte header, plus
- * 2 32-bit sequence numbers for each SACK block.
- */
- tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->rx_opt.eff_sacks *
- TCPOLEN_SACK_PERBLOCK));
- }
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+ tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+ else
+ tcp_options_size = tcp_established_options(sk, skb, &opts,
+ &md5);
+ tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * Are we doing MD5 on this segment? If so - make
- * room for it.
- */
- md5 = tp->af_specific->md5_lookup(sk, sk);
- if (md5)
- tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
skb_set_owner_w(skb, sk);
@@ -576,39 +650,16 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->urg = 1;
}
- if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
- tcp_syn_build_options((__be32 *)(th + 1),
- tcp_advertise_mss(sk),
- (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
- (sysctl_flags & SYSCTL_FLAG_SACK),
- (sysctl_flags & SYSCTL_FLAG_WSCALE),
- tp->rx_opt.rcv_wscale,
- tcb->when,
- tp->rx_opt.ts_recent,
-
-#ifdef CONFIG_TCP_MD5SIG
- md5 ? &md5_hash_location :
-#endif
- NULL);
- } else {
- tcp_build_and_update_options((__be32 *)(th + 1),
- tp, tcb->when,
-#ifdef CONFIG_TCP_MD5SIG
- md5 ? &md5_hash_location :
-#endif
- NULL);
+ tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+ if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
TCP_ECN_send(sk, skb, tcp_header_size);
- }
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
tp->af_specific->calc_md5_hash(md5_hash_location,
- md5,
- sk, NULL, NULL,
- tcp_hdr(skb),
- sk->sk_protocol,
- skb->len);
+ md5, sk, NULL, skb);
}
#endif
@@ -621,7 +672,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_event_data_sent(tp, skb, sk);
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
err = icsk->icsk_af_ops->queue_xmit(skb, 0);
if (likely(err <= 0))
@@ -630,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_enter_cwr(sk, 1);
return net_xmit_eval(err);
-
-#undef SYSCTL_FLAG_TSTAMPS
-#undef SYSCTL_FLAG_WSCALE
-#undef SYSCTL_FLAG_SACK
}
/* This routine just queue's the buffer
@@ -974,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
u32 mss_now;
u16 xmit_size_goal;
int doing_tso = 0;
+ unsigned header_len;
+ struct tcp_out_options opts;
+ struct tcp_md5sig_key *md5;
mss_now = tp->mss_cache;
@@ -986,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
mss_now = tcp_sync_mss(sk, mtu);
}
- if (tp->rx_opt.eff_sacks)
- mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
-
-#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- mss_now -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
+ header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+ sizeof(struct tcphdr);
+ /* The mss_cache is sized based on tp->tcp_header_len, which assumes
+ * some common options. If this is an odd packet (because we have SACK
+ * blocks etc) then our calculated header_len will be different, and
+ * we have to adjust mss_now correspondingly */
+ if (header_len != tp->tcp_header_len) {
+ int delta = (int) header_len - tp->tcp_header_len;
+ mss_now -= delta;
+ }
xmit_size_goal = mss_now;
@@ -1913,7 +1965,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (err == 0) {
/* Update global TCP statistics. */
- TCP_INC_STATS(TCP_MIB_RETRANSSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
tp->total_retrans++;
@@ -1988,14 +2040,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (sacked & TCPCB_LOST) {
if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+ int mib_idx;
+
if (tcp_retransmit_skb(sk, skb)) {
tp->retransmit_skb_hint = NULL;
return;
}
if (icsk->icsk_ca_state != TCP_CA_Loss)
- NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
+ mib_idx = LINUX_MIB_TCPFASTRETRANS;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
+ mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2065,7 +2120,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
- NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
}
}
@@ -2119,7 +2174,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* NOTE: No TCP options attached and we never retransmit this. */
skb = alloc_skb(MAX_TCP_HEADER, priority);
if (!skb) {
- NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
return;
}
@@ -2130,9 +2185,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* Send it off. */
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
- NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
- TCP_INC_STATS(TCP_MIB_OUTRSTS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
}
/* WARNING: This routine must only be called when we have already sent
@@ -2180,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct tcp_sock *tp = tcp_sk(sk);
struct tcphdr *th;
int tcp_header_size;
+ struct tcp_out_options opts;
struct sk_buff *skb;
-#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
-#endif
skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
if (skb == NULL)
@@ -2195,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
skb->dst = dst_clone(dst);
- tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
- (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
- (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
- /* SACK_PERM is in the place of NOP NOP of TS */
- ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+ if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+ __u8 rcv_wscale;
+ /* Set this up on the first call only */
+ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+ /* tcp_full_space because it is guaranteed to be the first packet */
+ tcp_select_initial_window(tcp_full_space(sk),
+ dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+ &req->rcv_wnd,
+ &req->window_clamp,
+ ireq->wscale_ok,
+ &rcv_wscale);
+ ireq->rcv_wscale = rcv_wscale;
+ }
+
+ memset(&opts, 0, sizeof(opts));
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ tcp_header_size = tcp_synack_options(sk, req,
+ dst_metric(dst, RTAX_ADVMSS),
+ skb, &opts, &md5) +
+ sizeof(struct tcphdr);
-#ifdef CONFIG_TCP_MD5SIG
- /* Are we doing MD5 on this segment? If so - make room for it */
- md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
- if (md5)
- tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -2224,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
- if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
- __u8 rcv_wscale;
- /* Set this up on the first call only */
- req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
- /* tcp_full_space because it is guaranteed to be the first packet */
- tcp_select_initial_window(tcp_full_space(sk),
- dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
- &req->rcv_wnd,
- &req->window_clamp,
- ireq->wscale_ok,
- &rcv_wscale);
- ireq->rcv_wscale = rcv_wscale;
- }
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
@@ -2245,29 +2295,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
else
#endif
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
- ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
- TCP_SKB_CB(skb)->when,
- req->ts_recent,
- (
-#ifdef CONFIG_TCP_MD5SIG
- md5 ? &md5_hash_location :
-#endif
- NULL)
- );
-
+ tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
th->doff = (tcp_header_size >> 2);
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
if (md5) {
tp->af_specific->calc_md5_hash(md5_hash_location,
- md5,
- NULL, dst, req,
- tcp_hdr(skb), sk->sk_protocol,
- skb->len);
+ md5, NULL, req, skb);
}
#endif
@@ -2367,7 +2403,7 @@ int tcp_connect(struct sock *sk)
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
- TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 5ff0ce6e9d3..7ddc30f0744 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -224,7 +224,7 @@ static __init int tcpprobe_init(void)
if (bufsize < 0)
return -EINVAL;
- tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL);
+ tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
if (!tcp_probe.log)
goto err0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 63ed9d6830e..328e0cf42b3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -50,7 +48,7 @@ static void tcp_write_err(struct sock *sk)
sk->sk_error_report(sk);
tcp_done(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
/* Do not allow orphaned sockets to eat all our resources.
@@ -91,7 +89,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
return 0;
@@ -181,7 +179,7 @@ static void tcp_delack_timer(unsigned long data)
if (sock_owned_by_user(sk)) {
/* Try again later. */
icsk->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
goto out_unlock;
}
@@ -200,7 +198,7 @@ static void tcp_delack_timer(unsigned long data)
if (!skb_queue_empty(&tp->ucopy.prequeue)) {
struct sk_buff *skb;
- NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk->sk_backlog_rcv(sk, skb);
@@ -220,7 +218,7 @@ static void tcp_delack_timer(unsigned long data)
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
tcp_send_ack(sk);
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
TCP_CHECK_TIMER(sk);
@@ -328,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk)
goto out;
if (icsk->icsk_retransmits == 0) {
+ int mib_idx;
+
if (icsk->icsk_ca_state == TCP_CA_Disorder ||
icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tcp_is_sack(tp)) {
if (icsk->icsk_ca_state == TCP_CA_Recovery)
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
+ mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
+ mib_idx = LINUX_MIB_TCPSACKFAILURES;
} else {
if (icsk->icsk_ca_state == TCP_CA_Recovery)
- NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
+ mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
+ mib_idx = LINUX_MIB_TCPRENOFAILURES;
}
} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
+ mib_idx = LINUX_MIB_TCPLOSSFAILURES;
} else {
- NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
+ mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
}
if (tcp_use_frto(sk)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 56fcda3694b..a751770947a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,8 +5,6 @@
*
* The User Datagram Protocol (UDP).
*
- * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
@@ -110,9 +108,6 @@
* Snmp MIB for the UDP layer
*/
-DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
-EXPORT_SYMBOL(udp_statistics);
-
DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
EXPORT_SYMBOL(udp_stats_in6);
@@ -136,7 +131,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
struct sock *sk;
struct hlist_node *node;
- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
return 1;
return 0;
@@ -176,7 +171,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++) {
int size = 0;
- head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(net, rover)];
if (hlist_empty(head))
goto gotit;
@@ -213,7 +208,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
snum = rover;
} else {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(net, snum)];
sk_for_each(sk2, node, head)
if (sk2->sk_hash == snum &&
@@ -229,7 +224,7 @@ gotit:
inet_sk(sk)->num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(net, snum)];
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
@@ -266,7 +261,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -356,11 +351,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
struct sock *sk;
int harderr;
int err;
+ struct net *net = dev_net(skb->dev);
- sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
+ sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex, udptable);
if (sk == NULL) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
}
@@ -528,7 +524,8 @@ out:
up->len = 0;
up->pending = 0;
if (!err)
- UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
@@ -656,11 +653,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
+ struct net *net = sock_net(sk);
+
security_sk_classify_flow(sk, &fl);
- err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
+ err = ip_route_output_flow(net, &rt, &fl, sk, 1);
if (err) {
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
goto out;
}
@@ -727,7 +726,8 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
@@ -890,7 +890,8 @@ try_again:
goto out_free;
if (!peeked)
- UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
sock_recv_timestamp(msg, sk, skb);
@@ -919,7 +920,7 @@ out:
csum_copy_err:
lock_sock(sk);
if (!skb_kill_datagram(sk, skb, flags))
- UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
release_sock(sk);
if (noblock)
@@ -990,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
ret = (*up->encap_rcv)(sk, skb);
if (ret <= 0) {
- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
is_udplite);
return -ret;
}
@@ -1042,15 +1044,18 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
- UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
+ if (rc == -ENOMEM) {
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ atomic_inc(&sk->sk_drops);
+ }
goto drop;
}
return 0;
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
@@ -1061,7 +1066,7 @@ drop:
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
-static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
+static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udphdr *uh,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
@@ -1070,7 +1075,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
@@ -1158,6 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
struct rtable *rt = (struct rtable*)skb->dst;
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
+ struct net *net = dev_net(skb->dev);
/*
* Validate the packet.
@@ -1180,9 +1186,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
goto csum_error;
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable);
- sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
+ sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
@@ -1211,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -1245,7 +1252,7 @@ csum_error:
ntohs(uh->dest),
ulen);
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
@@ -1255,12 +1262,11 @@ int udp_rcv(struct sk_buff *skb)
return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
-int udp_destroy_sock(struct sock *sk)
+void udp_destroy_sock(struct sock *sk)
{
lock_sock(sk);
udp_flush_pending_frames(sk);
release_sock(sk);
- return 0;
}
/*
@@ -1453,7 +1459,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INERRORS, is_lite);
__skb_unlink(skb, rcvq);
kfree_skb(skb);
}
@@ -1629,12 +1636,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
__u16 srcp = ntohs(inet->sport);
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
bucket, src, srcp, dest, destp, sp->sk_state,
atomic_read(&sp->sk_wmem_alloc),
atomic_read(&sp->sk_rmem_alloc),
0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp, len);
+ atomic_read(&sp->sk_refcnt), sp,
+ atomic_read(&sp->sk_drops), len);
}
int udp4_seq_show(struct seq_file *seq, void *v)
@@ -1643,7 +1651,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%-127s\n",
" sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
- "inode");
+ "inode ref pointer drops");
else {
struct udp_iter_state *state = seq->private;
int len;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 7288bf7977f..2e9bad2fa1b 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -26,7 +26,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern int udp_destroy_sock(struct sock *sk);
+extern void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
extern int udp4_seq_show(struct seq_file *seq, void *v);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 72ce26b6c4d..3c807964da9 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -1,8 +1,6 @@
/*
* UDPLITE An implementation of the UDP-Lite protocol (RFC 3828).
*
- * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $
- *
* Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
*
* Changes:
@@ -13,7 +11,6 @@
* 2 of the License, or (at your option) any later version.
*/
#include "udp_impl.h"
-DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
struct hlist_head udplite_hash[UDP_HTABLE_SIZE];