summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile5
-rw-r--r--net/ipv4/af_inet.c27
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/fib_frontend.c16
-rw-r--r--net/ipv4/fib_hash.c1
-rw-r--r--net/ipv4/fib_rules.c1
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/igmp.c174
-rw-r--r--net/ipv4/inet_connection_sock.c25
-rw-r--r--net/ipv4/inet_diag.c251
-rw-r--r--net/ipv4/inet_hashtables.c178
-rw-r--r--net/ipv4/inet_timewait_sock.c5
-rw-r--r--net/ipv4/inetpeer.c7
-rw-r--r--net/ipv4/ip_fragment.c70
-rw-r--r--net/ipv4/ip_gre.c42
-rw-r--r--net/ipv4/ip_input.c16
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c46
-rw-r--r--net/ipv4/ip_sockglue.c20
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c23
-rw-r--r--net/ipv4/ipmr.c27
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c28
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c22
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c7
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c11
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c29
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c29
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c24
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c3
-rw-r--r--net/ipv4/ipvs/ip_vs_sched.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/netfilter.c25
-rw-r--r--net/ipv4/netfilter/Kconfig264
-rw-r--r--net/ipv4/netfilter/Makefile26
-rw-r--r--net/ipv4/netfilter/arp_tables.c502
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c7
-rw-r--r--net/ipv4/netfilter/arptable_filter.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c20
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c10
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c48
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c50
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c55
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c57
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c81
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c38
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c34
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c36
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c36
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c16
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c5
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c126
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c5
-rw-r--r--net/ipv4/netfilter/ip_tables.c918
-rw-r--r--net/ipv4/netfilter/ipt_CLASSIFY.c90
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c122
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c2
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c3
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c2
-rw-r--r--net/ipv4/netfilter/ipt_MARK.c172
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c4
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c2
-rw-r--r--net/ipv4/netfilter/ipt_NFQUEUE.c70
-rw-r--r--net/ipv4/netfilter/ipt_NOTRACK.c76
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c5
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c2
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c3
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c2
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c12
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c4
-rw-r--r--net/ipv4/netfilter/ipt_ah.c6
-rw-r--r--net/ipv4/netfilter/ipt_comment.c59
-rw-r--r--net/ipv4/netfilter/ipt_connbytes.c161
-rw-r--r--net/ipv4/netfilter/ipt_connmark.c88
-rw-r--r--net/ipv4/netfilter/ipt_conntrack.c232
-rw-r--r--net/ipv4/netfilter/ipt_dccp.c176
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c4
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c5
-rw-r--r--net/ipv4/netfilter/ipt_esp.c6
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c3
-rw-r--r--net/ipv4/netfilter/ipt_helper.c167
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c4
-rw-r--r--net/ipv4/netfilter/ipt_length.c64
-rw-r--r--net/ipv4/netfilter/ipt_limit.c157
-rw-r--r--net/ipv4/netfilter/ipt_mac.c79
-rw-r--r--net/ipv4/netfilter/ipt_mark.c71
-rw-r--r--net/ipv4/netfilter/ipt_multiport.c10
-rw-r--r--net/ipv4/netfilter/ipt_owner.c3
-rw-r--r--net/ipv4/netfilter/ipt_physdev.c134
-rw-r--r--net/ipv4/netfilter/ipt_pkttype.c70
-rw-r--r--net/ipv4/netfilter/ipt_policy.c173
-rw-r--r--net/ipv4/netfilter/ipt_realm.c76
-rw-r--r--net/ipv4/netfilter/ipt_recent.c27
-rw-r--r--net/ipv4/netfilter/ipt_sctp.c203
-rw-r--r--net/ipv4/netfilter/ipt_state.c74
-rw-r--r--net/ipv4/netfilter/ipt_string.c91
-rw-r--r--net/ipv4/netfilter/ipt_tcpmss.c127
-rw-r--r--net/ipv4/netfilter/ipt_tos.c3
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c4
-rw-r--r--net/ipv4/netfilter/iptable_filter.c3
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c1
-rw-r--r--net/ipv4/netfilter/iptable_raw.c3
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c83
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c97
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_bic.c85
-rw-r--r--net/ipv4/tcp_cong.c28
-rw-r--r--net/ipv4/tcp_cubic.c411
-rw-r--r--net/ipv4/tcp_input.c101
-rw-r--r--net/ipv4/tcp_ipv4.c270
-rw-r--r--net/ipv4/tcp_minisocks.c16
-rw-r--r--net/ipv4/tcp_output.c347
-rw-r--r--net/ipv4/tcp_vegas.c20
-rw-r--r--net/ipv4/udp.c24
-rw-r--r--net/ipv4/xfrm4_input.c31
-rw-r--r--net/ipv4/xfrm4_output.c72
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv4/xfrm4_state.c15
150 files changed, 2698 insertions, 5354 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e55136ae09f..011cca7ae02 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -456,6 +456,14 @@ config TCP_CONG_BIC
increase provides TCP friendliness.
See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
+config TCP_CONG_CUBIC
+ tristate "CUBIC TCP"
+ default m
+ ---help---
+ This is version 2.0 of BIC-TCP which uses a cubic growth function
+ among other techniques.
+ See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+
config TCP_CONG_WESTWOOD
tristate "TCP Westwood+"
default m
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f0435d00db6..35e5f599909 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -9,7 +9,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
- sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o
+ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -28,12 +28,13 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
-obj-$(CONFIG_NETFILTER) += netfilter/
+obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_IP_VS) += ipvs/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaa150c33b0..97c276f95b3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -79,6 +79,7 @@
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
+#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
@@ -93,6 +94,7 @@
#include <linux/smp_lock.h>
#include <linux/inet.h>
#include <linux/igmp.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -228,13 +230,14 @@ static int inet_create(struct socket *sock, int protocol)
unsigned char answer_flags;
char answer_no_check;
int try_loading_module = 0;
- int err = -ESOCKTNOSUPPORT;
+ int err;
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
answer = NULL;
lookup_protocol:
+ err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_rcu(p, &inetsw[sock->type]) {
answer = list_entry(p, struct inet_protosw, list);
@@ -252,6 +255,7 @@ lookup_protocol:
if (IPPROTO_IP == answer->protocol)
break;
}
+ err = -EPROTONOSUPPORT;
answer = NULL;
}
@@ -280,9 +284,6 @@ lookup_protocol:
err = -EPERM;
if (answer->capability > 0 && !capable(answer->capability))
goto out_rcu_unlock;
- err = -EPROTONOSUPPORT;
- if (!protocol)
- goto out_rcu_unlock;
sock->ops = answer->ops;
answer_prot = answer->prot;
@@ -303,6 +304,7 @@ lookup_protocol:
sk->sk_reuse = 1;
inet = inet_sk(sk);
+ inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
if (SOCK_RAW == sock->type) {
inet->num = protocol;
@@ -776,16 +778,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = devinet_ioctl(cmd, (void __user *)arg);
break;
default:
- if (!sk->sk_prot->ioctl ||
- (err = sk->sk_prot->ioctl(sk, cmd, arg)) ==
- -ENOIOCTLCMD)
- err = dev_ioctl(cmd, (void __user *)arg);
+ if (sk->sk_prot->ioctl)
+ err = sk->sk_prot->ioctl(sk, cmd, arg);
+ else
+ err = -ENOIOCTLCMD;
break;
}
return err;
}
-struct proto_ops inet_stream_ops = {
+const struct proto_ops inet_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
@@ -806,7 +808,7 @@ struct proto_ops inet_stream_ops = {
.sendpage = tcp_sendpage
};
-struct proto_ops inet_dgram_ops = {
+const struct proto_ops inet_dgram_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
@@ -831,7 +833,7 @@ struct proto_ops inet_dgram_ops = {
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
* udp_poll
*/
-static struct proto_ops inet_sockraw_ops = {
+static const struct proto_ops inet_sockraw_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
@@ -870,7 +872,8 @@ static struct inet_protosw inetsw_array[] =
.ops = &inet_stream_ops,
.capability = -1,
.no_check = 0,
- .flags = INET_PROTOSW_PERMANENT,
+ .flags = INET_PROTOSW_PERMANENT |
+ INET_PROTOSW_ICSK,
},
{
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 035ad2c9e1b..aed537fa2c8 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -6,6 +6,7 @@
#include <linux/crypto.h>
#include <linux/pfkeyv2.h>
#include <net/icmp.h>
+#include <net/protocol.h>
#include <asm/scatterlist.h>
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b425748f02d..accdefedfed 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -79,6 +79,7 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/capability.h>
#include <linux/config.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -86,6 +87,7 @@
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/fddidevice.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 04a6fe3e95a..95b9d81ac48 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
+#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -58,6 +59,7 @@
#endif
#include <linux/kmod.h>
+#include <net/arp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b18ce66e7b..73bfcae8af9 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -9,6 +9,7 @@
#include <linux/pfkeyv2.h>
#include <linux/random.h>
#include <net/icmp.h>
+#include <net/protocol.h>
#include <net/udp.h>
/* decapsulation data for use when post-processing */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 882f88f6d13..4e3d3811dea 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -20,6 +20,7 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
+#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -30,6 +31,7 @@
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
@@ -287,13 +289,13 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
{
int i;
- for (i=1; i<=RTA_MAX; i++) {
- struct rtattr *attr = rta[i-1];
+ for (i=1; i<=RTA_MAX; i++, rta++) {
+ struct rtattr *attr = *rta;
if (attr) {
if (RTA_PAYLOAD(attr) < 4)
return -EINVAL;
if (i != RTA_MULTIPATH && i != RTA_METRICS)
- rta[i-1] = (struct rtattr*)RTA_DATA(attr);
+ *rta = (struct rtattr*)RTA_DATA(attr);
}
}
return 0;
@@ -544,12 +546,16 @@ static void nl_fib_input(struct sock *sk, int len)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh = NULL;
struct fib_result_nl *frn;
- int err;
u32 pid;
struct fib_table *tb;
- skb = skb_recv_datagram(sk, 0, 0, &err);
+ skb = skb_dequeue(&sk->sk_receive_queue);
nlh = (struct nlmsghdr *)skb->data;
+ if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
+ nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
+ kfree_skb(skb);
+ return;
+ }
frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
tb = fib_get_table(frn->tb_id_in);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 7ea0209cb16..e2890ec8159 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -29,6 +29,7 @@
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0b298bbc151..0dd4d06e456 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6d2a6ac070e..ef4724de735 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
@@ -36,6 +37,7 @@
#include <linux/netlink.h>
#include <linux/init.h>
+#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 705e3ce86df..e320b32373e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -41,6 +41,13 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Substantial contributions to this work comes from:
+ *
+ * David S. Miller, <davem@davemloft.net>
+ * Stephen Hemminger <shemminger@osdl.org>
+ * Paul E. McKenney <paulmck@us.ibm.com>
+ * Patrick McHardy <kaber@trash.net>
*/
#define VERSION "0.404"
@@ -59,6 +66,7 @@
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 92e23b2ad4d..105039eb762 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -73,6 +73,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/netfilter_ipv4.h>
@@ -898,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb)
u32 _mask, *mp;
mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
- if (mp == NULL)
- BUG();
+ BUG_ON(mp == NULL);
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
if (*mp == ifa->ifa_mask &&
inet_ifa_match(rt->rt_src, ifa))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c04607b4921..d8ce7133cd8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -91,6 +91,8 @@
#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/times.h>
+
+#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -231,7 +233,18 @@ static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
case IGMPV3_MODE_IS_EXCLUDE:
if (gdeleted || sdeleted)
return 0;
- return !(pmc->gsquery && !psf->sf_gsresp);
+ if (!(pmc->gsquery && !psf->sf_gsresp)) {
+ if (pmc->sfmode == MCAST_INCLUDE)
+ return 1;
+ /* don't include if this source is excluded
+ * in all filters
+ */
+ if (psf->sf_count[MCAST_INCLUDE])
+ return type == IGMPV3_MODE_IS_INCLUDE;
+ return pmc->sfcount[MCAST_EXCLUDE] ==
+ psf->sf_count[MCAST_EXCLUDE];
+ }
+ return 0;
case IGMPV3_CHANGE_TO_INCLUDE:
if (gdeleted || sdeleted)
return 0;
@@ -383,7 +396,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
struct igmpv3_report *pih;
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
- int scount, first, isquery, truncate;
+ int scount, stotal, first, isquery, truncate;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
@@ -393,25 +406,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
type == IGMPV3_CHANGE_TO_EXCLUDE;
+ stotal = scount = 0;
+
psf_list = sdeleted ? &pmc->tomb : &pmc->sources;
- if (!*psf_list) {
- if (type == IGMPV3_ALLOW_NEW_SOURCES ||
- type == IGMPV3_BLOCK_OLD_SOURCES)
- return skb;
- if (pmc->crcount || isquery) {
- /* make sure we have room for group header and at
- * least one source.
- */
- if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)+
- sizeof(__u32)) {
- igmpv3_sendpack(skb);
- skb = NULL; /* add_grhead will get a new one */
- }
- skb = add_grhead(skb, pmc, type, &pgr);
- }
- return skb;
- }
+ if (!*psf_list)
+ goto empty_source;
+
pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
/* EX and TO_EX get a fresh packet, if needed */
@@ -424,7 +425,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
}
}
first = 1;
- scount = 0;
psf_prev = NULL;
for (psf=*psf_list; psf; psf=psf_next) {
u32 *psrc;
@@ -458,7 +458,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
}
psrc = (u32 *)skb_put(skb, sizeof(u32));
*psrc = psf->sf_inaddr;
- scount++;
+ scount++; stotal++;
if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
psf->sf_crcount--;
@@ -473,6 +473,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
}
psf_prev = psf;
}
+
+empty_source:
+ if (!stotal) {
+ if (type == IGMPV3_ALLOW_NEW_SOURCES ||
+ type == IGMPV3_BLOCK_OLD_SOURCES)
+ return skb;
+ if (pmc->crcount || isquery) {
+ /* make sure we have room for group header */
+ if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) {
+ igmpv3_sendpack(skb);
+ skb = NULL; /* add_grhead will get a new one */
+ }
+ skb = add_grhead(skb, pmc, type, &pgr);
+ }
+ }
if (pgr)
pgr->grec_nsrcs = htons(scount);
@@ -555,11 +570,11 @@ static void igmpv3_send_cr(struct in_device *in_dev)
skb = add_grec(skb, pmc, dtype, 1, 1);
}
if (pmc->crcount) {
- pmc->crcount--;
if (pmc->sfmode == MCAST_EXCLUDE) {
type = IGMPV3_CHANGE_TO_INCLUDE;
skb = add_grec(skb, pmc, type, 1, 0);
}
+ pmc->crcount--;
if (pmc->crcount == 0) {
igmpv3_clear_zeros(&pmc->tomb);
igmpv3_clear_zeros(&pmc->sources);
@@ -592,12 +607,12 @@ static void igmpv3_send_cr(struct in_device *in_dev)
/* filter mode changes */
if (pmc->crcount) {
- pmc->crcount--;
if (pmc->sfmode == MCAST_EXCLUDE)
type = IGMPV3_CHANGE_TO_EXCLUDE;
else
type = IGMPV3_CHANGE_TO_INCLUDE;
skb = add_grec(skb, pmc, type, 0, 0);
+ pmc->crcount--;
}
spin_unlock_bh(&pmc->lock);
}
@@ -733,7 +748,8 @@ static void igmp_timer_expire(unsigned long data)
ip_ma_put(im);
}
-static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+/* mark EXCLUDE-mode sources */
+static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
{
struct ip_sf_list *psf;
int i, scount;
@@ -742,6 +758,37 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
for (psf=pmc->sources; psf; psf=psf->sf_next) {
if (scount == nsrcs)
break;
+ for (i=0; i<nsrcs; i++) {
+ /* skip inactive filters */
+ if (pmc->sfcount[MCAST_INCLUDE] ||
+ pmc->sfcount[MCAST_EXCLUDE] !=
+ psf->sf_count[MCAST_EXCLUDE])
+ continue;
+ if (srcs[i] == psf->sf_inaddr) {
+ scount++;
+ break;
+ }
+ }
+ }
+ pmc->gsquery = 0;
+ if (scount == nsrcs) /* all sources excluded */
+ return 0;
+ return 1;
+}
+
+static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+{
+ struct ip_sf_list *psf;
+ int i, scount;
+
+ if (pmc->sfmode == MCAST_EXCLUDE)
+ return igmp_xmarksources(pmc, nsrcs, srcs);
+
+ /* mark INCLUDE-mode sources */
+ scount = 0;
+ for (psf=pmc->sources; psf; psf=psf->sf_next) {
+ if (scount == nsrcs)
+ break;
for (i=0; i<nsrcs; i++)
if (srcs[i] == psf->sf_inaddr) {
psf->sf_gsresp = 1;
@@ -749,6 +796,12 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
break;
}
}
+ if (!scount) {
+ pmc->gsquery = 0;
+ return 0;
+ }
+ pmc->gsquery = 1;
+ return 1;
}
static void igmp_heard_report(struct in_device *in_dev, u32 group)
@@ -843,6 +896,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
*/
read_lock(&in_dev->mc_list_lock);
for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+ int changed;
+
if (group && group != im->multiaddr)
continue;
if (im->multiaddr == IGMP_ALL_HOSTS)
@@ -852,10 +907,11 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
im->gsquery = im->gsquery && mark;
else
im->gsquery = mark;
- if (im->gsquery)
- igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
+ changed = !im->gsquery ||
+ igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
spin_unlock_bh(&im->lock);
- igmp_mod_timer(im, max_delay);
+ if (changed)
+ igmp_mod_timer(im, max_delay);
}
read_unlock(&in_dev->mc_list_lock);
}
@@ -897,7 +953,10 @@ int igmp_rcv(struct sk_buff *skb)
/* Is it our report looped back? */
if (((struct rtable*)skb->dst)->fl.iif == 0)
break;
- igmp_heard_report(in_dev, ih->group);
+ /* don't rely on MC router hearing unicast reports */
+ if (skb->pkt_type == PACKET_MULTICAST ||
+ skb->pkt_type == PACKET_BROADCAST)
+ igmp_heard_report(in_dev, ih->group);
break;
case IGMP_PIM:
#ifdef CONFIG_IP_PIMSM_V1
@@ -970,7 +1029,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
* for deleted items allows change reports to use common code with
* non-deleted or query-response MCA's.
*/
- pmc = (struct ip_mc_list *)kmalloc(sizeof(*pmc), GFP_KERNEL);
+ pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
memset(pmc, 0, sizeof(*pmc));
@@ -1150,7 +1209,7 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
}
}
- im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL);
+ im = kmalloc(sizeof(*im), GFP_KERNEL);
if (!im)
goto out;
@@ -1471,7 +1530,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
psf_prev = psf;
}
if (!psf) {
- psf = (struct ip_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+ psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
if (!psf)
return -ENOBUFS;
memset(psf, 0, sizeof(*psf));
@@ -1505,7 +1564,7 @@ static void sf_markstate(struct ip_mc_list *pmc)
static int sf_setstate(struct ip_mc_list *pmc)
{
- struct ip_sf_list *psf;
+ struct ip_sf_list *psf, *dpsf;
int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
int qrv = pmc->interface->mr_qrv;
int new_in, rv;
@@ -1517,8 +1576,46 @@ static int sf_setstate(struct ip_mc_list *pmc)
!psf->sf_count[MCAST_INCLUDE];
} else
new_in = psf->sf_count[MCAST_INCLUDE] != 0;
- if (new_in != psf->sf_oldin) {
- psf->sf_crcount = qrv;
+ if (new_in) {
+ if (!psf->sf_oldin) {
+ struct ip_sf_list *prev = 0;
+
+ for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) {
+ if (dpsf->sf_inaddr == psf->sf_inaddr)
+ break;
+ prev = dpsf;
+ }
+ if (dpsf) {
+ if (prev)
+ prev->sf_next = dpsf->sf_next;
+ else
+ pmc->tomb = dpsf->sf_next;
+ kfree(dpsf);
+ }
+ psf->sf_crcount = qrv;
+ rv++;
+ }
+ } else if (psf->sf_oldin) {
+
+ psf->sf_crcount = 0;
+ /*
+ * add or update "delete" records if an active filter
+ * is now inactive
+ */
+ for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next)
+ if (dpsf->sf_inaddr == psf->sf_inaddr)
+ break;
+ if (!dpsf) {
+ dpsf = (struct ip_sf_list *)
+ kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+ if (!dpsf)
+ continue;
+ *dpsf = *psf;
+ /* pmc->lock held by callers */
+ dpsf->sf_next = pmc->tomb;
+ pmc->tomb = dpsf;
+ }
+ dpsf->sf_crcount = qrv;
rv++;
}
}
@@ -1654,7 +1751,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
err = -ENOBUFS;
if (count >= sysctl_igmp_max_memberships)
goto done;
- iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
+ iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
if (iml == NULL)
goto done;
@@ -1818,8 +1915,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
if (psl)
count += psl->sl_max;
- newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
- IP_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -1902,8 +1998,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
goto done;
}
if (msf->imsf_numsrc) {
- newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
- IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3fe021f1a56..ae20281d8de 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
*/
int sysctl_local_port_range[2] = { 1024, 4999 };
-static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
+int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb)
{
const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
struct sock *sk2;
@@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke
return node != NULL;
}
+EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
+
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
*/
int inet_csk_get_port(struct inet_hashinfo *hashinfo,
- struct sock *sk, unsigned short snum)
+ struct sock *sk, unsigned short snum,
+ int (*bind_conflict)(const struct sock *sk,
+ const struct inet_bind_bucket *tb))
{
struct inet_bind_hashbucket *head;
struct hlist_node *node;
@@ -125,7 +130,7 @@ tb_found:
goto success;
} else {
ret = 1;
- if (inet_csk_bind_conflict(sk, tb))
+ if (bind_conflict(sk, tb))
goto fail_unlock;
}
}
@@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
EXPORT_SYMBOL_GPL(inet_csk_search_req);
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
- const unsigned timeout)
+ unsigned long timeout)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
@@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
+
+void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+ const struct inet_sock *inet = inet_sk(sk);
+
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = inet->daddr;
+ sin->sin_port = inet->dport;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 39061ed53cf..457db99c76d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -50,9 +50,10 @@ static struct sock *idiagnl;
#define INET_DIAG_PUT(skb, attrtype, attrlen) \
RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
-static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+static int inet_csk_diag_fill(struct sock *sk,
+ struct sk_buff *skb,
+ int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
nlh->nlmsg_flags = nlmsg_flags;
r = NLMSG_DATA(nlh);
- if (sk->sk_state != TCP_TIME_WAIT) {
- if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
- minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
- sizeof(*minfo));
- if (ext & (1 << (INET_DIAG_INFO - 1)))
- info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
- handler->idiag_info_size);
-
- if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
- size_t len = strlen(icsk->icsk_ca_ops->name);
- strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
- icsk->icsk_ca_ops->name);
- }
+ BUG_ON(sk->sk_state == TCP_TIME_WAIT);
+
+ if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
+ minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
+
+ if (ext & (1 << (INET_DIAG_INFO - 1)))
+ info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
+ handler->idiag_info_size);
+
+ if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+ const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+ strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+ icsk->icsk_ca_ops->name);
}
+
r->idiag_family = sk->sk_family;
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
@@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
- if (r->idiag_state == TCP_TIME_WAIT) {
- const struct inet_timewait_sock *tw = inet_twsk(sk);
- long tmo = tw->tw_ttd - jiffies;
- if (tmo < 0)
- tmo = 0;
-
- r->id.idiag_sport = tw->tw_sport;
- r->id.idiag_dport = tw->tw_dport;
- r->id.idiag_src[0] = tw->tw_rcv_saddr;
- r->id.idiag_dst[0] = tw->tw_daddr;
- r->idiag_state = tw->tw_substate;
- r->idiag_timer = 3;
- r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
- r->idiag_rqueue = 0;
- r->idiag_wqueue = 0;
- r->idiag_uid = 0;
- r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- if (r->idiag_family == AF_INET6) {
- const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
-
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &tcp6tw->tw_v6_rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &tcp6tw->tw_v6_daddr);
- }
-#endif
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
- }
-
r->id.idiag_sport = inet->sport;
r->id.idiag_dport = inet->dport;
r->id.idiag_src[0] = inet->rcv_saddr;
@@ -185,7 +157,75 @@ nlmsg_failure:
return -1;
}
-static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
+static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+ struct sk_buff *skb, int ext, u32 pid,
+ u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ long tmo;
+ struct inet_diag_msg *r;
+ const unsigned char *previous_tail = skb->tail;
+ struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
+ unlh->nlmsg_type, sizeof(*r));
+
+ r = NLMSG_DATA(nlh);
+ BUG_ON(tw->tw_state != TCP_TIME_WAIT);
+
+ nlh->nlmsg_flags = nlmsg_flags;
+
+ tmo = tw->tw_ttd - jiffies;
+ if (tmo < 0)
+ tmo = 0;
+
+ r->idiag_family = tw->tw_family;
+ r->idiag_state = tw->tw_state;
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->id.idiag_if = tw->tw_bound_dev_if;
+ r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
+ r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+ r->id.idiag_sport = tw->tw_sport;
+ r->id.idiag_dport = tw->tw_dport;
+ r->id.idiag_src[0] = tw->tw_rcv_saddr;
+ r->id.idiag_dst[0] = tw->tw_daddr;
+ r->idiag_state = tw->tw_substate;
+ r->idiag_timer = 3;
+ r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
+ r->idiag_rqueue = 0;
+ r->idiag_wqueue = 0;
+ r->idiag_uid = 0;
+ r->idiag_inode = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ if (tw->tw_family == AF_INET6) {
+ const struct inet6_timewait_sock *tw6 =
+ inet6_twsk((struct sock *)tw);
+
+ ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+ &tw6->tw_v6_rcv_saddr);
+ ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+ &tw6->tw_v6_daddr);
+ }
+#endif
+ nlh->nlmsg_len = skb->tail - previous_tail;
+ return skb->len;
+nlmsg_failure:
+ skb_trim(skb, previous_tail - skb->data);
+ return -1;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ if (sk->sk_state == TCP_TIME_WAIT)
+ return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
+ skb, ext, pid, seq, nlmsg_flags,
+ unlh);
+ return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
+}
+
+static int inet_diag_get_exact(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh)
{
int err;
struct sock *sk;
@@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl
if (!rep)
goto out;
- if (inet_diag_fill(rep, sk, req->idiag_ext,
+ if (sk_diag_fill(sk, rep, req->idiag_ext,
NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, nlh) <= 0)
BUG();
@@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
static int inet_diag_bc_run(const void *bc, int len,
- const struct inet_diag_entry *entry)
+ const struct inet_diag_entry *entry)
{
while (len > 0) {
int yes = 1;
@@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len,
yes = 0;
break;
}
-
+
if (cond->prefix_len == 0)
break;
@@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len,
else
addr = entry->daddr;
- if (bitstring_match(addr, cond->addr, cond->prefix_len))
+ if (bitstring_match(addr, cond->addr,
+ cond->prefix_len))
break;
if (entry->family == AF_INET6 &&
cond->family == AF_INET) {
@@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len,
}
}
- if (yes) {
+ if (yes) {
len -= op->yes;
bc += op->yes;
} else {
@@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
default:
return -EINVAL;
}
- bc += op->yes;
+ bc += op->yes;
len -= op->yes;
}
return len == 0 ? 0 : -EINVAL;
}
-static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+static int inet_csk_diag_dump(struct sock *sk,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
return 0;
}
- return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+ return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+
+static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+
+ if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ struct inet_diag_entry entry;
+ struct rtattr *bc = (struct rtattr *)(r + 1);
+
+ entry.family = tw->tw_family;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ if (tw->tw_family == AF_INET6) {
+ struct inet6_timewait_sock *tw6 =
+ inet6_twsk((struct sock *)tw);
+ entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
+ entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+ } else
+#endif
+ {
+ entry.saddr = &tw->tw_rcv_saddr;
+ entry.daddr = &tw->tw_daddr;
+ }
+ entry.sport = tw->tw_num;
+ entry.dport = ntohs(tw->tw_dport);
+ entry.userlocks = 0;
+
+ if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ return 0;
+ }
+
+ return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
- struct request_sock *req,
- u32 pid, u32 seq,
- const struct nlmsghdr *unlh)
+ struct request_sock *req, u32 pid, u32 seq,
+ const struct nlmsghdr *unlh)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *inet = inet_sk(sk);
@@ -489,9 +567,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
if (r->idiag_family == AF_INET6) {
ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &tcp6_rsk(req)->loc_addr);
+ &inet6_rsk(req)->loc_addr);
ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &tcp6_rsk(req)->rmt_addr);
+ &inet6_rsk(req)->rmt_addr);
}
#endif
nlh->nlmsg_len = skb->tail - b;
@@ -504,7 +582,7 @@ nlmsg_failure:
}
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+ struct netlink_callback *cb)
{
struct inet_diag_entry entry;
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -553,13 +631,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
entry.saddr =
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
(entry.family == AF_INET6) ?
- tcp6_rsk(req)->loc_addr.s6_addr32 :
+ inet6_rsk(req)->loc_addr.s6_addr32 :
#endif
&ireq->loc_addr;
- entry.daddr =
+ entry.daddr =
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
(entry.family == AF_INET6) ?
- tcp6_rsk(req)->rmt_addr.s6_addr32 :
+ inet6_rsk(req)->rmt_addr.s6_addr32 :
#endif
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
@@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
handler = inet_diag_table[cb->nlh->nlmsg_type];
BUG_ON(handler == NULL);
hashinfo = handler->idiag_hashinfo;
-
+
s_i = cb->args[1];
s_num = num = cb->args[2];
@@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[3] > 0)
goto syn_recv;
- if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb) < 0) {
inet_listen_unlock(hashinfo);
goto done;
}
@@ -672,7 +750,6 @@ skip_listen_ht:
s_num = 0;
read_lock_bh(&head->lock);
-
num = 0;
sk_for_each(sk, node, &head->chain) {
struct inet_sock *inet = inet_sk(sk);
@@ -684,9 +761,10 @@ skip_listen_ht:
if (r->id.idiag_sport != inet->sport &&
r->id.idiag_sport)
goto next_normal;
- if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
+ if (r->id.idiag_dport != inet->dport &&
+ r->id.idiag_dport)
goto next_normal;
- if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb) < 0) {
read_unlock_bh(&head->lock);
goto done;
}
@@ -695,19 +773,20 @@ next_normal:
}
if (r->idiag_states & TCPF_TIME_WAIT) {
- sk_for_each(sk, node,
+ struct inet_timewait_sock *tw;
+
+ inet_twsk_for_each(tw, node,
&hashinfo->ehash[i + hashinfo->ehash_size].chain) {
- struct inet_sock *inet = inet_sk(sk);
if (num < s_num)
goto next_dying;
- if (r->id.idiag_sport != inet->sport &&
+ if (r->id.idiag_sport != tw->tw_sport &&
r->id.idiag_sport)
goto next_dying;
- if (r->id.idiag_dport != inet->dport &&
+ if (r->id.idiag_dport != tw->tw_dport &&
r->id.idiag_dport)
goto next_dying;
- if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+ if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
read_unlock_bh(&head->lock);
goto done;
}
@@ -724,8 +803,7 @@ done:
return skb->len;
}
-static __inline__ int
-inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
@@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
return netlink_dump_start(idiagnl, skb, nlh,
inet_diag_dump, NULL);
- } else {
+ } else
return inet_diag_get_exact(skb, nlh);
- }
err_inval:
return -EINVAL;
@@ -766,15 +843,15 @@ err_inval:
static inline void inet_diag_rcv_skb(struct sk_buff *skb)
{
- int err;
- struct nlmsghdr * nlh;
-
if (skb->len >= NLMSG_SPACE(0)) {
- nlh = (struct nlmsghdr *)skb->data;
- if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+ int err;
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+
+ if (nlh->nlmsg_len < sizeof(*nlh) ||
+ skb->len < nlh->nlmsg_len)
return;
err = inet_diag_rcv_msg(skb, nlh);
- if (err || nlh->nlmsg_flags & NLM_F_ACK)
+ if (err || nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, err);
}
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d..33228115cda 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
#include <linux/config.h>
#include <linux/module.h>
+#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
+#include <net/ip.h>
/*
* Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+
+/* called with local bh disabled */
+static int __inet_check_established(struct inet_timewait_death_row *death_row,
+ struct sock *sk, __u16 lport,
+ struct inet_timewait_sock **twp)
+{
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ struct inet_sock *inet = inet_sk(sk);
+ u32 daddr = inet->rcv_saddr;
+ u32 saddr = inet->daddr;
+ int dif = sk->sk_bound_dev_if;
+ INET_ADDR_COOKIE(acookie, saddr, daddr)
+ const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+ unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ struct sock *sk2;
+ const struct hlist_node *node;
+ struct inet_timewait_sock *tw;
+
+ prefetch(head->chain.first);
+ write_lock(&head->lock);
+
+ /* Check TIME-WAIT sockets first. */
+ sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+ tw = inet_twsk(sk2);
+
+ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+ if (twsk_unique(sk, sk2, twp))
+ goto unique;
+ else
+ goto not_unique;
+ }
+ }
+ tw = NULL;
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+ goto not_unique;
+ }
+
+unique:
+ /* Must record num and sport now. Otherwise we will see
+ * in hash table socket with a funny identity. */
+ inet->num = lport;
+ inet->sport = htons(lport);
+ sk->sk_hash = hash;
+ BUG_TRAP(sk_unhashed(sk));
+ __sk_add_node(sk, &head->chain);
+ sock_prot_inc_use(sk->sk_prot);
+ write_unlock(&head->lock);
+
+ if (twp) {
+ *twp = tw;
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ } else if (tw) {
+ /* Silly. Should hash-dance instead... */
+ inet_twsk_deschedule(tw, death_row);
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+ inet_twsk_put(tw);
+ }
+
+ return 0;
+
+not_unique:
+ write_unlock(&head->lock);
+ return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet_sk_port_offset(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr,
+ inet->dport);
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ */
+int inet_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+{
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
+ int ret;
+
+ if (!snum) {
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int range = high - low;
+ int i;
+ int port;
+ static u32 hint;
+ u32 offset = hint + inet_sk_port_offset(sk);
+ struct hlist_node *node;
+ struct inet_timewait_sock *tw = NULL;
+
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+ head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+ * because the established check is already
+ * unique enough.
+ */
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
+ if (tb->port == port) {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+ goto next_port;
+ if (!__inet_check_established(death_row,
+ sk, port,
+ &tw))
+ goto ok;
+ goto next_port;
+ }
+ }
+
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+ }
+ tb->fastreuse = -1;
+ goto ok;
+
+ next_port:
+ spin_unlock(&head->lock);
+ }
+ local_bh_enable();
+
+ return -EADDRNOTAVAIL;
+
+ok:
+ hint += i;
+
+ /* Head lock still held and bh's disabled */
+ inet_bind_hash(sk, tb, port);
+ if (sk_unhashed(sk)) {
+ inet_sk(sk)->sport = htons(port);
+ __inet_hash(hinfo, sk, 0);
+ }
+ spin_unlock(&head->lock);
+
+ if (tw) {
+ inet_twsk_deschedule(tw, death_row);;
+ inet_twsk_put(tw);
+ }
+
+ ret = 0;
+ goto out;
+ }
+
+ head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+ __inet_hash(hinfo, sk, 0);
+ spin_unlock_bh(&head->lock);
+ return 0;
+ } else {
+ spin_unlock(&head->lock);
+ /* No definite answer... Walk to established hash table */
+ ret = __inet_check_established(death_row, sk, snum, NULL);
+out:
+ local_bh_enable();
+ return ret;
+ }
+}
+
+EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a010e9a6881..417f126c749 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
{
- struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
- SLAB_ATOMIC);
+ struct inet_timewait_sock *tw =
+ kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+ SLAB_ATOMIC);
if (tw != NULL) {
const struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2fc3fd38924..2160874ce7a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p)
/* look for a node to insert instead of p */
struct inet_peer *t;
t = lookup_rightempty(p);
- if (*stackptr[-1] != t)
- BUG();
+ BUG_ON(*stackptr[-1] != t);
**--stackptr = t->avl_left;
/* t is removed, t->v4daddr > x->v4daddr for any
* x in p->avl_left subtree.
@@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p)
t->avl_left = p->avl_left;
t->avl_right = p->avl_right;
t->avl_height = p->avl_height;
- if (delp[1] != &p->avl_left)
- BUG();
+ BUG_ON(delp[1] != &p->avl_left);
delp[1] = &t->avl_left; /* was &p->avl_left */
}
peer_avl_rebalance(stack, stackptr);
@@ -401,6 +399,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create)
return NULL;
n->v4daddr = daddr;
atomic_set(&n->refcnt, 1);
+ atomic_set(&n->rid, 0);
n->ip_id_count = secure_ip_id(daddr);
n->tcp_ts_stamp = 0;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8ce0ce2ee48..2a8adda15e1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
* Patrick McHardy : LRU queue of frag heads for evictor.
*/
+#include <linux/compiler.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -38,6 +39,7 @@
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
+#include <net/inetpeer.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
@@ -56,6 +58,8 @@
int sysctl_ipfrag_high_thresh = 256*1024;
int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_max_dist = 64;
+
/* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
*/
@@ -89,8 +93,10 @@ struct ipq {
spinlock_t lock;
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
- int iif;
struct timeval stamp;
+ int iif;
+ unsigned int rid;
+ struct inet_peer *peer;
};
/* Hash table. */
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
BUG_TRAP(qp->last_in&COMPLETE);
BUG_TRAP(del_timer(&qp->timer) == 0);
+ if (qp->peer)
+ inet_putpeer(qp->peer);
+
/* Release all fragment data. */
fp = qp->fragments;
while (fp) {
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
qp->meat = 0;
qp->fragments = NULL;
qp->iif = 0;
+ qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
/* Initialize a timer for this entry. */
init_timer(&qp->timer);
@@ -373,7 +383,7 @@ out_nomem:
*/
static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
{
- __u16 id = iph->id;
+ __be16 id = iph->id;
__u32 saddr = iph->saddr;
__u32 daddr = iph->daddr;
__u8 protocol = iph->protocol;
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
return ip_frag_create(hash, iph, user);
}
+/* Is the fragment too far ahead to be part of ipq? */
+static inline int ip_frag_too_far(struct ipq *qp)
+{
+ struct inet_peer *peer = qp->peer;
+ unsigned int max = sysctl_ipfrag_max_dist;
+ unsigned int start, end;
+
+ int rc;
+
+ if (!peer || !max)
+ return 0;
+
+ start = qp->rid;
+ end = atomic_inc_return(&peer->rid);
+ qp->rid = end;
+
+ rc = qp->fragments && (end - start) > max;
+
+ if (rc) {
+ IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ }
+
+ return rc;
+}
+
+static int ip_frag_reinit(struct ipq *qp)
+{
+ struct sk_buff *fp;
+
+ if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
+ atomic_inc(&qp->refcnt);
+ return -ETIMEDOUT;
+ }
+
+ fp = qp->fragments;
+ do {
+ struct sk_buff *xp = fp->next;
+ frag_kfree_skb(fp, NULL);
+ fp = xp;
+ } while (fp);
+
+ qp->last_in = 0;
+ qp->len = 0;
+ qp->meat = 0;
+ qp->fragments = NULL;
+ qp->iif = 0;
+
+ return 0;
+}
+
/* Add new segment to existing queue. */
static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (qp->last_in & COMPLETE)
goto err;
+ if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+ unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+ ipq_kill(qp);
+ goto err;
+ }
+
offset = ntohs(skb->nh.iph->frag_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4c347c3b8e..abe23923e4e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -10,6 +10,7 @@
*
*/
+#include <linux/capability.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -28,6 +29,7 @@
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -187,7 +189,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
}
if (ipgre_fb_tunnel_dev->flags&IFF_UP)
- return ipgre_fb_tunnel_dev->priv;
+ return netdev_priv(ipgre_fb_tunnel_dev);
return NULL;
}
@@ -277,7 +279,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
return NULL;
dev->init = ipgre_tunnel_init;
- nt = dev->priv;
+ nt = netdev_priv(dev);
nt->parms = *parms;
if (register_netdevice(dev) < 0) {
@@ -285,9 +287,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
goto failed;
}
- nt = dev->priv;
- nt->parms = *parms;
-
dev_hold(dev);
ipgre_tunnel_link(nt);
return nt;
@@ -298,7 +297,7 @@ failed:
static void ipgre_tunnel_uninit(struct net_device *dev)
{
- ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
+ ipgre_tunnel_unlink(netdev_priv(dev));
dev_put(dev);
}
@@ -517,7 +516,7 @@ out:
skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
rel_info = htonl(rel_info);
} else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_HOST_UNREACH;
@@ -618,7 +617,7 @@ static int ipgre_rcv(struct sk_buff *skb)
skb->mac.raw = skb->nh.raw;
skb->nh.raw = __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb->mac.raw, offset);
+ skb_postpull_rcsum(skb, skb->h.raw, offset);
memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -668,7 +667,7 @@ drop_nolock:
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *old_iph = skb->nh.iph;
struct iphdr *tiph;
@@ -831,6 +830,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->h.raw = skb->nh.raw;
skb->nh.raw = skb_push(skb, gre_hlen);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
dst_release(skb->dst);
skb->dst = &rt->u.dst;
@@ -913,7 +913,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
t = ipgre_tunnel_locate(&p, 0);
}
if (t == NULL)
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
memcpy(&p, &t->parms, sizeof(p));
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -953,7 +953,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
} else {
unsigned nflags=0;
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
if (MULTICAST(p.iph.daddr))
nflags = IFF_BROADCAST;
@@ -1002,7 +1002,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
goto done;
err = -EPERM;
- if (t == ipgre_fb_tunnel_dev->priv)
+ if (t == netdev_priv(ipgre_fb_tunnel_dev))
goto done;
dev = t->dev;
}
@@ -1019,12 +1019,12 @@ done:
static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
{
- return &(((struct ip_tunnel*)dev->priv)->stat);
+ return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
return -EINVAL;
dev->mtu = new_mtu;
@@ -1064,7 +1064,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
void *daddr, void *saddr, unsigned len)
{
- struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *t = netdev_priv(dev);
struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
u16 *p = (u16*)(iph+1);
@@ -1091,7 +1091,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
static int ipgre_open(struct net_device *dev)
{
- struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *t = netdev_priv(dev);
if (MULTICAST(t->parms.iph.daddr)) {
struct flowi fl = { .oif = t->parms.link,
@@ -1115,7 +1115,7 @@ static int ipgre_open(struct net_device *dev)
static int ipgre_close(struct net_device *dev)
{
- struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *t = netdev_priv(dev);
if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev = inetdev_by_index(t->mlink);
if (in_dev) {
@@ -1140,7 +1140,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_IPGRE;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
- dev->mtu = 1500 - sizeof(struct iphdr) - 4;
+ dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
@@ -1152,10 +1152,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel;
struct iphdr *iph;
int hlen = LL_MAX_HEADER;
- int mtu = 1500;
+ int mtu = ETH_DATA_LEN;
int addend = sizeof(struct iphdr) + 4;
- tunnel = (struct ip_tunnel*)dev->priv;
+ tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
tunnel->dev = dev;
@@ -1219,7 +1219,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
static int __init ipgre_fb_tunnel_init(struct net_device *dev)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
tunnel->dev = dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 473d0f2b2e0..18d7fad474d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -128,6 +128,7 @@
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -184,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
raw_rcv(last, skb2);
}
last = sk;
- nf_reset(skb);
}
}
@@ -203,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
__skb_pull(skb, ihl);
- /* Free reference early: we don't need it any more, and it may
- hold ip_conntrack module loaded indefinitely. */
- nf_reset(skb);
-
/* Point into the IP datagram, just past the header. */
skb->h.raw = skb->data;
@@ -231,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
int ret;
- if (!ipprot->no_policy &&
- !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- goto out;
+ if (!ipprot->no_policy) {
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ kfree_skb(skb);
+ goto out;
+ }
+ nf_reset(skb);
}
ret = ipprot->handler(skb);
if (ret < 0) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index dbe12da8d8b..9bebad07bf2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -11,6 +11,7 @@
*
*/
+#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <asm/uaccess.h>
@@ -22,6 +23,7 @@
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
+#include <net/route.h>
/*
* Write options to IP header, record destination address to
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eba64e2bd39..3324fbfe528 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,6 +69,7 @@
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
+#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
@@ -85,6 +86,8 @@
int sysctl_ip_default_ttl = IPDEFTTL;
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
@@ -202,13 +205,16 @@ static inline int ip_finish_output2(struct sk_buff *skb)
static inline int ip_finish_output(struct sk_buff *skb)
{
- struct net_device *dev = skb->dst->dev;
-
- skb->dev = dev;
- skb->protocol = htons(ETH_P_IP);
-
- return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
- ip_finish_output2);
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+ /* Policy lookup after SNAT yielded a new policy */
+ if (skb->dst->xfrm != NULL)
+ return xfrm4_output_finish(skb);
+#endif
+ if (skb->len > dst_mtu(skb->dst) &&
+ !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+ return ip_fragment(skb, ip_finish_output2);
+ else
+ return ip_finish_output2(skb);
}
int ip_mc_output(struct sk_buff *skb)
@@ -265,21 +271,21 @@ int ip_mc_output(struct sk_buff *skb)
newskb->dev, ip_dev_loopback_xmit);
}
- if (skb->len > dst_mtu(&rt->u.dst))
- return ip_fragment(skb, ip_finish_output);
- else
- return ip_finish_output(skb);
+ return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+ ip_finish_output);
}
int ip_output(struct sk_buff *skb)
{
+ struct net_device *dev = skb->dst->dev;
+
IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
- if (skb->len > dst_mtu(skb->dst) &&
- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
- return ip_fragment(skb, ip_finish_output);
- else
- return ip_finish_output(skb);
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_IP);
+
+ return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+ ip_finish_output);
}
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
@@ -411,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
struct iphdr *iph;
int raw = 0;
@@ -420,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
struct sk_buff *skb2;
unsigned int mtu, hlen, left, len, ll_rs;
int offset;
- int not_last_frag;
+ __be16 not_last_frag;
struct rtable *rt = (struct rtable*)skb->dst;
int err = 0;
@@ -445,6 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
hlen = iph->ihl * 4;
mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */
+ IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
* some transformers could create wrong frag_list or break existing
@@ -1181,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk)
struct ip_options *opt = NULL;
struct rtable *rt = inet->cork.rt;
struct iphdr *iph;
- int df = 0;
+ __be16 df = 0;
__u8 ttl;
int err = 0;
@@ -1392,7 +1399,6 @@ void __init ip_init(void)
#endif
}
-EXPORT_SYMBOL(ip_fragment);
EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f2d8725730..2bf8d782f67 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -25,12 +25,12 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
-#include <net/tcp.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
#include <linux/udp.h>
#include <linux/igmp.h>
#include <linux/netfilter.h>
@@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
err = ip_options_get_from_user(&opt, optval, optlen);
if (err)
break;
- if (sk->sk_type == SOCK_STREAM) {
- struct tcp_sock *tp = tcp_sk(sk);
+ if (inet->is_icsk) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if (sk->sk_family == PF_INET ||
(!((1 << sk->sk_state) &
@@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
inet->daddr != LOOPBACK4_IPV6)) {
#endif
if (inet->opt)
- tp->ext_header_len -= inet->opt->optlen;
+ icsk->icsk_ext_hdr_len -= inet->opt->optlen;
if (opt)
- tp->ext_header_len += opt->optlen;
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ icsk->icsk_ext_hdr_len += opt->optlen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
}
#endif
@@ -621,7 +621,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
err = -ENOBUFS;
break;
}
- msf = (struct ip_msfilter *)kmalloc(optlen, GFP_KERNEL);
+ msf = kmalloc(optlen, GFP_KERNEL);
if (msf == 0) {
err = -ENOBUFS;
break;
@@ -778,7 +778,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
err = -ENOBUFS;
break;
}
- gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+ gsf = kmalloc(optlen,GFP_KERNEL);
if (gsf == 0) {
err = -ENOBUFS;
break;
@@ -798,7 +798,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
goto mc_msf_out;
}
msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
- msf = (struct ip_msfilter *)kmalloc(msize,GFP_KERNEL);
+ msf = kmalloc(msize,GFP_KERNEL);
if (msf == 0) {
err = -ENOBUFS;
goto mc_msf_out;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index fc718df17b4..d64e2ec8da7 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -28,6 +28,7 @@
#include <net/xfrm.h>
#include <net/icmp.h>
#include <net/ipcomp.h>
+#include <net/protocol.h>
struct ipcomp_tfms {
struct list_head list;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e8674baaa8d..bb3613ec448 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -42,6 +42,7 @@
#include <linux/in.h>
#include <linux/if.h>
#include <linux/inet.h>
+#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
@@ -58,6 +59,7 @@
#include <net/arp.h>
#include <net/ip.h>
#include <net/ipconfig.h>
+#include <net/route.h>
#include <asm/uaccess.h>
#include <net/checksum.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c05c1df0bb0..e5cbe72c6b8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -93,6 +93,7 @@
*/
+#include <linux/capability.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -108,6 +109,7 @@
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -243,7 +245,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
if (dev == NULL)
return NULL;
- nt = dev->priv;
+ nt = netdev_priv(dev);
SET_MODULE_OWNER(dev);
dev->init = ipip_tunnel_init;
nt->parms = *parms;
@@ -268,7 +270,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
tunnels_wc[0] = NULL;
write_unlock_bh(&ipip_lock);
} else
- ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
+ ipip_tunnel_unlink(netdev_priv(dev));
dev_put(dev);
}
@@ -442,7 +444,7 @@ out:
skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
rel_info = htonl(rel_info);
} else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_HOST_UNREACH;
@@ -513,7 +515,7 @@ out:
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
@@ -620,6 +622,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->h.raw = skb->nh.raw;
skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
dst_release(skb->dst);
skb->dst = &rt->u.dst;
@@ -672,7 +675,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
t = ipip_tunnel_locate(&p, 0);
}
if (t == NULL)
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
memcpy(&p, &t->parms, sizeof(p));
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -709,7 +712,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
break;
}
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
ipip_tunnel_unlink(t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
@@ -763,7 +766,7 @@ done:
static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
- return &(((struct ip_tunnel*)dev->priv)->stat);
+ return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -786,7 +789,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = 1500 - sizeof(struct iphdr);
+ dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
@@ -798,7 +801,7 @@ static int ipip_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel;
struct iphdr *iph;
- tunnel = (struct ip_tunnel*)dev->priv;
+ tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
tunnel->dev = dev;
@@ -836,7 +839,7 @@ static int ipip_tunnel_init(struct net_device *dev)
static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
- struct ip_tunnel *tunnel = dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
tunnel->dev = dev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 302b7eb507c..5c94c222e3f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -33,6 +33,7 @@
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
@@ -49,9 +50,11 @@
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
+#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
+#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
@@ -176,8 +179,8 @@ static int reg_vif_num = -1;
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
read_lock(&mrt_lock);
- ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
- ((struct net_device_stats*)dev->priv)->tx_packets++;
+ ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
@@ -186,13 +189,13 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
- return (struct net_device_stats*)dev->priv;
+ return (struct net_device_stats*)netdev_priv(dev);
}
static void reg_vif_setup(struct net_device *dev)
{
dev->type = ARPHRD_PIMREG;
- dev->mtu = 1500 - sizeof(struct iphdr) - 8;
+ dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
dev->hard_start_xmit = reg_vif_xmit;
dev->get_stats = reg_vif_get_stats;
@@ -1147,8 +1150,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out+=skb->len;
- ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
- ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
+ ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
kfree_skb(skb);
return;
@@ -1208,8 +1211,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
- ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
- ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
+ ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
+ ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1465,8 +1468,8 @@ int pim_rcv_v1(struct sk_buff * skb)
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
skb->dst = NULL;
- ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
- ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
nf_reset(skb);
netif_rx(skb);
dev_put(reg_dev);
@@ -1520,8 +1523,8 @@ static int pim_rcv(struct sk_buff * skb)
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
- ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
- ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
skb->dst = NULL;
nf_reset(skb);
netif_rx(skb);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index d7eb680101c..9b176a942ac 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
}
-#if 0000
-/*
- * Get reference to app by name (called from user context)
- */
-struct ip_vs_app *ip_vs_app_get_by_name(char *appname)
-{
- struct ip_vs_app *app, *a = NULL;
-
- down(&__ip_vs_app_mutex);
-
- list_for_each_entry(ent, &ip_vs_app_list, a_list) {
- if (strcmp(app->name, appname))
- continue;
-
- /* softirq may call ip_vs_app_get too, so the caller
- must disable softirq on the current CPU */
- if (ip_vs_app_get(app))
- a = app;
- break;
- }
-
- up(&__ip_vs_app_mutex);
-
- return a;
-}
-#endif
-
-
/*
* Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
*/
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 2a3a8c59c65..87b83813cf2 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -24,7 +24,11 @@
*
*/
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/net.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/proc_fs.h> /* for proc_net_* */
#include <linux/seq_file.h>
@@ -219,7 +223,7 @@ struct ip_vs_conn *ip_vs_conn_in_get
if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
- IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+ IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
ip_vs_proto_name(protocol),
NIPQUAD(s_addr), ntohs(s_port),
NIPQUAD(d_addr), ntohs(d_port),
@@ -254,7 +258,7 @@ struct ip_vs_conn *ip_vs_ct_in_get
out:
ct_read_unlock(hash);
- IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+ IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
ip_vs_proto_name(protocol),
NIPQUAD(s_addr), ntohs(s_port),
NIPQUAD(d_addr), ntohs(d_port),
@@ -295,7 +299,7 @@ struct ip_vs_conn *ip_vs_conn_out_get
ct_read_unlock(hash);
- IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+ IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
ip_vs_proto_name(protocol),
NIPQUAD(s_addr), ntohs(s_port),
NIPQUAD(d_addr), ntohs(d_port),
@@ -391,8 +395,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
- IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+ IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+ "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
ip_vs_proto_name(cp->protocol),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -430,8 +435,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
if (!dest)
return;
- IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+ IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+ "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
ip_vs_proto_name(cp->protocol),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -571,7 +577,7 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_conn_hash(cp);
expire_later:
- IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n",
+ IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
atomic_read(&cp->refcnt)-1,
atomic_read(&cp->n_control));
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1a0843cd58a..3f47ad8e1ca 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
return NULL;
IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
- "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+ "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -532,11 +532,8 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
{
if (!((*pskb)->ipvs_property))
return NF_ACCEPT;
-
/* The packet was sent from IPVS, exit this chain */
- (*okfn)(*pskb);
-
- return NF_STOLEN;
+ return NF_STOP;
}
u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9bdcf31b760..7f0288b25fa 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
+#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
@@ -35,6 +36,7 @@
#include <linux/netfilter_ipv4.h>
#include <net/ip.h>
+#include <net/route.h>
#include <net/sock.h>
#include <asm/uaccess.h>
@@ -447,7 +449,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
out:
read_unlock(&__ip_vs_svc_lock);
- IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
+ IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
fwmark, ip_vs_proto_name(protocol),
NIPQUAD(vaddr), ntohs(vport),
svc?"hit":"not hit");
@@ -597,7 +599,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
*/
list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
- "refcnt=%d\n",
+ "dest->refcnt=%d\n",
dest->vfwmark,
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
@@ -804,7 +806,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
dest = ip_vs_trash_get_dest(svc, daddr, dport);
if (dest != NULL) {
IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
- "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
+ "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
NIPQUAD(daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
@@ -949,7 +951,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
atomic_dec(&dest->svc->refcnt);
kfree(dest);
} else {
- IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
+ IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
+ "dest->refcnt=%d\n",
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
list_add(&dest->n_list, &ip_vs_dest_trash);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index f3bc320dce9..9fee19c4c61 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -37,8 +37,10 @@
*
*/
+#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/skbuff.h>
#include <net/ip_vs.h>
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 67b3e2fc1fa..c453e1e57f4 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,8 +13,12 @@
* Changes:
*
*/
+#include <linux/config.h>
#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/interrupt.h>
#include <net/ip_vs.h>
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 561cda326fa..6e5cb92a5c8 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -41,8 +41,10 @@
* me to write this module.
*/
+#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/skbuff.h>
/* for sysctl */
#include <linux/fs.h>
@@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
}
-#if 0000
-/*
- * Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
- * returns bool success.
- */
-static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
- struct ip_vs_lblc_entry *en)
-{
- if (list_empty(&en->list)) {
- IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
- "called from %p\n", __builtin_return_address(0));
- return 0;
- }
-
- /*
- * Remove it from the table
- */
- write_lock(&tbl->lock);
- list_del(&en->list);
- INIT_LIST_HEAD(&en->list);
- write_unlock(&tbl->lock);
-
- return 1;
-}
-#endif
-
-
/*
* Get ip_vs_lblc_entry associated with supplied parameters.
*/
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index ce456dbf09a..32ba37ba72d 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -39,8 +39,10 @@
*
*/
+#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/skbuff.h>
/* for sysctl */
#include <linux/fs.h>
@@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
}
-#if 0000
-/*
- * Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
- * returns bool success.
- */
-static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
- struct ip_vs_lblcr_entry *en)
-{
- if (list_empty(&en->list)) {
- IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
- "called from %p\n", __builtin_return_address(0));
- return 0;
- }
-
- /*
- * Remove it from the table
- */
- write_lock(&tbl->lock);
- list_del(&en->list);
- INIT_LIST_HEAD(&en->list);
- write_unlock(&tbl->lock);
-
- return 1;
-}
-#endif
-
-
/*
* Get ip_vs_lblcr_entry associated with supplied parameters.
*/
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 453e94a0bbd..8b0505b0931 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -12,6 +12,8 @@
*
*/
+#include <linux/in.h>
+#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 478e5c7c7e8..c36ccf057a1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -12,6 +12,8 @@
*
*/
+#include <linux/in.h>
+#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 0e878fd6215..bc28b1160a3 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_LAST] = 2*HZ,
};
-
-#if 0
-
-/* FIXME: This is going to die */
-
-static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
- [IP_VS_TCP_S_NONE] = 2*HZ,
- [IP_VS_TCP_S_ESTABLISHED] = 8*60*HZ,
- [IP_VS_TCP_S_SYN_SENT] = 60*HZ,
- [IP_VS_TCP_S_SYN_RECV] = 10*HZ,
- [IP_VS_TCP_S_FIN_WAIT] = 60*HZ,
- [IP_VS_TCP_S_TIME_WAIT] = 60*HZ,
- [IP_VS_TCP_S_CLOSE] = 10*HZ,
- [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
- [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
- [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
- [IP_VS_TCP_S_SYNACK] = 100*HZ,
- [IP_VS_TCP_S_LAST] = 2*HZ,
-};
-
-#endif
-
static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_NONE] = "NONE",
[IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
@@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
struct ip_vs_dest *dest = cp->dest;
IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
- "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
+ "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
pp->name,
(state_off==TCP_DIR_OUTPUT)?"output ":"input ",
th->syn? 'S' : '.',
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ae5f2e0aef..89d9175d8f2 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -15,8 +15,11 @@
*
*/
+#include <linux/in.h>
+#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
#include <net/ip_vs.h>
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 0f7c56a225b..8bc42b76223 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
+#include <linux/interrupt.h>
#include <asm/string.h>
#include <linux/kmod.h>
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 6f7c50e44a3..7775e6cc68b 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -34,8 +34,10 @@
*
*/
+#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/skbuff.h>
#include <net/ip_vs.h>
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2e5ced3d806..1bca714bda3 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -21,12 +21,14 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/inetdevice.h>
#include <linux/net.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/igmp.h> /* for ip_mc_join_group */
+#include <linux/udp.h>
#include <net/ip.h>
#include <net/sock.h>
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049c..52c12e9edbb 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = skb->nh.iph;
u8 tos = old_iph->tos;
- u16 df = old_iph->frag_off;
+ __be16 df = old_iph->frag_off;
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
int mtu;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5..52a3d7c5790 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,17 +1,11 @@
/* IPv4 specific functions of netfilter core */
-
-#include <linux/config.h>
-#ifdef CONFIG_NETFILTER
-
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
-
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <net/route.h>
#include <linux/ip.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +27,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
#ifdef CONFIG_IP_ROUTE_FWMARK
fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
#endif
- fl.proto = iph->protocol;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
@@ -60,6 +53,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
if ((*pskb)->dst->error)
return -1;
+#ifdef CONFIG_XFRM
+ if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
+ if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+ return -1;
+#endif
+
/* Change in oif may mean change in hh_len. */
hh_len = (*pskb)->dst->dev->hard_header_len;
if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +78,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
}
EXPORT_SYMBOL(ip_route_me_harder);
+void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(ip_nat_decode_session);
+
/*
* Extra routing may needed on local out, as the QUEUE target never
* returns control to the table.
@@ -135,5 +138,3 @@ static void fini(void)
module_init(init);
module_exit(fini);
-
-#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0bc00528d88..db783036e4d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK
instead of the individual packets.
config IP_NF_CONNTRACK_EVENTS
- bool "Connection tracking events"
- depends on IP_NF_CONNTRACK
+ bool "Connection tracking events (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && IP_NF_CONNTRACK
help
If this option is enabled, the connection tracking code will
provide a notifier chain that can be used by other kernel code
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS
IF unsure, say `N'.
config IP_NF_CONNTRACK_NETLINK
- tristate 'Connection tracking netlink interface'
- depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+ tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+ depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
help
This option enables support for a netlink-based userspace interface
@@ -182,6 +182,7 @@ config IP_NF_QUEUE
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
+ depends on NETFILTER_XTABLES
help
iptables is a general, extensible packet identification framework.
The packet filtering and full NAT (masquerading, port forwarding,
@@ -191,16 +192,6 @@ config IP_NF_IPTABLES
To compile it as a module, choose M here. If unsure, say N.
# The matches.
-config IP_NF_MATCH_LIMIT
- tristate "limit match support"
- depends on IP_NF_IPTABLES
- help
- limit matching allows you to control the rate at which a rule can be
- matched: mainly useful in combination with the LOG target ("LOG
- target support", below) and to avoid some Denial of Service attacks.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_IPRANGE
tristate "IP range match support"
depends on IP_NF_IPTABLES
@@ -210,37 +201,6 @@ config IP_NF_MATCH_IPRANGE
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_MATCH_MAC
- tristate "MAC address match support"
- depends on IP_NF_IPTABLES
- help
- MAC matching allows you to match packets based on the source
- Ethernet address of the packet.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_PKTTYPE
- tristate "Packet type match support"
- depends on IP_NF_IPTABLES
- help
- Packet type matching allows you to match a packet by
- its "class", eg. BROADCAST, MULTICAST, ...
-
- Typical usage:
- iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_MARK
- tristate "netfilter MARK match support"
- depends on IP_NF_IPTABLES
- help
- Netfilter mark matching allows you to match packets based on the
- `nfmark' value in the packet. This can be set by the MARK target
- (see below).
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_MULTIPORT
tristate "Multiple port match support"
depends on IP_NF_IPTABLES
@@ -301,15 +261,6 @@ config IP_NF_MATCH_AH_ESP
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_MATCH_LENGTH
- tristate "LENGTH match support"
- depends on IP_NF_IPTABLES
- help
- This option allows you to match the length of a packet against a
- specific value or range of values.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_TTL
tristate "TTL match support"
depends on IP_NF_IPTABLES
@@ -319,50 +270,6 @@ config IP_NF_MATCH_TTL
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_MATCH_TCPMSS
- tristate "tcpmss match support"
- depends on IP_NF_IPTABLES
- help
- This option adds a `tcpmss' match, which allows you to examine the
- MSS value of TCP SYN packets, which control the maximum packet size
- for that connection.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_HELPER
- tristate "Helper match support"
- depends on IP_NF_IPTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
- help
- Helper matching allows you to match packets in dynamic connections
- tracked by a conntrack-helper, ie. ip_conntrack_ftp
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_MATCH_STATE
- tristate "Connection state match support"
- depends on IP_NF_IPTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
- help
- Connection state matching allows you to match packets based on their
- relationship to a tracked connection (ie. previous packets). This
- is a powerful tool for packet classification.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_CONNTRACK
- tristate "Connection tracking match support"
- depends on IP_NF_IPTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
- help
- This is a general conntrack match module, a superset of the state match.
-
- It allows matching on additional conntrack information, which is
- useful in complex configurations, such as NAT gateways with multiple
- internet links or tunnels.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_OWNER
tristate "Owner match support"
depends on IP_NF_IPTABLES
@@ -372,15 +279,6 @@ config IP_NF_MATCH_OWNER
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_MATCH_PHYSDEV
- tristate "Physdev match support"
- depends on IP_NF_IPTABLES && BRIDGE_NETFILTER
- help
- Physdev packet matching matches against the physical bridge ports
- the IP packet arrived on or will leave by.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_MATCH_ADDRTYPE
tristate 'address type match support'
depends on IP_NF_IPTABLES
@@ -391,75 +289,6 @@ config IP_NF_MATCH_ADDRTYPE
If you want to compile it as a module, say M here and read
<file:Documentation/modules.txt>. If unsure, say `N'.
-config IP_NF_MATCH_REALM
- tristate 'realm match support'
- depends on IP_NF_IPTABLES
- select NET_CLS_ROUTE
- help
- This option adds a `realm' match, which allows you to use the realm
- key from the routing subsystem inside iptables.
-
- This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
- in tc world.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_MATCH_SCTP
- tristate 'SCTP protocol match support'
- depends on IP_NF_IPTABLES
- help
- With this option enabled, you will be able to use the iptables
- `sctp' match in order to match on SCTP source/destination ports
- and SCTP chunk types.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_MATCH_DCCP
- tristate 'DCCP protocol match support'
- depends on IP_NF_IPTABLES
- help
- With this option enabled, you will be able to use the iptables
- `dccp' match in order to match on DCCP source/destination ports
- and DCCP flags.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_MATCH_COMMENT
- tristate 'comment match support'
- depends on IP_NF_IPTABLES
- help
- This option adds a `comment' dummy-match, which allows you to put
- comments in your iptables ruleset.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_MATCH_CONNMARK
- tristate 'Connection mark match support'
- depends on IP_NF_IPTABLES
- depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
- help
- This option adds a `connmark' match, which allows you to match the
- connection mark value previously set for the session by `CONNMARK'.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. The module will be called
- ipt_connmark.o. If unsure, say `N'.
-
-config IP_NF_MATCH_CONNBYTES
- tristate 'Connection byte/packet counter match support'
- depends on IP_NF_IPTABLES
- depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
- help
- This option adds a `connbytes' match, which allows you to match the
- number of bytes and/or packets for each direction within a connection.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
config IP_NF_MATCH_HASHLIMIT
tristate 'hashlimit match support'
depends on IP_NF_IPTABLES
@@ -474,18 +303,15 @@ config IP_NF_MATCH_HASHLIMIT
destination IP' or `500pps from any given source IP' with a single
IPtables rule.
-config IP_NF_MATCH_STRING
- tristate 'string match support'
- depends on IP_NF_IPTABLES
- select TEXTSEARCH
- select TEXTSEARCH_KMP
- select TEXTSEARCH_BM
- select TEXTSEARCH_FSM
- help
- This option adds a `string' match, which allows you to look for
- pattern matchings in packets.
+config IP_NF_MATCH_POLICY
+ tristate "IPsec policy match support"
+ depends on IP_NF_IPTABLES && XFRM
+ help
+ Policy matching allows you to match packets based on the
+ IPsec policy that was used during decapsulation/will
+ be used during encapsulation.
- To compile it as a module, choose M here. If unsure, say N.
+ To compile it as a module, choose M here. If unsure, say N.
# `filter', generic and specific targets
config IP_NF_FILTER
@@ -562,17 +388,6 @@ config IP_NF_TARGET_TCPMSS
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_NFQUEUE
- tristate "NFQUEUE Target Support"
- depends on IP_NF_IPTABLES
- help
- This Target replaced the old obsolete QUEUE target.
-
- As opposed to QUEUE, it supports 65535 different queues,
- not just one.
-
- To compile it as a module, choose M here. If unsure, say N.
-
# NAT + specific targets
config IP_NF_NAT
tristate "Full NAT"
@@ -725,31 +540,6 @@ config IP_NF_TARGET_DSCP
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_MARK
- tristate "MARK target support"
- depends on IP_NF_MANGLE
- help
- This option adds a `MARK' target, which allows you to create rules
- in the `mangle' table which alter the netfilter mark (nfmark) field
- associated with the packet prior to routing. This can change
- the routing method (see `Use netfilter MARK value as routing
- key') and can also be used by other subsystems to change their
- behavior.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_TARGET_CLASSIFY
- tristate "CLASSIFY target support"
- depends on IP_NF_MANGLE
- help
- This option adds a `CLASSIFY' target, which enables the user to set
- the priority of a packet. Some qdiscs can use this value for
- classification, among these are:
-
- atm, cbq, dsmark, pfifo_fast, htb, prio
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_TTL
tristate 'TTL target support'
depends on IP_NF_MANGLE
@@ -764,19 +554,6 @@ config IP_NF_TARGET_TTL
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_CONNMARK
- tristate 'CONNMARK target support'
- depends on IP_NF_MANGLE
- depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
- help
- This option adds a `CONNMARK' target, which allows one to manipulate
- the connection mark value. Similar to the MARK target, but
- affects the connection mark value rather than the packet mark value.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. The module will be called
- ipt_CONNMARK.o. If unsure, say `N'.
-
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_MANGLE && EXPERIMENTAL
@@ -800,23 +577,10 @@ config IP_NF_RAW
If you want to compile it as a module, say M here and read
<file:Documentation/modules.txt>. If unsure, say `N'.
-config IP_NF_TARGET_NOTRACK
- tristate 'NOTRACK target support'
- depends on IP_NF_RAW
- depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
- help
- The NOTRACK target allows a select rule to specify
- which packets *not* to enter the conntrack/NAT
- subsystem with all the consequences (no ICMP error tracking,
- no protocol helpers for the selected packets).
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
+ depends on NETFILTER_XTABLES
help
arptables is a general, extensible packet identification framework.
The ARP packet filtering and mangling (manipulation)subsystems
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258f..e5c5b3202f0 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
# conntrack netlink interface
obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,19 +42,12 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
-obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
-obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
-obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
-obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o
-obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
-obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
-obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
@@ -61,39 +55,25 @@ obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
-obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
-obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
-obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
-obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
-obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o
-obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
-obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
-obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
-obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
-obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
+obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
# targets
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
-obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
-obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
-obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
-obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba..afe3d8f8177 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <linux/capability.h>
#include <linux/if_arp.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
@@ -23,6 +24,7 @@
#include <asm/uaccess.h>
#include <asm/semaphore.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
MODULE_LICENSE("GPL");
@@ -54,33 +56,9 @@ do { \
#else
#define ARP_NF_ASSERT(x)
#endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
-static DECLARE_MUTEX(arpt_mutex);
-
-#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
#include <linux/netfilter_ipv4/listhelp.h>
-struct arpt_table_info {
- unsigned int size;
- unsigned int number;
- unsigned int initial_entries;
- unsigned int hook_entry[NF_ARP_NUMHOOKS];
- unsigned int underflow[NF_ARP_NUMHOOKS];
- char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
-};
-
-static LIST_HEAD(arpt_target);
-static LIST_HEAD(arpt_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
char *hdr_addr, int len)
{
@@ -227,9 +205,9 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
}
static unsigned int arpt_error(struct sk_buff **pskb,
- unsigned int hooknum,
const struct net_device *in,
const struct net_device *out,
+ unsigned int hooknum,
const void *targinfo,
void *userinfo)
{
@@ -258,6 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
struct arpt_entry *e, *back;
const char *indev, *outdev;
void *table_base;
+ struct xt_table_info *private = table->private;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -269,11 +248,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
outdev = out ? out->name : nulldevname;
read_lock_bh(&table->lock);
- table_base = (void *)table->private->entries
- + TABLE_OFFSET(table->private,
- smp_processor_id());
- e = get_entry(table_base, table->private->hook_entry[hook]);
- back = get_entry(table_base, table->private->underflow[hook]);
+ table_base = (void *)private->entries[smp_processor_id()];
+ e = get_entry(table_base, private->hook_entry[hook]);
+ back = get_entry(table_base, private->underflow[hook]);
arp = (*pskb)->nh.arph;
do {
@@ -321,8 +298,8 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
* abs. verdicts
*/
verdict = t->u.kernel.target->target(pskb,
- hook,
in, out,
+ hook,
t->data,
userdata);
@@ -347,106 +324,6 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
return verdict;
}
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
-static inline struct arpt_table *find_table_lock(const char *name)
-{
- struct arpt_table *t;
-
- if (down_interruptible(&arpt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
- list_for_each_entry(t, &arpt_tables, list)
- if (strcmp(t->name, name) == 0 && try_module_get(t->me))
- return t;
- up(&arpt_mutex);
- return NULL;
-}
-
-
-/* Find target, grabs ref. Returns ERR_PTR() on error. */
-static inline struct arpt_target *find_target(const char *name, u8 revision)
-{
- struct arpt_target *t;
- int err = 0;
-
- if (down_interruptible(&arpt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
- list_for_each_entry(t, &arpt_target, list) {
- if (strcmp(t->name, name) == 0) {
- if (t->revision == revision) {
- if (try_module_get(t->me)) {
- up(&arpt_mutex);
- return t;
- }
- } else
- err = -EPROTOTYPE; /* Found something. */
- }
- }
- up(&arpt_mutex);
- return ERR_PTR(err);
-}
-
-struct arpt_target *arpt_find_target(const char *name, u8 revision)
-{
- struct arpt_target *target;
-
- target = try_then_request_module(find_target(name, revision),
- "arpt_%s", name);
- if (IS_ERR(target) || !target)
- return NULL;
- return target;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
- struct arpt_target *t;
- int have_rev = 0;
-
- list_for_each_entry(t, &arpt_target, list) {
- if (strcmp(t->name, name) == 0) {
- if (t->revision > *bestp)
- *bestp = t->revision;
- if (t->revision == revision)
- have_rev =1;
- }
- }
- return have_rev;
-}
-
-/* Returns true or false (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
- int (*revfn)(const char *, u8, int *),
- int *err)
-{
- int have_rev, best = -1;
-
- if (down_interruptible(&arpt_mutex) != 0) {
- *err = -EINTR;
- return 1;
- }
- have_rev = revfn(name, revision, &best);
- up(&arpt_mutex);
-
- /* Nothing at all? Return 0 to try loading module. */
- if (best == -1) {
- *err = -ENOENT;
- return 0;
- }
-
- *err = best;
- if (!have_rev)
- *err = -EPROTONOSUPPORT;
- return 1;
-}
-
-
/* All zeroes == unconditional rule. */
static inline int unconditional(const struct arpt_arp *arp)
{
@@ -462,7 +339,8 @@ static inline int unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
-static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
+static int mark_source_chains(struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
@@ -472,7 +350,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
struct arpt_entry *e
- = (struct arpt_entry *)(newinfo->entries + pos);
+ = (struct arpt_entry *)(entry0 + pos);
if (!(valid_hooks & (1 << hook)))
continue;
@@ -514,13 +392,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
goto next;
e = (struct arpt_entry *)
- (newinfo->entries + pos);
+ (entry0 + pos);
} while (oldpos == pos + e->next_offset);
/* Move along one */
size = e->next_offset;
e = (struct arpt_entry *)
- (newinfo->entries + pos + size);
+ (entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
} else {
@@ -537,7 +415,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
newpos = pos + e->next_offset;
}
e = (struct arpt_entry *)
- (newinfo->entries + newpos);
+ (entry0 + newpos);
e->counters.pcnt = pos;
pos = newpos;
}
@@ -592,8 +470,8 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
}
t = arpt_get_target(e);
- target = try_then_request_module(find_target(t->u.user.name,
- t->u.user.revision),
+ target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
+ t->u.user.revision),
"arpt_%s", t->u.user.name);
if (IS_ERR(target) || !target) {
duprintf("check_entry: `%s' not found\n", t->u.user.name);
@@ -627,7 +505,7 @@ out:
}
static inline int check_entry_size_and_hooks(struct arpt_entry *e,
- struct arpt_table_info *newinfo,
+ struct xt_table_info *newinfo,
unsigned char *base,
unsigned char *limit,
const unsigned int *hook_entries,
@@ -661,7 +539,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
< 0 (not ARPT_RETURN). --RR */
/* Clear counters and comefrom */
- e->counters = ((struct arpt_counters) { 0, 0 });
+ e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
(*i)++;
@@ -688,7 +566,8 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
*/
static int translate_table(const char *name,
unsigned int valid_hooks,
- struct arpt_table_info *newinfo,
+ struct xt_table_info *newinfo,
+ void *entry0,
unsigned int size,
unsigned int number,
const unsigned int *hook_entries,
@@ -710,11 +589,11 @@ static int translate_table(const char *name,
i = 0;
/* Walk through entries, checking offsets. */
- ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
check_entry_size_and_hooks,
newinfo,
- newinfo->entries,
- newinfo->entries + size,
+ entry0,
+ entry0 + size,
hook_entries, underflows, &i);
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
@@ -743,79 +622,78 @@ static int translate_table(const char *name,
}
}
- if (!mark_source_chains(newinfo, valid_hooks)) {
+ if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
duprintf("Looping hook\n");
return -ELOOP;
}
/* Finally, each sanity check must pass */
i = 0;
- ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
check_entry, name, size, &i);
if (ret != 0) {
- ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ARPT_ENTRY_ITERATE(entry0, newinfo->size,
cleanup_entry, &i);
return ret;
}
/* And one copy for every other CPU */
for_each_cpu(i) {
- if (i == 0)
- continue;
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
- newinfo->entries,
- SMP_ALIGN(newinfo->size));
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
}
return ret;
}
-static struct arpt_table_info *replace_table(struct arpt_table *table,
- unsigned int num_counters,
- struct arpt_table_info *newinfo,
- int *error)
+/* Gets counters. */
+static inline int add_entry_to_counter(const struct arpt_entry *e,
+ struct xt_counters total[],
+ unsigned int *i)
{
- struct arpt_table_info *oldinfo;
-
- /* Do the substitution. */
- write_lock_bh(&table->lock);
- /* Check inside lock: is the old number correct? */
- if (num_counters != table->private->number) {
- duprintf("num_counters != table->private->number (%u/%u)\n",
- num_counters, table->private->number);
- write_unlock_bh(&table->lock);
- *error = -EAGAIN;
- return NULL;
- }
- oldinfo = table->private;
- table->private = newinfo;
- newinfo->initial_entries = oldinfo->initial_entries;
- write_unlock_bh(&table->lock);
+ ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
- return oldinfo;
+ (*i)++;
+ return 0;
}
-/* Gets counters. */
-static inline int add_entry_to_counter(const struct arpt_entry *e,
- struct arpt_counters total[],
+static inline int set_entry_to_counter(const struct arpt_entry *e,
+ struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
(*i)++;
return 0;
}
-static void get_counters(const struct arpt_table_info *t,
- struct arpt_counters counters[])
+static void get_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
{
unsigned int cpu;
unsigned int i;
+ unsigned int curcpu;
+
+ /* Instead of clearing (by a previous call to memset())
+ * the counters and using adds, we set the counters
+ * with data used by 'current' CPU
+ * We dont care about preemption here.
+ */
+ curcpu = raw_smp_processor_id();
+
+ i = 0;
+ ARPT_ENTRY_ITERATE(t->entries[curcpu],
+ t->size,
+ set_entry_to_counter,
+ counters,
+ &i);
for_each_cpu(cpu) {
+ if (cpu == curcpu)
+ continue;
i = 0;
- ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+ ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
@@ -829,27 +707,29 @@ static int copy_entries_to_user(unsigned int total_size,
{
unsigned int off, num, countersize;
struct arpt_entry *e;
- struct arpt_counters *counters;
+ struct xt_counters *counters;
+ struct xt_table_info *private = table->private;
int ret = 0;
+ void *loc_cpu_entry;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
* about).
*/
- countersize = sizeof(struct arpt_counters) * table->private->number;
- counters = vmalloc(countersize);
+ countersize = sizeof(struct xt_counters) * private->number;
+ counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
return -ENOMEM;
/* First, sum counters... */
- memset(counters, 0, countersize);
write_lock_bh(&table->lock);
- get_counters(table->private, counters);
+ get_counters(private, counters);
write_unlock_bh(&table->lock);
- /* ... then copy entire thing from CPU 0... */
- if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ /* ... then copy entire thing ... */
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
}
@@ -859,7 +739,7 @@ static int copy_entries_to_user(unsigned int total_size,
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
struct arpt_entry_target *t;
- e = (struct arpt_entry *)(table->private->entries + off);
+ e = (struct arpt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
+ offsetof(struct arpt_entry, counters),
&counters[num],
@@ -890,21 +770,21 @@ static int get_entries(const struct arpt_get_entries *entries,
int ret;
struct arpt_table *t;
- t = find_table_lock(entries->name);
+ t = xt_find_table_lock(NF_ARP, entries->name);
if (t || !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n",
- t->private->number);
- if (entries->size == t->private->size)
- ret = copy_entries_to_user(t->private->size,
+ private->number);
+ if (entries->size == private->size)
+ ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
else {
duprintf("get_entries: I've got %u not %u!\n",
- t->private->size,
- entries->size);
+ private->size, entries->size);
ret = -EINVAL;
}
module_put(t->me);
- up(&arpt_mutex);
+ xt_table_unlock(t);
} else
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -916,8 +796,9 @@ static int do_replace(void __user *user, unsigned int len)
int ret;
struct arpt_replace tmp;
struct arpt_table *t;
- struct arpt_table_info *newinfo, *oldinfo;
- struct arpt_counters *counters;
+ struct xt_table_info *newinfo, *oldinfo;
+ struct xt_counters *counters;
+ void *loc_cpu_entry, *loc_cpu_old_entry;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -926,38 +807,33 @@ static int do_replace(void __user *user, unsigned int len)
if (len != sizeof(tmp) + tmp.size)
return -ENOPROTOOPT;
- /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
- if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
- return -ENOMEM;
-
- newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(tmp.size) *
- (highest_possible_processor_id()+1));
+ newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
return -ENOMEM;
- if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
- counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
+ counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto free_newinfo;
}
- memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, tmp.size, tmp.num_entries,
+ newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
tmp.hook_entry, tmp.underflow);
if (ret != 0)
goto free_newinfo_counters;
duprintf("arp_tables: Translated table\n");
- t = try_then_request_module(find_table_lock(tmp.name),
+ t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
"arptable_%s", tmp.name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -972,7 +848,7 @@ static int do_replace(void __user *user, unsigned int len)
goto put_module;
}
- oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+ oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
if (!oldinfo)
goto put_module;
@@ -989,24 +865,26 @@ static int do_replace(void __user *user, unsigned int len)
/* Get the old counters. */
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
- vfree(oldinfo);
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+
+ xt_free_table_info(oldinfo);
if (copy_to_user(tmp.counters, counters,
- sizeof(struct arpt_counters) * tmp.num_counters) != 0)
+ sizeof(struct xt_counters) * tmp.num_counters) != 0)
ret = -EFAULT;
vfree(counters);
- up(&arpt_mutex);
+ xt_table_unlock(t);
return ret;
put_module:
module_put(t->me);
- up(&arpt_mutex);
+ xt_table_unlock(t);
free_newinfo_counters_untrans:
- ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+ ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
free_newinfo_counters:
vfree(counters);
free_newinfo:
- vfree(newinfo);
+ xt_free_table_info(newinfo);
return ret;
}
@@ -1014,7 +892,7 @@ static int do_replace(void __user *user, unsigned int len)
* and everything is OK.
*/
static inline int add_counter_to_entry(struct arpt_entry *e,
- const struct arpt_counters addme[],
+ const struct xt_counters addme[],
unsigned int *i)
{
@@ -1027,14 +905,16 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
static int do_add_counters(void __user *user, unsigned int len)
{
unsigned int i;
- struct arpt_counters_info tmp, *paddc;
+ struct xt_counters_info tmp, *paddc;
struct arpt_table *t;
+ struct xt_table_info *private;
int ret = 0;
+ void *loc_cpu_entry;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
+ if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
return -EINVAL;
paddc = vmalloc(len);
@@ -1046,27 +926,30 @@ static int do_add_counters(void __user *user, unsigned int len)
goto free;
}
- t = find_table_lock(tmp.name);
+ t = xt_find_table_lock(NF_ARP, tmp.name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
write_lock_bh(&t->lock);
- if (t->private->number != paddc->num_counters) {
+ private = t->private;
+ if (private->number != paddc->num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
i = 0;
- ARPT_ENTRY_ITERATE(t->private->entries,
- t->private->size,
+ /* Choose the copy that is on our node */
+ loc_cpu_entry = private->entries[smp_processor_id()];
+ ARPT_ENTRY_ITERATE(loc_cpu_entry,
+ private->size,
add_counter_to_entry,
paddc->counters,
&i);
unlock_up_free:
write_unlock_bh(&t->lock);
- up(&arpt_mutex);
+ xt_table_unlock(t);
module_put(t->me);
free:
vfree(paddc);
@@ -1123,25 +1006,26 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
}
name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
- t = try_then_request_module(find_table_lock(name),
+ t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
"arptable_%s", name);
if (t && !IS_ERR(t)) {
struct arpt_getinfo info;
+ struct xt_table_info *private = t->private;
info.valid_hooks = t->valid_hooks;
- memcpy(info.hook_entry, t->private->hook_entry,
+ memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
- memcpy(info.underflow, t->private->underflow,
+ memcpy(info.underflow, private->underflow,
sizeof(info.underflow));
- info.num_entries = t->private->number;
- info.size = t->private->size;
+ info.num_entries = private->number;
+ info.size = private->size;
strcpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
else
ret = 0;
- up(&arpt_mutex);
+ xt_table_unlock(t);
module_put(t->me);
} else
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1166,7 +1050,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
}
case ARPT_SO_GET_REVISION_TARGET: {
- struct arpt_get_revision rev;
+ struct xt_get_revision rev;
if (*len != sizeof(rev)) {
ret = -EINVAL;
@@ -1177,8 +1061,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
break;
}
- try_then_request_module(find_revision(rev.name, rev.revision,
- target_revfn, &ret),
+ try_then_request_module(xt_find_revision(NF_ARP, rev.name,
+ rev.revision, 1, &ret),
"arpt_%s", rev.name);
break;
}
@@ -1191,101 +1075,57 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-/* Registration hooks for targets. */
-int arpt_register_target(struct arpt_target *target)
-{
- int ret;
-
- ret = down_interruptible(&arpt_mutex);
- if (ret != 0)
- return ret;
-
- list_add(&target->list, &arpt_target);
- up(&arpt_mutex);
-
- return ret;
-}
-
-void arpt_unregister_target(struct arpt_target *target)
-{
- down(&arpt_mutex);
- LIST_DELETE(&arpt_target, target);
- up(&arpt_mutex);
-}
-
int arpt_register_table(struct arpt_table *table,
const struct arpt_replace *repl)
{
int ret;
- struct arpt_table_info *newinfo;
- static struct arpt_table_info bootstrap
+ struct xt_table_info *newinfo;
+ static struct xt_table_info bootstrap
= { 0, 0, 0, { 0 }, { 0 }, { } };
+ void *loc_cpu_entry;
- newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(repl->size) *
- (highest_possible_processor_id()+1));
+ newinfo = xt_alloc_table_info(repl->size);
if (!newinfo) {
ret = -ENOMEM;
return ret;
}
- memcpy(newinfo->entries, repl->entries, repl->size);
+
+ /* choose the copy on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(table->name, table->valid_hooks,
- newinfo, repl->size,
+ newinfo, loc_cpu_entry, repl->size,
repl->num_entries,
repl->hook_entry,
repl->underflow);
+
duprintf("arpt_register_table: translate table gives %d\n", ret);
if (ret != 0) {
- vfree(newinfo);
+ xt_free_table_info(newinfo);
return ret;
}
- ret = down_interruptible(&arpt_mutex);
- if (ret != 0) {
- vfree(newinfo);
+ if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+ xt_free_table_info(newinfo);
return ret;
}
- /* Don't autoload: we'd eat our tail... */
- if (list_named_find(&arpt_tables, table->name)) {
- ret = -EEXIST;
- goto free_unlock;
- }
-
- /* Simplifies replace_table code. */
- table->private = &bootstrap;
- if (!replace_table(table, 0, newinfo, &ret))
- goto free_unlock;
-
- duprintf("table->private->number = %u\n",
- table->private->number);
-
- /* save number of initial entries */
- table->private->initial_entries = table->private->number;
-
- rwlock_init(&table->lock);
- list_prepend(&arpt_tables, table);
-
- unlock:
- up(&arpt_mutex);
- return ret;
-
- free_unlock:
- vfree(newinfo);
- goto unlock;
+ return 0;
}
void arpt_unregister_table(struct arpt_table *table)
{
- down(&arpt_mutex);
- LIST_DELETE(&arpt_tables, table);
- up(&arpt_mutex);
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+
+ private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
cleanup_entry, NULL);
- vfree(table->private);
+ xt_free_table_info(private);
}
/* The built-in targets: standard (NULL) and error. */
@@ -1308,52 +1148,15 @@ static struct nf_sockopt_ops arpt_sockopts = {
.get = do_arpt_get_ctl,
};
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const struct arpt_table *t,
- off_t start_offset, char *buffer, int length,
- off_t *pos, unsigned int *count)
-{
- if ((*count)++ >= start_offset) {
- unsigned int namelen;
-
- namelen = sprintf(buffer + *pos, "%s\n", t->name);
- if (*pos + namelen > length) {
- /* Stop iterating */
- return 1;
- }
- *pos += namelen;
- }
- return 0;
-}
-
-static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
-{
- off_t pos = 0;
- unsigned int count = 0;
-
- if (down_interruptible(&arpt_mutex) != 0)
- return 0;
-
- LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
- offset, buffer, length, &pos, &count);
-
- up(&arpt_mutex);
-
- /* `start' hack - see fs/proc/generic.c line ~105 */
- *start=(char *)((unsigned long)count-offset);
- return pos;
-}
-#endif /*CONFIG_PROC_FS*/
-
static int __init init(void)
{
int ret;
+ xt_proto_init(NF_ARP);
+
/* Noone else will be downing sem now, so we won't sleep */
- down(&arpt_mutex);
- list_append(&arpt_target, &arpt_standard_target);
- list_append(&arpt_target, &arpt_error_target);
- up(&arpt_mutex);
+ xt_register_target(NF_ARP, &arpt_standard_target);
+ xt_register_target(NF_ARP, &arpt_error_target);
/* Register setsockopt */
ret = nf_register_sockopt(&arpt_sockopts);
@@ -1362,19 +1165,6 @@ static int __init init(void)
return ret;
}
-#ifdef CONFIG_PROC_FS
- {
- struct proc_dir_entry *proc;
-
- proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
- if (!proc) {
- nf_unregister_sockopt(&arpt_sockopts);
- return -ENOMEM;
- }
- proc->owner = THIS_MODULE;
- }
-#endif
-
printk("arp_tables: (C) 2002 David S. Miller\n");
return 0;
}
@@ -1382,16 +1172,12 @@ static int __init init(void)
static void __exit fini(void)
{
nf_unregister_sockopt(&arpt_sockopts);
-#ifdef CONFIG_PROC_FS
- proc_net_remove("arp_tables_names");
-#endif
+ xt_proto_fini(NF_ARP);
}
EXPORT_SYMBOL(arpt_register_table);
EXPORT_SYMBOL(arpt_unregister_table);
EXPORT_SYMBOL(arpt_do_table);
-EXPORT_SYMBOL(arpt_register_target);
-EXPORT_SYMBOL(arpt_unregister_target);
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 3e592ec8648..c97650a16a5 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -8,8 +8,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("arptables arp payload mangle target");
static unsigned int
-target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
- const struct net_device *out, const void *targinfo, void *userinfo)
+target(struct sk_buff **pskb, const struct net_device *in,
+ const struct net_device *out, unsigned int hooknum, const void *targinfo,
+ void *userinfo)
{
const struct arpt_mangle *mangle = targinfo;
struct arphdr *arp;
@@ -64,7 +65,7 @@ target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
}
static int
-checkentry(const char *tablename, const struct arpt_entry *e, void *targinfo,
+checkentry(const char *tablename, const void *e, void *targinfo,
unsigned int targinfosize, unsigned int hook_mask)
{
const struct arpt_mangle *mangle = targinfo;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 0d759f5a4ef..f6ab45f4868 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -145,6 +145,7 @@ static struct arpt_table packet_filter = {
.lock = RW_LOCK_UNLOCKED,
.private = NULL,
.me = THIS_MODULE,
+ .af = NF_ARP,
};
/* The work comes in here from netfilter.c */
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index e52847fa10f..84e4f79b7ff 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -18,11 +18,13 @@
*
*/
+#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
#include <linux/moduleparam.h>
+#include <linux/udp.h>
#include <net/checksum.h>
#include <net/udp.h>
@@ -34,7 +36,7 @@ static unsigned int master_timeout = 300;
MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
MODULE_DESCRIPTION("Amanda connection tracking module");
MODULE_LICENSE("GPL");
-module_param(master_timeout, int, 0600);
+module_param(master_timeout, uint, 0600);
MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7a4ecddd597..84c66dbfeda 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data)
return 1;
}
+void ip_conntrack_flush(void)
+{
+ ip_ct_iterate_cleanup(kill_all, NULL);
+}
+
static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
{
if (vmalloced)
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
get_order(sizeof(struct list_head) * size));
}
-void ip_conntrack_flush(void)
+/* Mishearing the voices in his head, our hero wonders how he's
+ supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
{
+ ip_ct_attach = NULL;
+
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
delete... */
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void)
ip_ct_event_cache_flush();
i_see_dead_people:
- ip_ct_iterate_cleanup(kill_all, NULL);
+ ip_conntrack_flush();
if (atomic_read(&ip_conntrack_count) != 0) {
schedule();
goto i_see_dead_people;
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void)
/* wait until all references to ip_conntrack_untracked are dropped */
while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
schedule();
-}
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
- ip_ct_attach = NULL;
- ip_conntrack_flush();
kmem_cache_destroy(ip_conntrack_cachep);
kmem_cache_destroy(ip_conntrack_expect_cachep);
free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 68b173bcda6..e627e585617 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -34,7 +34,7 @@ static int ports_c;
module_param_array(ports, ushort, &ports_c, 0400);
static int loose;
-module_param(loose, int, 0600);
+module_param(loose, bool, 0600);
unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 4108a5e12b3..d716bba798f 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -762,7 +762,7 @@ static struct ip_conntrack_helper pptp = {
.help = conntrack_pptp_help
};
-extern void __exit ip_ct_proto_gre_fini(void);
+extern void ip_ct_proto_gre_fini(void);
extern int __init ip_ct_proto_gre_init(void);
/* ip_conntrack_pptp initialization */
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index d7c40421d0d..c51a2cf71b4 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -36,7 +36,7 @@
#define MAX_PORTS 8
static unsigned short ports[MAX_PORTS];
static int ports_c;
-static int max_dcc_channels = 8;
+static unsigned int max_dcc_channels = 8;
static unsigned int dcc_timeout = 300;
/* This is slow, but it's simple. --RR */
static char *irc_buffer;
@@ -54,9 +54,9 @@ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, int, 0400);
+module_param(max_dcc_channels, uint, 0400);
MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, int, 0400);
+module_param(dcc_timeout, uint, 0400);
MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
@@ -254,10 +254,6 @@ static int __init init(void)
printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
return -EBUSY;
}
- if (dcc_timeout < 0) {
- printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
- return -EBUSY;
- }
irc_buffer = kmalloc(65536, GFP_KERNEL);
if (!irc_buffer)
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 186646eb249..4e68e16a261 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");
static unsigned int timeout = 3;
-module_param(timeout, int, 0600);
+module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
static int help(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 3fce91bcc0b..c9ebbe0d2d9 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -79,6 +79,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
const struct ip_conntrack_tuple *tuple)
{
struct nfattr *nest_parms;
+ int ret;
nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
@@ -86,10 +87,10 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
NFA_NEST_END(skb, nest_parms);
nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
- ctnetlink_dump_tuples_proto(skb, tuple);
+ ret = ctnetlink_dump_tuples_proto(skb, tuple);
NFA_NEST_END(skb, nest_parms);
- return 0;
+ return ret;
nfattr_failure:
return -1;
@@ -160,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
return 0;
nest_helper = NFA_NEST(skb, CTA_HELP);
- NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name);
+ NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
if (ct->helper->to_nfattr)
ct->helper->to_nfattr(skb, ct);
@@ -229,7 +230,7 @@ nfattr_failure:
static inline int
ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
{
- unsigned int use = htonl(atomic_read(&ct->ct_general.use));
+ u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
return 0;
@@ -311,29 +312,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (events & IPCT_DESTROY) {
type = IPCTNL_MSG_CT_DELETE;
group = NFNLGRP_CONNTRACK_DESTROY;
- goto alloc_skb;
- }
- if (events & (IPCT_NEW | IPCT_RELATED)) {
+ } else if (events & (IPCT_NEW | IPCT_RELATED)) {
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
/* dump everything */
events = ~0UL;
group = NFNLGRP_CONNTRACK_NEW;
- goto alloc_skb;
- }
- if (events & (IPCT_STATUS |
+ } else if (events & (IPCT_STATUS |
IPCT_PROTOINFO |
IPCT_HELPER |
IPCT_HELPINFO |
IPCT_NATINFO)) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
- goto alloc_skb;
- }
+ } else
+ return NOTIFY_DONE;
- return NOTIFY_DONE;
-
-alloc_skb:
/* FIXME: Check if there are any listeners before, don't hurt performance */
skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -503,7 +497,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
}
static const size_t cta_min_proto[CTA_PROTO_MAX] = {
- [CTA_PROTO_NUM-1] = sizeof(u_int16_t),
+ [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
[CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
[CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
[CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
@@ -528,7 +522,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
if (!tb[CTA_PROTO_NUM-1])
return -EINVAL;
- tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+ tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
@@ -728,11 +722,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
}
}
- if (del_timer(&ct->timeout)) {
- ip_conntrack_put(ct);
+ if (del_timer(&ct->timeout))
ct->timeout.function((unsigned long)ct);
- return 0;
- }
+
ip_conntrack_put(ct);
DEBUGP("leaving\n");
@@ -877,7 +869,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
DEBUGP("NAT status: %lu\n",
status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
- if (ip_nat_initialized(ct, hooknum))
+ if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
return -EEXIST;
ip_nat_setup_info(ct, &range, hooknum);
@@ -1039,6 +1031,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
return err;
}
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK-1])
+ ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
ct->helper = ip_conntrack_helper_find_get(rtuple);
add_timer(&ct->timeout);
@@ -1047,11 +1044,6 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
if (ct->helper)
ip_conntrack_helper_put(ct->helper);
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
DEBUGP("conntrack with id %u inserted\n", ct->id);
return 0;
@@ -1211,7 +1203,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
unsigned int type;
unsigned char *b;
int flags = 0;
- u16 proto;
if (events & IPEXP_NEW) {
type = IPCTNL_MSG_EXP_NEW;
@@ -1238,7 +1229,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
goto nfattr_failure;
nlh->nlmsg_len = skb->tail - b;
- proto = exp->tuple.dst.protonum;
nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 88c3712bd25..f891308b5e4 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned long ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout = 600*HZ;
static int generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 744abb9d377..56794797d55 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -31,6 +31,8 @@
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
static DEFINE_RWLOCK(ip_ct_gre_lock);
#define ASSERT_READ_LOCK(x)
@@ -308,7 +310,10 @@ int __init ip_ct_proto_gre_init(void)
return ip_conntrack_protocol_register(&gre);
}
-void __exit ip_ct_proto_gre_fini(void)
+/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
+ * init() code on errors.
+ */
+void ip_ct_proto_gre_fini(void)
{
struct list_head *pos, *n;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5f9925db608..3021af0910f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -16,13 +16,12 @@
#include <linux/skbuff.h>
#include <net/ip.h>
#include <net/checksum.h>
-#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned long ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout = 30*HZ;
#if 0
#define DEBUGP printk
@@ -47,20 +46,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
return 1;
}
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+ [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *orig)
{
- /* Add 1; spaces filled with 0. */
- static const u_int8_t invmap[]
- = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
- [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
- [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
- [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
- [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
- [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
- [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
- [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
if (orig->dst.u.icmp.type >= sizeof(invmap)
|| !invmap[orig->dst.u.icmp.type])
return 0;
@@ -110,17 +110,17 @@ static int icmp_packet(struct ip_conntrack *ct,
return NF_ACCEPT;
}
-static const u_int8_t valid_new[] = {
- [ICMP_ECHO] = 1,
- [ICMP_TIMESTAMP] = 1,
- [ICMP_INFO_REQUEST] = 1,
- [ICMP_ADDRESS] = 1
-};
-
/* Called when a new connection for this protocol found. */
static int icmp_new(struct ip_conntrack *conntrack,
const struct sk_buff *skb)
{
+ static const u_int8_t valid_new[] = {
+ [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1
+ };
+
if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
|| !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
@@ -279,10 +279,6 @@ static int icmp_tuple_to_nfattr(struct sk_buff *skb,
NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
&t->dst.u.icmp.code);
- if (t->dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[t->dst.u.icmp.type])
- return -EINVAL;
-
return 0;
nfattr_failure:
@@ -295,7 +291,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
if (!tb[CTA_PROTO_ICMP_TYPE-1]
|| !tb[CTA_PROTO_ICMP_CODE-1]
|| !tb[CTA_PROTO_ICMP_ID-1])
- return -1;
+ return -EINVAL;
tuple->dst.u.icmp.type =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
@@ -304,6 +300,10 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
tuple->src.u.icmp.id =
*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+ if (tuple->dst.u.icmp.type >= sizeof(invmap)
+ || !invmap[tuple->dst.u.icmp.type])
+ return -EINVAL;
+
return 0;
}
#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d456..be602e8aeab 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
+#include <linux/interrupt.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/in.h>
@@ -57,15 +58,15 @@ static const char *sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned long ip_ct_sctp_timeout_closed = 10 SECS;
-static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS;
-static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
-static unsigned long ip_ct_sctp_timeout_established = 5 DAYS;
-static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed = 10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
+static unsigned int ip_ct_sctp_timeout_established = 5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
-static const unsigned long * sctp_timeouts[]
+static const unsigned int * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
&ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
&ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 62598167677..e0dc3706354 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -32,7 +32,6 @@
#include <net/tcp.h>
-#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -85,21 +84,21 @@ static const char *tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS;
-unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS;
-unsigned long ip_ct_tcp_timeout_established = 5 DAYS;
-unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS;
-unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS;
-unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS;
-unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS;
-unsigned long ip_ct_tcp_timeout_close = 10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS;
+unsigned int ip_ct_tcp_timeout_established = 5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS;
+unsigned int ip_ct_tcp_timeout_close = 10 SECS;
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
-unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
-static const unsigned long * tcp_timeouts[]
+static const unsigned int * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
&ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
&ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
@@ -272,9 +271,9 @@ static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
- * sSS -> sIV Might be a half-open connection.
+ * sSS -> sIG Might be a half-open connection.
* sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
@@ -341,9 +340,10 @@ static int tcp_print_conntrack(struct seq_file *s,
static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
const struct ip_conntrack *ct)
{
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+ struct nfattr *nest_parms;
read_lock_bh(&tcp_lock);
+ nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
read_unlock_bh(&tcp_lock);
@@ -917,8 +917,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
- /* Either SYN in ORIGINAL
- * or SYN/ACK in REPLY. */
+ /* Ignored packets:
+ *
+ * a) SYN in ORIGINAL
+ * b) SYN/ACK in REPLY
+ * c) ACK in reply direction after initial SYN in original.
+ */
if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir
@@ -985,16 +989,23 @@ static int tcp_packet(struct ip_conntrack *conntrack,
}
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
- && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET)
+ || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_ACK_SET))
&& ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
- /* RST sent to invalid SYN we had let trough
- * SYN was in window then, tear down connection.
+ /* RST sent to invalid SYN or ACK we had let through
+ * at a) and c) above:
+ *
+ * a) SYN was in window then
+ * c) we hold a half-open connection.
+ *
+ * Delete our connection entry.
* We skip window checking, because packet might ACK
- * segments we ignored in the SYN. */
+ * segments we ignored. */
goto in_window;
}
- /* Just fall trough */
+ /* Just fall through */
default:
/* Keep compilers happy. */
break;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index f2dcac7c766..55b7d3210ad 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -11,15 +11,15 @@
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/in.h>
+#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/seq_file.h>
#include <net/checksum.h>
-#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-unsigned long ip_ct_udp_timeout = 30*HZ;
-unsigned long ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream = 180*HZ;
static int udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dd476b191f4..833fcb4be5e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -27,6 +27,7 @@
#endif
#include <net/checksum.h>
#include <net/ip.h>
+#include <net/route.h>
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
@@ -450,30 +451,6 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
return NF_ACCEPT;
}
-static unsigned int ip_refrag(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
-
- /* We've seen it coming out the other side: confirm */
- if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
- return NF_DROP;
-
- /* Local packets are never produced too large for their
- interface. We degfragment them at LOCAL_OUT, however,
- so we have to refragment them here. */
- if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
- !skb_shinfo(*pskb)->tso_size) {
- /* No hook can be after us, so this should be OK. */
- ip_fragment(*pskb, okfn);
- return NF_STOLEN;
- }
- return NF_ACCEPT;
-}
-
static unsigned int ip_conntrack_local(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
@@ -543,7 +520,7 @@ static struct nf_hook_ops ip_conntrack_helper_in_ops = {
/* Refragmenter; last chance. */
static struct nf_hook_ops ip_conntrack_out_ops = {
- .hook = ip_refrag,
+ .hook = ip_confirm,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_POST_ROUTING,
@@ -567,28 +544,28 @@ extern int ip_conntrack_max;
extern unsigned int ip_conntrack_htable_size;
/* From ip_conntrack_proto_tcp.c */
-extern unsigned long ip_ct_tcp_timeout_syn_sent;
-extern unsigned long ip_ct_tcp_timeout_syn_recv;
-extern unsigned long ip_ct_tcp_timeout_established;
-extern unsigned long ip_ct_tcp_timeout_fin_wait;
-extern unsigned long ip_ct_tcp_timeout_close_wait;
-extern unsigned long ip_ct_tcp_timeout_last_ack;
-extern unsigned long ip_ct_tcp_timeout_time_wait;
-extern unsigned long ip_ct_tcp_timeout_close;
-extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern unsigned int ip_ct_tcp_timeout_syn_sent;
+extern unsigned int ip_ct_tcp_timeout_syn_recv;
+extern unsigned int ip_ct_tcp_timeout_established;
+extern unsigned int ip_ct_tcp_timeout_fin_wait;
+extern unsigned int ip_ct_tcp_timeout_close_wait;
+extern unsigned int ip_ct_tcp_timeout_last_ack;
+extern unsigned int ip_ct_tcp_timeout_time_wait;
+extern unsigned int ip_ct_tcp_timeout_close;
+extern unsigned int ip_ct_tcp_timeout_max_retrans;
extern int ip_ct_tcp_loose;
extern int ip_ct_tcp_be_liberal;
extern int ip_ct_tcp_max_retrans;
/* From ip_conntrack_proto_udp.c */
-extern unsigned long ip_ct_udp_timeout;
-extern unsigned long ip_ct_udp_timeout_stream;
+extern unsigned int ip_ct_udp_timeout;
+extern unsigned int ip_ct_udp_timeout_stream;
/* From ip_conntrack_proto_icmp.c */
-extern unsigned long ip_ct_icmp_timeout;
+extern unsigned int ip_ct_icmp_timeout;
/* From ip_conntrack_proto_icmp.c */
-extern unsigned long ip_ct_generic_timeout;
+extern unsigned int ip_ct_generic_timeout;
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
@@ -967,7 +944,7 @@ module_exit(fini);
/* Some modules need us, but don't depend directly on any symbol.
They should call this. */
-void need_ip_conntrack(void)
+void need_conntrack(void)
{
}
@@ -985,7 +962,7 @@ EXPORT_SYMBOL(ip_ct_get_tuple);
EXPORT_SYMBOL(invert_tuplepr);
EXPORT_SYMBOL(ip_conntrack_alter_reply);
EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(need_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_iterate_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index d83757a70d9..b8daab3c64a 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -171,7 +171,7 @@ static int __init init(void)
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
static int warn_set(const char *val, struct kernel_param *kp)
{
- printk(KERN_INFO __stringify(KBUILD_MODNAME)
+ printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index e546203f566..ac004895781 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -148,14 +148,14 @@ pptp_outbound_pkt(struct sk_buff **pskb,
{
struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
- u_int16_t msg, *cid = NULL, new_callid;
+ u_int16_t msg, new_callid;
+ unsigned int cid_off;
new_callid = htons(ct_pptp_info->pns_call_id);
switch (msg = ntohs(ctlh->messageType)) {
case PPTP_OUT_CALL_REQUEST:
- cid = &pptpReq->ocreq.callID;
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
/* FIXME: ideally we would want to reserve a call ID
* here. current netfilter NAT core is not able to do
* this :( For now we use TCP source port. This breaks
@@ -172,10 +172,10 @@ pptp_outbound_pkt(struct sk_buff **pskb,
ct_pptp_info->pns_call_id = ntohs(new_callid);
break;
case PPTP_IN_CALL_REPLY:
- cid = &pptpReq->icreq.callID;
+ cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
break;
case PPTP_CALL_CLEAR_REQUEST:
- cid = &pptpReq->clrreq.callID;
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
break;
default:
DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
@@ -197,18 +197,15 @@ pptp_outbound_pkt(struct sk_buff **pskb,
/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
* down to here */
-
- IP_NF_ASSERT(cid);
-
DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(*cid), ntohs(new_callid));
+ ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_callid));
/* mangle packet */
if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
- sizeof(new_callid),
- (char *)&new_callid,
- sizeof(new_callid)) == 0)
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid)) == 0)
return NF_DROP;
return NF_ACCEPT;
@@ -299,31 +296,30 @@ pptp_inbound_pkt(struct sk_buff **pskb,
union pptp_ctrl_union *pptpReq)
{
struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
-
- int ret = NF_ACCEPT, rv;
+ u_int16_t msg, new_cid = 0, new_pcid;
+ unsigned int pcid_off, cid_off = 0;
new_pcid = htons(nat_pptp_info->pns_call_id);
switch (msg = ntohs(ctlh->messageType)) {
case PPTP_OUT_CALL_REPLY:
- pcid = &pptpReq->ocack.peersCallID;
- cid = &pptpReq->ocack.callID;
+ pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+ cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
break;
case PPTP_IN_CALL_CONNECT:
- pcid = &pptpReq->iccon.peersCallID;
+ pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
break;
case PPTP_IN_CALL_REQUEST:
/* only need to nat in case PAC is behind NAT box */
- break;
+ return NF_ACCEPT;
case PPTP_WAN_ERROR_NOTIFY:
- pcid = &pptpReq->wanerr.peersCallID;
+ pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
break;
case PPTP_CALL_DISCONNECT_NOTIFY:
- pcid = &pptpReq->disc.callID;
+ pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
break;
case PPTP_SET_LINK_INFO:
- pcid = &pptpReq->setlink.peersCallID;
+ pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
break;
default:
@@ -345,35 +341,26 @@ pptp_inbound_pkt(struct sk_buff **pskb,
* WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
/* mangle packet */
- IP_NF_ASSERT(pcid);
DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
- ntohs(*pcid), ntohs(new_pcid));
-
- rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
- sizeof(new_pcid), (char *)&new_pcid,
- sizeof(new_pcid));
- if (rv != NF_ACCEPT)
- return rv;
+ ntohs(*(u_int16_t *)pptpReq + pcid_off), ntohs(new_pcid));
+
+ if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ pcid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid)) == 0)
+ return NF_DROP;
if (new_cid) {
- IP_NF_ASSERT(cid);
DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(*cid), ntohs(new_cid));
- rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
- sizeof(new_cid),
- (char *)&new_cid,
- sizeof(new_cid));
- if (rv != NF_ACCEPT)
- return rv;
+ ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_cid));
+ if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_cid), (char *)&new_cid,
+ sizeof(new_cid)) == 0)
+ return NF_DROP;
}
-
- /* check for earlier return value of 'switch' above */
- if (ret != NF_ACCEPT)
- return ret;
-
- /* great, at least we don't need to resize packets */
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index de31942babe..461c833eaca 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -113,7 +113,7 @@ static int __init init(void)
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
static int warn_set(const char *val, struct kernel_param *kp)
{
- printk(KERN_INFO __stringify(KBUILD_MODNAME)
+ printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index f7cad7cf1ae..6c4899d8046 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -151,42 +151,6 @@ gre_manip_pkt(struct sk_buff **pskb,
return 1;
}
-/* print out a nat tuple */
-static unsigned int
-gre_print(char *buffer,
- const struct ip_conntrack_tuple *match,
- const struct ip_conntrack_tuple *mask)
-{
- unsigned int len = 0;
-
- if (mask->src.u.gre.key)
- len += sprintf(buffer + len, "srckey=0x%x ",
- ntohl(match->src.u.gre.key));
-
- if (mask->dst.u.gre.key)
- len += sprintf(buffer + len, "dstkey=0x%x ",
- ntohl(match->src.u.gre.key));
-
- return len;
-}
-
-/* print a range of keys */
-static unsigned int
-gre_print_range(char *buffer, const struct ip_nat_range *range)
-{
- if (range->min.gre.key != 0
- || range->max.gre.key != 0xFFFF) {
- if (range->min.gre.key == range->max.gre.key)
- return sprintf(buffer, "key 0x%x ",
- ntohl(range->min.gre.key));
- else
- return sprintf(buffer, "keys 0x%u-0x%u ",
- ntohl(range->min.gre.key),
- ntohl(range->max.gre.key));
- } else
- return 0;
-}
-
/* nat helper struct */
static struct ip_nat_protocol gre = {
.name = "GRE",
@@ -194,8 +158,6 @@ static struct ip_nat_protocol gre = {
.manip_pkt = gre_manip_pkt,
.in_range = gre_in_range,
.unique_tuple = gre_unique_tuple,
- .print = gre_print,
- .print_range = gre_print_range,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
.range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 93871904399..31a3f4ccb99 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -74,38 +74,6 @@ icmp_manip_pkt(struct sk_buff **pskb,
return 1;
}
-static unsigned int
-icmp_print(char *buffer,
- const struct ip_conntrack_tuple *match,
- const struct ip_conntrack_tuple *mask)
-{
- unsigned int len = 0;
-
- if (mask->src.u.icmp.id)
- len += sprintf(buffer + len, "id=%u ",
- ntohs(match->src.u.icmp.id));
-
- if (mask->dst.u.icmp.type)
- len += sprintf(buffer + len, "type=%u ",
- ntohs(match->dst.u.icmp.type));
-
- if (mask->dst.u.icmp.code)
- len += sprintf(buffer + len, "code=%u ",
- ntohs(match->dst.u.icmp.code));
-
- return len;
-}
-
-static unsigned int
-icmp_print_range(char *buffer, const struct ip_nat_range *range)
-{
- if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
- return sprintf(buffer, "id %u-%u ",
- ntohs(range->min.icmp.id),
- ntohs(range->max.icmp.id));
- else return 0;
-}
-
struct ip_nat_protocol ip_nat_protocol_icmp = {
.name = "ICMP",
.protonum = IPPROTO_ICMP,
@@ -113,8 +81,6 @@ struct ip_nat_protocol ip_nat_protocol_icmp = {
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
- .print = icmp_print,
- .print_range = icmp_print_range,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
.range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 1d381bf6857..a3d14079eba 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -136,40 +136,6 @@ tcp_manip_pkt(struct sk_buff **pskb,
return 1;
}
-static unsigned int
-tcp_print(char *buffer,
- const struct ip_conntrack_tuple *match,
- const struct ip_conntrack_tuple *mask)
-{
- unsigned int len = 0;
-
- if (mask->src.u.tcp.port)
- len += sprintf(buffer + len, "srcpt=%u ",
- ntohs(match->src.u.tcp.port));
-
-
- if (mask->dst.u.tcp.port)
- len += sprintf(buffer + len, "dstpt=%u ",
- ntohs(match->dst.u.tcp.port));
-
- return len;
-}
-
-static unsigned int
-tcp_print_range(char *buffer, const struct ip_nat_range *range)
-{
- if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
- if (range->min.tcp.port == range->max.tcp.port)
- return sprintf(buffer, "port %u ",
- ntohs(range->min.tcp.port));
- else
- return sprintf(buffer, "ports %u-%u ",
- ntohs(range->min.tcp.port),
- ntohs(range->max.tcp.port));
- }
- else return 0;
-}
-
struct ip_nat_protocol ip_nat_protocol_tcp = {
.name = "TCP",
.protonum = IPPROTO_TCP,
@@ -177,8 +143,6 @@ struct ip_nat_protocol ip_nat_protocol_tcp = {
.manip_pkt = tcp_manip_pkt,
.in_range = tcp_in_range,
.unique_tuple = tcp_unique_tuple,
- .print = tcp_print,
- .print_range = tcp_print_range,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
.range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index c4906e1aa24..ec6053fdc86 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -122,40 +122,6 @@ udp_manip_pkt(struct sk_buff **pskb,
return 1;
}
-static unsigned int
-udp_print(char *buffer,
- const struct ip_conntrack_tuple *match,
- const struct ip_conntrack_tuple *mask)
-{
- unsigned int len = 0;
-
- if (mask->src.u.udp.port)
- len += sprintf(buffer + len, "srcpt=%u ",
- ntohs(match->src.u.udp.port));
-
-
- if (mask->dst.u.udp.port)
- len += sprintf(buffer + len, "dstpt=%u ",
- ntohs(match->dst.u.udp.port));
-
- return len;
-}
-
-static unsigned int
-udp_print_range(char *buffer, const struct ip_nat_range *range)
-{
- if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
- if (range->min.udp.port == range->max.udp.port)
- return sprintf(buffer, "port %u ",
- ntohs(range->min.udp.port));
- else
- return sprintf(buffer, "ports %u-%u ",
- ntohs(range->min.udp.port),
- ntohs(range->max.udp.port));
- }
- else return 0;
-}
-
struct ip_nat_protocol ip_nat_protocol_udp = {
.name = "UDP",
.protonum = IPPROTO_UDP,
@@ -163,8 +129,6 @@ struct ip_nat_protocol ip_nat_protocol_udp = {
.manip_pkt = udp_manip_pkt,
.in_range = udp_in_range,
.unique_tuple = udp_unique_tuple,
- .print = udp_print,
- .print_range = udp_print_range,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
.range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index f0099a646a0..3bf04951724 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -46,26 +46,10 @@ unknown_manip_pkt(struct sk_buff **pskb,
return 1;
}
-static unsigned int
-unknown_print(char *buffer,
- const struct ip_conntrack_tuple *match,
- const struct ip_conntrack_tuple *mask)
-{
- return 0;
-}
-
-static unsigned int
-unknown_print_range(char *buffer, const struct ip_nat_range *range)
-{
- return 0;
-}
-
struct ip_nat_protocol ip_nat_unknown_protocol = {
.name = "unknown",
/* .me isn't set: getting a ref to this cannot fail. */
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
- .print = unknown_print,
- .print_range = unknown_print_range
};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index cb66b8bddeb..1de86282d23 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -95,6 +95,7 @@ static struct ipt_table nat_table = {
.valid_hooks = NAT_VALID_HOOKS,
.lock = RW_LOCK_UNLOCKED,
.me = THIS_MODULE,
+ .af = AF_INET,
};
/* Source NAT */
@@ -168,7 +169,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
}
static int ipt_snat_checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *entry,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
@@ -201,7 +202,7 @@ static int ipt_snat_checkentry(const char *tablename,
}
static int ipt_dnat_checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *entry,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 8acb7ed40b4..4f95d477805 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -44,6 +44,7 @@
*
*/
#include <linux/config.h>
+#include <linux/in.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -53,6 +54,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/ip.h>
+#include <linux/udp.h>
#include <net/checksum.h>
#include <net/udp.h>
#include <asm/uaccess.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 30cd4e18c12..ad438fb185b 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
: ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
: "*ERROR*")))
+#ifdef CONFIG_XFRM
+static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+ struct ip_conntrack *ct;
+ struct ip_conntrack_tuple *t;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ unsigned long statusbit;
+
+ ct = ip_conntrack_get(skb, &ctinfo);
+ if (ct == NULL)
+ return;
+ dir = CTINFO2DIR(ctinfo);
+ t = &ct->tuplehash[dir].tuple;
+
+ if (dir == IP_CT_DIR_ORIGINAL)
+ statusbit = IPS_DST_NAT;
+ else
+ statusbit = IPS_SRC_NAT;
+
+ if (ct->status & statusbit) {
+ fl->fl4_dst = t->dst.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP)
+ fl->fl_ip_dport = t->dst.u.tcp.port;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl->fl4_src = t->src.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP)
+ fl->fl_ip_sport = t->src.u.tcp.port;
+ }
+}
+#endif
+
static unsigned int
ip_nat_fn(unsigned int hooknum,
struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- u_int32_t saddr, daddr;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
unsigned int ret;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
-
ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr)) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
+ && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.ip !=
+ ct->tuplehash[!dir].tuple.dst.ip) {
+ dst_release((*pskb)->dst);
+ (*pskb)->dst = NULL;
+ }
}
return ret;
}
@@ -185,29 +225,30 @@ ip_nat_out(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
return NF_ACCEPT;
- /* We can hit fragment here; forwarded packets get
- defragmented by connection tracking coming in, then
- fragmented (grr) by the forward code.
-
- In future: If we have nfct != NULL, AND we have NAT
- initialized, AND there is no helper, then we can do full
- NAPT on the head, and IP-address-only NAT on the rest.
-
- I'm starting to have nightmares about fragments. */
-
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
- *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
-
- if (!*pskb)
- return NF_STOLEN;
+ ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN
+ && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.ip !=
+ ct->tuplehash[!dir].tuple.dst.ip
+#ifdef CONFIG_XFRM
+ || ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all
+#endif
+ )
+ return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
}
-
- return ip_nat_fn(hooknum, pskb, in, out, okfn);
+ return ret;
}
static unsigned int
@@ -217,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- u_int32_t saddr, daddr;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
unsigned int ret;
/* root is playing with raw sockets. */
@@ -225,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
return NF_ACCEPT;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
-
ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr))
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.ip !=
+ ct->tuplehash[!dir].tuple.src.ip
+#ifdef CONFIG_XFRM
+ || ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[dir].tuple.src.u.all
+#endif
+ )
+ return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ }
return ret;
}
@@ -316,14 +364,18 @@ static int init_or_cleanup(int init)
{
int ret = 0;
- need_ip_conntrack();
+ need_conntrack();
if (!init) goto cleanup;
+#ifdef CONFIG_XFRM
+ BUG_ON(ip_nat_decode_session != NULL);
+ ip_nat_decode_session = nat_decode_session;
+#endif
ret = ip_nat_rule_init();
if (ret < 0) {
printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_nothing;
+ goto cleanup_decode_session;
}
ret = nf_register_hook(&ip_nat_in_ops);
if (ret < 0) {
@@ -371,7 +423,11 @@ static int init_or_cleanup(int init)
nf_unregister_hook(&ip_nat_in_ops);
cleanup_rule_init:
ip_nat_rule_cleanup();
- cleanup_nothing:
+ cleanup_decode_session:
+#ifdef CONFIG_XFRM
+ ip_nat_decode_session = NULL;
+ synchronize_net();
+#endif
return ret;
}
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 2215317c76b..43c3bd7c118 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -42,7 +42,10 @@ static unsigned int help(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp)
{
- exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+ struct ip_conntrack *ct = exp->master;
+
+ exp->saved_proto.udp.port
+ = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = ip_nat_follow_master;
if (ip_conntrack_expect_related(exp) != 0)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e..2371b2062c2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2,7 +2,7 @@
* Packet matching code.
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
- * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -11,16 +11,17 @@
* 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
* - increase module usage count as soon as we have rules inside
* a table
+ * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
+ * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
*/
#include <linux/config.h>
#include <linux/cache.h>
+#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
@@ -29,6 +30,7 @@
#include <linux/err.h>
#include <linux/cpumask.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
MODULE_LICENSE("GPL");
@@ -61,14 +63,6 @@ do { \
#else
#define IP_NF_ASSERT(x)
#endif
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
-
-static DECLARE_MUTEX(ipt_mutex);
-
-/* Must have mutex */
-#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
-#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
-#include <linux/netfilter_ipv4/listhelp.h>
#if 0
/* All the better to debug you with... */
@@ -83,48 +77,8 @@ static DECLARE_MUTEX(ipt_mutex);
context stops packets coming through and allows user context to read
the counters or update the rules.
- To be cache friendly on SMP, we arrange them like so:
- [ n-entries ]
- ... cache-align padding ...
- [ n-entries ]
-
Hence the start of any table is given by get_table() below. */
-/* The table itself */
-struct ipt_table_info
-{
- /* Size per table */
- unsigned int size;
- /* Number of entries: FIXME. --RR */
- unsigned int number;
- /* Initial number of entries. Needed for module usage count */
- unsigned int initial_entries;
-
- /* Entry points and underflows */
- unsigned int hook_entry[NF_IP_NUMHOOKS];
- unsigned int underflow[NF_IP_NUMHOOKS];
-
- /* ipt_entry tables: one per CPU */
- char entries[0] ____cacheline_aligned;
-};
-
-static LIST_HEAD(ipt_target);
-static LIST_HEAD(ipt_match);
-static LIST_HEAD(ipt_tables);
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
-#if 0
-#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
-#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
-#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
-#endif
-
/* Returns whether matches rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
@@ -243,7 +197,8 @@ int do_match(struct ipt_entry_match *m,
int *hotdrop)
{
/* Stop iteration if it doesn't match */
- if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
+ if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
+ skb->nh.iph->ihl*4, hotdrop))
return 1;
else
return 0;
@@ -274,6 +229,7 @@ ipt_do_table(struct sk_buff **pskb,
const char *indev, *outdev;
void *table_base;
struct ipt_entry *e, *back;
+ struct xt_table_info *private = table->private;
/* Initialization */
ip = (*pskb)->nh.iph;
@@ -290,25 +246,11 @@ ipt_do_table(struct sk_buff **pskb,
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- table_base = (void *)table->private->entries
- + TABLE_OFFSET(table->private, smp_processor_id());
- e = get_entry(table_base, table->private->hook_entry[hook]);
-
-#ifdef CONFIG_NETFILTER_DEBUG
- /* Check noone else using our table */
- if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
- && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
- printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
- smp_processor_id(),
- table->name,
- &((struct ipt_entry *)table_base)->comefrom,
- ((struct ipt_entry *)table_base)->comefrom);
- }
- ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
-#endif
+ table_base = (void *)private->entries[smp_processor_id()];
+ e = get_entry(table_base, private->hook_entry[hook]);
/* For return from builtin chain */
- back = get_entry(table_base, table->private->underflow[hook]);
+ back = get_entry(table_base, private->underflow[hook]);
do {
IP_NF_ASSERT(e);
@@ -394,9 +336,6 @@ ipt_do_table(struct sk_buff **pskb,
}
} while (!hotdrop);
-#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
-#endif
read_unlock_bh(&table->lock);
#ifdef DEBUG_ALLOW_ALL
@@ -408,145 +347,6 @@ ipt_do_table(struct sk_buff **pskb,
#endif
}
-/*
- * These are weird, but module loading must not be done with mutex
- * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
- */
-
-/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
-static inline struct ipt_table *find_table_lock(const char *name)
-{
- struct ipt_table *t;
-
- if (down_interruptible(&ipt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
- list_for_each_entry(t, &ipt_tables, list)
- if (strcmp(t->name, name) == 0 && try_module_get(t->me))
- return t;
- up(&ipt_mutex);
- return NULL;
-}
-
-/* Find match, grabs ref. Returns ERR_PTR() on error. */
-static inline struct ipt_match *find_match(const char *name, u8 revision)
-{
- struct ipt_match *m;
- int err = 0;
-
- if (down_interruptible(&ipt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
- list_for_each_entry(m, &ipt_match, list) {
- if (strcmp(m->name, name) == 0) {
- if (m->revision == revision) {
- if (try_module_get(m->me)) {
- up(&ipt_mutex);
- return m;
- }
- } else
- err = -EPROTOTYPE; /* Found something. */
- }
- }
- up(&ipt_mutex);
- return ERR_PTR(err);
-}
-
-/* Find target, grabs ref. Returns ERR_PTR() on error. */
-static inline struct ipt_target *find_target(const char *name, u8 revision)
-{
- struct ipt_target *t;
- int err = 0;
-
- if (down_interruptible(&ipt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
- list_for_each_entry(t, &ipt_target, list) {
- if (strcmp(t->name, name) == 0) {
- if (t->revision == revision) {
- if (try_module_get(t->me)) {
- up(&ipt_mutex);
- return t;
- }
- } else
- err = -EPROTOTYPE; /* Found something. */
- }
- }
- up(&ipt_mutex);
- return ERR_PTR(err);
-}
-
-struct ipt_target *ipt_find_target(const char *name, u8 revision)
-{
- struct ipt_target *target;
-
- target = try_then_request_module(find_target(name, revision),
- "ipt_%s", name);
- if (IS_ERR(target) || !target)
- return NULL;
- return target;
-}
-
-static int match_revfn(const char *name, u8 revision, int *bestp)
-{
- struct ipt_match *m;
- int have_rev = 0;
-
- list_for_each_entry(m, &ipt_match, list) {
- if (strcmp(m->name, name) == 0) {
- if (m->revision > *bestp)
- *bestp = m->revision;
- if (m->revision == revision)
- have_rev = 1;
- }
- }
- return have_rev;
-}
-
-static int target_revfn(const char *name, u8 revision, int *bestp)
-{
- struct ipt_target *t;
- int have_rev = 0;
-
- list_for_each_entry(t, &ipt_target, list) {
- if (strcmp(t->name, name) == 0) {
- if (t->revision > *bestp)
- *bestp = t->revision;
- if (t->revision == revision)
- have_rev = 1;
- }
- }
- return have_rev;
-}
-
-/* Returns true or false (if no such extension at all) */
-static inline int find_revision(const char *name, u8 revision,
- int (*revfn)(const char *, u8, int *),
- int *err)
-{
- int have_rev, best = -1;
-
- if (down_interruptible(&ipt_mutex) != 0) {
- *err = -EINTR;
- return 1;
- }
- have_rev = revfn(name, revision, &best);
- up(&ipt_mutex);
-
- /* Nothing at all? Return 0 to try loading module. */
- if (best == -1) {
- *err = -ENOENT;
- return 0;
- }
-
- *err = best;
- if (!have_rev)
- *err = -EPROTONOSUPPORT;
- return 1;
-}
-
-
/* All zeroes == unconditional rule. */
static inline int
unconditional(const struct ipt_ip *ip)
@@ -563,7 +363,8 @@ unconditional(const struct ipt_ip *ip)
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
-mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
@@ -572,7 +373,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
struct ipt_entry *e
- = (struct ipt_entry *)(newinfo->entries + pos);
+ = (struct ipt_entry *)(entry0 + pos);
if (!(valid_hooks & (1 << hook)))
continue;
@@ -622,13 +423,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
goto next;
e = (struct ipt_entry *)
- (newinfo->entries + pos);
+ (entry0 + pos);
} while (oldpos == pos + e->next_offset);
/* Move along one */
size = e->next_offset;
e = (struct ipt_entry *)
- (newinfo->entries + pos + size);
+ (entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
} else {
@@ -645,7 +446,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
newpos = pos + e->next_offset;
}
e = (struct ipt_entry *)
- (newinfo->entries + newpos);
+ (entry0 + newpos);
e->counters.pcnt = pos;
pos = newpos;
}
@@ -708,7 +509,7 @@ check_match(struct ipt_entry_match *m,
{
struct ipt_match *match;
- match = try_then_request_module(find_match(m->u.user.name,
+ match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
m->u.user.revision),
"ipt_%s", m->u.user.name);
if (IS_ERR(match) || !match) {
@@ -753,7 +554,8 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
goto cleanup_matches;
t = ipt_get_target(e);
- target = try_then_request_module(find_target(t->u.user.name,
+ target = try_then_request_module(xt_find_target(AF_INET,
+ t->u.user.name,
t->u.user.revision),
"ipt_%s", t->u.user.name);
if (IS_ERR(target) || !target) {
@@ -790,7 +592,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
- struct ipt_table_info *newinfo,
+ struct xt_table_info *newinfo,
unsigned char *base,
unsigned char *limit,
const unsigned int *hook_entries,
@@ -824,7 +626,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
< 0 (not IPT_RETURN). --RR */
/* Clear counters and comefrom */
- e->counters = ((struct ipt_counters) { 0, 0 });
+ e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
(*i)++;
@@ -854,7 +656,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
static int
translate_table(const char *name,
unsigned int valid_hooks,
- struct ipt_table_info *newinfo,
+ struct xt_table_info *newinfo,
+ void *entry0,
unsigned int size,
unsigned int number,
const unsigned int *hook_entries,
@@ -875,11 +678,11 @@ translate_table(const char *name,
duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
- ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
check_entry_size_and_hooks,
newinfo,
- newinfo->entries,
- newinfo->entries + size,
+ entry0,
+ entry0 + size,
hook_entries, underflows, &i);
if (ret != 0)
return ret;
@@ -907,95 +710,79 @@ translate_table(const char *name,
}
}
- if (!mark_source_chains(newinfo, valid_hooks))
+ if (!mark_source_chains(newinfo, valid_hooks, entry0))
return -ELOOP;
/* Finally, each sanity check must pass */
i = 0;
- ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
check_entry, name, size, &i);
if (ret != 0) {
- IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ IPT_ENTRY_ITERATE(entry0, newinfo->size,
cleanup_entry, &i);
return ret;
}
/* And one copy for every other CPU */
for_each_cpu(i) {
- if (i == 0)
- continue;
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
- newinfo->entries,
- SMP_ALIGN(newinfo->size));
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
}
return ret;
}
-static struct ipt_table_info *
-replace_table(struct ipt_table *table,
- unsigned int num_counters,
- struct ipt_table_info *newinfo,
- int *error)
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+ struct xt_counters total[],
+ unsigned int *i)
{
- struct ipt_table_info *oldinfo;
-
-#ifdef CONFIG_NETFILTER_DEBUG
- {
- struct ipt_entry *table_base;
- unsigned int i;
-
- for_each_cpu(i) {
- table_base =
- (void *)newinfo->entries
- + TABLE_OFFSET(newinfo, i);
-
- table_base->comefrom = 0xdead57ac;
- }
- }
-#endif
-
- /* Do the substitution. */
- write_lock_bh(&table->lock);
- /* Check inside lock: is the old number correct? */
- if (num_counters != table->private->number) {
- duprintf("num_counters != table->private->number (%u/%u)\n",
- num_counters, table->private->number);
- write_unlock_bh(&table->lock);
- *error = -EAGAIN;
- return NULL;
- }
- oldinfo = table->private;
- table->private = newinfo;
- newinfo->initial_entries = oldinfo->initial_entries;
- write_unlock_bh(&table->lock);
+ ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
- return oldinfo;
+ (*i)++;
+ return 0;
}
-/* Gets counters. */
static inline int
-add_entry_to_counter(const struct ipt_entry *e,
+set_entry_to_counter(const struct ipt_entry *e,
struct ipt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
(*i)++;
return 0;
}
static void
-get_counters(const struct ipt_table_info *t,
- struct ipt_counters counters[])
+get_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
{
unsigned int cpu;
unsigned int i;
+ unsigned int curcpu;
+
+ /* Instead of clearing (by a previous call to memset())
+ * the counters and using adds, we set the counters
+ * with data used by 'current' CPU
+ * We dont care about preemption here.
+ */
+ curcpu = raw_smp_processor_id();
+
+ i = 0;
+ IPT_ENTRY_ITERATE(t->entries[curcpu],
+ t->size,
+ set_entry_to_counter,
+ counters,
+ &i);
for_each_cpu(cpu) {
+ if (cpu == curcpu)
+ continue;
i = 0;
- IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+ IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
@@ -1010,26 +797,32 @@ copy_entries_to_user(unsigned int total_size,
{
unsigned int off, num, countersize;
struct ipt_entry *e;
- struct ipt_counters *counters;
+ struct xt_counters *counters;
+ struct xt_table_info *private = table->private;
int ret = 0;
+ void *loc_cpu_entry;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
about). */
- countersize = sizeof(struct ipt_counters) * table->private->number;
- counters = vmalloc(countersize);
+ countersize = sizeof(struct xt_counters) * private->number;
+ counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
return -ENOMEM;
/* First, sum counters... */
- memset(counters, 0, countersize);
write_lock_bh(&table->lock);
- get_counters(table->private, counters);
+ get_counters(private, counters);
write_unlock_bh(&table->lock);
- /* ... then copy entire thing from CPU 0... */
- if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ /* ... then copy entire thing ... */
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
}
@@ -1041,7 +834,7 @@ copy_entries_to_user(unsigned int total_size,
struct ipt_entry_match *m;
struct ipt_entry_target *t;
- e = (struct ipt_entry *)(table->private->entries + off);
+ e = (struct ipt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
+ offsetof(struct ipt_entry, counters),
&counters[num],
@@ -1089,21 +882,22 @@ get_entries(const struct ipt_get_entries *entries,
int ret;
struct ipt_table *t;
- t = find_table_lock(entries->name);
+ t = xt_find_table_lock(AF_INET, entries->name);
if (t && !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n",
- t->private->number);
- if (entries->size == t->private->size)
- ret = copy_entries_to_user(t->private->size,
+ private->number);
+ if (entries->size == private->size)
+ ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
else {
duprintf("get_entries: I've got %u not %u!\n",
- t->private->size,
+ private->size,
entries->size);
ret = -EINVAL;
}
module_put(t->me);
- up(&ipt_mutex);
+ xt_table_unlock(t);
} else
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1116,8 +910,9 @@ do_replace(void __user *user, unsigned int len)
int ret;
struct ipt_replace tmp;
struct ipt_table *t;
- struct ipt_table_info *newinfo, *oldinfo;
- struct ipt_counters *counters;
+ struct xt_table_info *newinfo, *oldinfo;
+ struct xt_counters *counters;
+ void *loc_cpu_entry, *loc_cpu_old_entry;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1126,38 +921,33 @@ do_replace(void __user *user, unsigned int len)
if (len != sizeof(tmp) + tmp.size)
return -ENOPROTOOPT;
- /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
- if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
- return -ENOMEM;
-
- newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(tmp.size) *
- (highest_possible_processor_id()+1));
+ newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
return -ENOMEM;
- if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+ /* choose the copy that is our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
- counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+ counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto free_newinfo;
}
- memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, tmp.size, tmp.num_entries,
+ newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
tmp.hook_entry, tmp.underflow);
if (ret != 0)
goto free_newinfo_counters;
duprintf("ip_tables: Translated table\n");
- t = try_then_request_module(find_table_lock(tmp.name),
+ t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
"iptable_%s", tmp.name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1172,7 +962,7 @@ do_replace(void __user *user, unsigned int len)
goto put_module;
}
- oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+ oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
if (!oldinfo)
goto put_module;
@@ -1189,24 +979,25 @@ do_replace(void __user *user, unsigned int len)
/* Get the old counters. */
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
- vfree(oldinfo);
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+ xt_free_table_info(oldinfo);
if (copy_to_user(tmp.counters, counters,
- sizeof(struct ipt_counters) * tmp.num_counters) != 0)
+ sizeof(struct xt_counters) * tmp.num_counters) != 0)
ret = -EFAULT;
vfree(counters);
- up(&ipt_mutex);
+ xt_table_unlock(t);
return ret;
put_module:
module_put(t->me);
- up(&ipt_mutex);
+ xt_table_unlock(t);
free_newinfo_counters_untrans:
- IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+ IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
free_newinfo_counters:
vfree(counters);
free_newinfo:
- vfree(newinfo);
+ xt_free_table_info(newinfo);
return ret;
}
@@ -1214,7 +1005,7 @@ do_replace(void __user *user, unsigned int len)
* and everything is OK. */
static inline int
add_counter_to_entry(struct ipt_entry *e,
- const struct ipt_counters addme[],
+ const struct xt_counters addme[],
unsigned int *i)
{
#if 0
@@ -1236,17 +1027,19 @@ static int
do_add_counters(void __user *user, unsigned int len)
{
unsigned int i;
- struct ipt_counters_info tmp, *paddc;
+ struct xt_counters_info tmp, *paddc;
struct ipt_table *t;
+ struct xt_table_info *private;
int ret = 0;
+ void *loc_cpu_entry;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+ if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
return -EINVAL;
- paddc = vmalloc(len);
+ paddc = vmalloc_node(len, numa_node_id());
if (!paddc)
return -ENOMEM;
@@ -1255,27 +1048,30 @@ do_add_counters(void __user *user, unsigned int len)
goto free;
}
- t = find_table_lock(tmp.name);
+ t = xt_find_table_lock(AF_INET, tmp.name);
if (!t || IS_ERR(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
write_lock_bh(&t->lock);
- if (t->private->number != paddc->num_counters) {
+ private = t->private;
+ if (private->number != paddc->num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
i = 0;
- IPT_ENTRY_ITERATE(t->private->entries,
- t->private->size,
+ /* Choose the copy that is on our node */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ IPT_ENTRY_ITERATE(loc_cpu_entry,
+ private->size,
add_counter_to_entry,
paddc->counters,
&i);
unlock_up_free:
write_unlock_bh(&t->lock);
- up(&ipt_mutex);
+ xt_table_unlock(t);
module_put(t->me);
free:
vfree(paddc);
@@ -1334,25 +1130,26 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
}
name[IPT_TABLE_MAXNAMELEN-1] = '\0';
- t = try_then_request_module(find_table_lock(name),
+ t = try_then_request_module(xt_find_table_lock(AF_INET, name),
"iptable_%s", name);
if (t && !IS_ERR(t)) {
struct ipt_getinfo info;
+ struct xt_table_info *private = t->private;
info.valid_hooks = t->valid_hooks;
- memcpy(info.hook_entry, t->private->hook_entry,
+ memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
- memcpy(info.underflow, t->private->underflow,
+ memcpy(info.underflow, private->underflow,
sizeof(info.underflow));
- info.num_entries = t->private->number;
- info.size = t->private->size;
+ info.num_entries = private->number;
+ info.size = private->size;
memcpy(info.name, name, sizeof(info.name));
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
else
ret = 0;
- up(&ipt_mutex);
+ xt_table_unlock(t);
module_put(t->me);
} else
ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1379,7 +1176,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IPT_SO_GET_REVISION_MATCH:
case IPT_SO_GET_REVISION_TARGET: {
struct ipt_get_revision rev;
- int (*revfn)(const char *, u8, int *);
+ int target;
if (*len != sizeof(rev)) {
ret = -EINVAL;
@@ -1391,12 +1188,13 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
}
if (cmd == IPT_SO_GET_REVISION_TARGET)
- revfn = target_revfn;
+ target = 1;
else
- revfn = match_revfn;
+ target = 0;
- try_then_request_module(find_revision(rev.name, rev.revision,
- revfn, &ret),
+ try_then_request_module(xt_find_revision(AF_INET, rev.name,
+ rev.revision,
+ target, &ret),
"ipt_%s", rev.name);
break;
}
@@ -1409,309 +1207,53 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-/* Registration hooks for targets. */
-int
-ipt_register_target(struct ipt_target *target)
+int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
{
int ret;
-
- ret = down_interruptible(&ipt_mutex);
- if (ret != 0)
- return ret;
- list_add(&target->list, &ipt_target);
- up(&ipt_mutex);
- return ret;
-}
-
-void
-ipt_unregister_target(struct ipt_target *target)
-{
- down(&ipt_mutex);
- LIST_DELETE(&ipt_target, target);
- up(&ipt_mutex);
-}
-
-int
-ipt_register_match(struct ipt_match *match)
-{
- int ret;
-
- ret = down_interruptible(&ipt_mutex);
- if (ret != 0)
- return ret;
-
- list_add(&match->list, &ipt_match);
- up(&ipt_mutex);
-
- return ret;
-}
-
-void
-ipt_unregister_match(struct ipt_match *match)
-{
- down(&ipt_mutex);
- LIST_DELETE(&ipt_match, match);
- up(&ipt_mutex);
-}
-
-int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
-{
- int ret;
- struct ipt_table_info *newinfo;
- static struct ipt_table_info bootstrap
+ struct xt_table_info *newinfo;
+ static struct xt_table_info bootstrap
= { 0, 0, 0, { 0 }, { 0 }, { } };
+ void *loc_cpu_entry;
- newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(repl->size) *
- (highest_possible_processor_id()+1));
+ newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
return -ENOMEM;
- memcpy(newinfo->entries, repl->entries, repl->size);
+ /* choose the copy on our node/cpu
+ * but dont care of preemption
+ */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(table->name, table->valid_hooks,
- newinfo, repl->size,
+ newinfo, loc_cpu_entry, repl->size,
repl->num_entries,
repl->hook_entry,
repl->underflow);
if (ret != 0) {
- vfree(newinfo);
+ xt_free_table_info(newinfo);
return ret;
}
- ret = down_interruptible(&ipt_mutex);
- if (ret != 0) {
- vfree(newinfo);
+ if (xt_register_table(table, &bootstrap, newinfo) != 0) {
+ xt_free_table_info(newinfo);
return ret;
}
- /* Don't autoload: we'd eat our tail... */
- if (list_named_find(&ipt_tables, table->name)) {
- ret = -EEXIST;
- goto free_unlock;
- }
-
- /* Simplifies replace_table code. */
- table->private = &bootstrap;
- if (!replace_table(table, 0, newinfo, &ret))
- goto free_unlock;
-
- duprintf("table->private->number = %u\n",
- table->private->number);
-
- /* save number of initial entries */
- table->private->initial_entries = table->private->number;
-
- rwlock_init(&table->lock);
- list_prepend(&ipt_tables, table);
-
- unlock:
- up(&ipt_mutex);
- return ret;
-
- free_unlock:
- vfree(newinfo);
- goto unlock;
+ return 0;
}
void ipt_unregister_table(struct ipt_table *table)
{
- down(&ipt_mutex);
- LIST_DELETE(&ipt_tables, table);
- up(&ipt_mutex);
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
- /* Decrease module usage counts and free resources */
- IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
- cleanup_entry, NULL);
- vfree(table->private);
-}
-
-/* Returns 1 if the port is matched by the range, 0 otherwise */
-static inline int
-port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
-{
- int ret;
-
- ret = (port >= min && port <= max) ^ invert;
- return ret;
-}
-
-static int
-tcp_find_option(u_int8_t option,
- const struct sk_buff *skb,
- unsigned int optlen,
- int invert,
- int *hotdrop)
-{
- /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
- u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
- unsigned int i;
-
- duprintf("tcp_match: finding option\n");
-
- if (!optlen)
- return invert;
-
- /* If we don't have the whole header, drop packet. */
- op = skb_header_pointer(skb,
- skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
- optlen, _opt);
- if (op == NULL) {
- *hotdrop = 1;
- return 0;
- }
-
- for (i = 0; i < optlen; ) {
- if (op[i] == option) return !invert;
- if (op[i] < 2) i++;
- else i += op[i+1]?:1;
- }
-
- return invert;
-}
-
-static int
-tcp_match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- struct tcphdr _tcph, *th;
- const struct ipt_tcp *tcpinfo = matchinfo;
-
- if (offset) {
- /* To quote Alan:
-
- Don't allow a fragment of TCP 8 bytes in. Nobody normal
- causes this. Its a cracker trying to break in by doing a
- flag overwrite to pass the direction checks.
- */
- if (offset == 1) {
- duprintf("Dropping evil TCP offset=1 frag.\n");
- *hotdrop = 1;
- }
- /* Must not be a fragment. */
- return 0;
- }
-
-#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
-
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- /* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
- duprintf("Dropping evil TCP offset=0 tinygram.\n");
- *hotdrop = 1;
- return 0;
- }
-
- if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
- ntohs(th->source),
- !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
- return 0;
- if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
- ntohs(th->dest),
- !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
- return 0;
- if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
- == tcpinfo->flg_cmp,
- IPT_TCP_INV_FLAGS))
- return 0;
- if (tcpinfo->option) {
- if (th->doff * 4 < sizeof(_tcph)) {
- *hotdrop = 1;
- return 0;
- }
- if (!tcp_find_option(tcpinfo->option, skb,
- th->doff*4 - sizeof(_tcph),
- tcpinfo->invflags & IPT_TCP_INV_OPTION,
- hotdrop))
- return 0;
- }
- return 1;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-tcp_checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_tcp *tcpinfo = matchinfo;
-
- /* Must specify proto == TCP, and no unknown invflags */
- return ip->proto == IPPROTO_TCP
- && !(ip->invflags & IPT_INV_PROTO)
- && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
- && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
-}
-
-static int
-udp_match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- struct udphdr _udph, *uh;
- const struct ipt_udp *udpinfo = matchinfo;
-
- /* Must not be a fragment. */
- if (offset)
- return 0;
-
- uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_udph), &_udph);
- if (uh == NULL) {
- /* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
- duprintf("Dropping evil UDP tinygram.\n");
- *hotdrop = 1;
- return 0;
- }
+ private = xt_unregister_table(table);
- return port_match(udpinfo->spts[0], udpinfo->spts[1],
- ntohs(uh->source),
- !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
- && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
- ntohs(uh->dest),
- !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-udp_checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchinfosize,
- unsigned int hook_mask)
-{
- const struct ipt_udp *udpinfo = matchinfo;
-
- /* Must specify proto == UDP, and no unknown invflags */
- if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
- duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
- IPPROTO_UDP);
- return 0;
- }
- if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
- duprintf("ipt_udp: matchsize %u != %u\n",
- matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
- return 0;
- }
- if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
- duprintf("ipt_udp: unknown flags %X\n",
- udpinfo->invflags);
- return 0;
- }
-
- return 1;
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+ xt_free_table_info(private);
}
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1730,6 +1272,7 @@ icmp_match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
struct icmphdr _icmph, *ic;
@@ -1739,8 +1282,7 @@ icmp_match(const struct sk_buff *skb,
if (offset)
return 0;
- ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_icmph), &_icmph);
+ ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
if (ic == NULL) {
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
@@ -1760,11 +1302,12 @@ icmp_match(const struct sk_buff *skb,
/* Called when user tries to insert an entry of this type. */
static int
icmp_checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *info,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
{
+ const struct ipt_ip *ip = info;
const struct ipt_icmp *icmpinfo = matchinfo;
/* Must specify proto == ICMP, and no unknown invflags */
@@ -1794,123 +1337,22 @@ static struct nf_sockopt_ops ipt_sockopts = {
.get = do_ipt_get_ctl,
};
-static struct ipt_match tcp_matchstruct = {
- .name = "tcp",
- .match = &tcp_match,
- .checkentry = &tcp_checkentry,
-};
-
-static struct ipt_match udp_matchstruct = {
- .name = "udp",
- .match = &udp_match,
- .checkentry = &udp_checkentry,
-};
-
static struct ipt_match icmp_matchstruct = {
.name = "icmp",
.match = &icmp_match,
.checkentry = &icmp_checkentry,
};
-#ifdef CONFIG_PROC_FS
-static inline int print_name(const char *i,
- off_t start_offset, char *buffer, int length,
- off_t *pos, unsigned int *count)
-{
- if ((*count)++ >= start_offset) {
- unsigned int namelen;
-
- namelen = sprintf(buffer + *pos, "%s\n",
- i + sizeof(struct list_head));
- if (*pos + namelen > length) {
- /* Stop iterating */
- return 1;
- }
- *pos += namelen;
- }
- return 0;
-}
-
-static inline int print_target(const struct ipt_target *t,
- off_t start_offset, char *buffer, int length,
- off_t *pos, unsigned int *count)
-{
- if (t == &ipt_standard_target || t == &ipt_error_target)
- return 0;
- return print_name((char *)t, start_offset, buffer, length, pos, count);
-}
-
-static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
-{
- off_t pos = 0;
- unsigned int count = 0;
-
- if (down_interruptible(&ipt_mutex) != 0)
- return 0;
-
- LIST_FIND(&ipt_tables, print_name, void *,
- offset, buffer, length, &pos, &count);
-
- up(&ipt_mutex);
-
- /* `start' hack - see fs/proc/generic.c line ~105 */
- *start=(char *)((unsigned long)count-offset);
- return pos;
-}
-
-static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
-{
- off_t pos = 0;
- unsigned int count = 0;
-
- if (down_interruptible(&ipt_mutex) != 0)
- return 0;
-
- LIST_FIND(&ipt_target, print_target, struct ipt_target *,
- offset, buffer, length, &pos, &count);
-
- up(&ipt_mutex);
-
- *start = (char *)((unsigned long)count - offset);
- return pos;
-}
-
-static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
-{
- off_t pos = 0;
- unsigned int count = 0;
-
- if (down_interruptible(&ipt_mutex) != 0)
- return 0;
-
- LIST_FIND(&ipt_match, print_name, void *,
- offset, buffer, length, &pos, &count);
-
- up(&ipt_mutex);
-
- *start = (char *)((unsigned long)count - offset);
- return pos;
-}
-
-static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
-{ { "ip_tables_names", ipt_get_tables },
- { "ip_tables_targets", ipt_get_targets },
- { "ip_tables_matches", ipt_get_matches },
- { NULL, NULL} };
-#endif /*CONFIG_PROC_FS*/
-
static int __init init(void)
{
int ret;
+ xt_proto_init(AF_INET);
+
/* Noone else will be downing sem now, so we won't sleep */
- down(&ipt_mutex);
- list_append(&ipt_target, &ipt_standard_target);
- list_append(&ipt_target, &ipt_error_target);
- list_append(&ipt_match, &tcp_matchstruct);
- list_append(&ipt_match, &udp_matchstruct);
- list_append(&ipt_match, &icmp_matchstruct);
- up(&ipt_mutex);
+ xt_register_target(AF_INET, &ipt_standard_target);
+ xt_register_target(AF_INET, &ipt_error_target);
+ xt_register_match(AF_INET, &icmp_matchstruct);
/* Register setsockopt */
ret = nf_register_sockopt(&ipt_sockopts);
@@ -1919,49 +1361,23 @@ static int __init init(void)
return ret;
}
-#ifdef CONFIG_PROC_FS
- {
- struct proc_dir_entry *proc;
- int i;
-
- for (i = 0; ipt_proc_entry[i].name; i++) {
- proc = proc_net_create(ipt_proc_entry[i].name, 0,
- ipt_proc_entry[i].get_info);
- if (!proc) {
- while (--i >= 0)
- proc_net_remove(ipt_proc_entry[i].name);
- nf_unregister_sockopt(&ipt_sockopts);
- return -ENOMEM;
- }
- proc->owner = THIS_MODULE;
- }
- }
-#endif
-
- printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+ printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
return 0;
}
static void __exit fini(void)
{
nf_unregister_sockopt(&ipt_sockopts);
-#ifdef CONFIG_PROC_FS
- {
- int i;
- for (i = 0; ipt_proc_entry[i].name; i++)
- proc_net_remove(ipt_proc_entry[i].name);
- }
-#endif
+
+ xt_unregister_match(AF_INET, &icmp_matchstruct);
+ xt_unregister_target(AF_INET, &ipt_error_target);
+ xt_unregister_target(AF_INET, &ipt_standard_target);
+
+ xt_proto_fini(AF_INET);
}
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
-EXPORT_SYMBOL(ipt_register_match);
-EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
-EXPORT_SYMBOL(ipt_register_target);
-EXPORT_SYMBOL(ipt_unregister_target);
-EXPORT_SYMBOL(ipt_find_target);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
deleted file mode 100644
index dab78d8bd49..00000000000
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * This is a module which is used for setting the skb->priority field
- * of an skb for qdisc classification.
- */
-
-/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables qdisc classification target module");
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_classify_target_info *clinfo = targinfo;
-
- if((*pskb)->priority != clinfo->priority)
- (*pskb)->priority = clinfo->priority;
-
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_entry *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
- printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
- return 0;
- }
-
- if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
- (1 << NF_IP_POST_ROUTING))) {
- printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
- "and POST_ROUTING.\n");
- return 0;
- }
-
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_ERR "CLASSIFY: can only be called from "
- "\"mangle\" table, not \"%s\".\n",
- tablename);
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_classify_reg = {
- .name = "CLASSIFY",
- .target = target,
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&ipt_classify_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_classify_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 45c52d8f4d9..d9bc971f03a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -379,12 +379,13 @@ target(struct sk_buff **pskb,
static int
checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e_void,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
{
struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+ const struct ipt_entry *e = e_void;
struct clusterip_config *config;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
deleted file mode 100644
index 8acac5a40a9..00000000000
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/* This kernel module is used to modify the connection mark values, or
- * to optionally restore the skb nfmark from the connection mark
- *
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
-MODULE_DESCRIPTION("IP tables CONNMARK matching module");
-MODULE_LICENSE("GPL");
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_connmark_target_info *markinfo = targinfo;
- u_int32_t diff;
- u_int32_t nfmark;
- u_int32_t newmark;
- u_int32_t ctinfo;
- u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
-
- if (ctmark) {
- switch(markinfo->mode) {
- case IPT_CONNMARK_SET:
- newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
- if (newmark != *ctmark)
- *ctmark = newmark;
- break;
- case IPT_CONNMARK_SAVE:
- newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
- if (*ctmark != newmark)
- *ctmark = newmark;
- break;
- case IPT_CONNMARK_RESTORE:
- nfmark = (*pskb)->nfmark;
- diff = (*ctmark ^ nfmark) & markinfo->mask;
- if (diff != 0)
- (*pskb)->nfmark = nfmark ^ diff;
- break;
- }
- }
-
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_entry *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- struct ipt_connmark_target_info *matchinfo = targinfo;
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
- printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
- return 0;
- }
-
- if (matchinfo->mode == IPT_CONNMARK_RESTORE) {
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
- }
-
- if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
- printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_connmark_reg = {
- .name = "CONNMARK",
- .target = &target,
- .checkentry = &checkentry,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- need_ip_conntrack();
- return ipt_register_target(&ipt_connmark_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_connmark_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 6e319570a28..898cdf79ce1 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -57,7 +57,7 @@ target(struct sk_buff **pskb,
static int
checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e_void,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index a1319693f64..706445426a6 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -113,12 +113,13 @@ target(struct sk_buff **pskb,
static int
checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e_void,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
+ const struct ipt_entry *e = e_void;
if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 30be0f1dae3..6606ddb66a2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -431,7 +431,7 @@ ipt_log_target(struct sk_buff **pskb,
}
static int ipt_log_checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
deleted file mode 100644
index 52b4f2c296b..00000000000
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* This is a module which is used for setting the NFMARK field of an skb. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_MARK.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables MARK modification module");
-
-static unsigned int
-target_v0(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_mark_target_info *markinfo = targinfo;
-
- if((*pskb)->nfmark != markinfo->mark)
- (*pskb)->nfmark = markinfo->mark;
-
- return IPT_CONTINUE;
-}
-
-static unsigned int
-target_v1(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_mark_target_info_v1 *markinfo = targinfo;
- int mark = 0;
-
- switch (markinfo->mode) {
- case IPT_MARK_SET:
- mark = markinfo->mark;
- break;
-
- case IPT_MARK_AND:
- mark = (*pskb)->nfmark & markinfo->mark;
- break;
-
- case IPT_MARK_OR:
- mark = (*pskb)->nfmark | markinfo->mark;
- break;
- }
-
- if((*pskb)->nfmark != mark)
- (*pskb)->nfmark = mark;
-
- return IPT_CONTINUE;
-}
-
-
-static int
-checkentry_v0(const char *tablename,
- const struct ipt_entry *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- struct ipt_mark_target_info *markinfo = targinfo;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
- printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
- return 0;
- }
-
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
-
- if (markinfo->mark > 0xffffffff) {
- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
- return 0;
- }
-
- return 1;
-}
-
-static int
-checkentry_v1(const char *tablename,
- const struct ipt_entry *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- struct ipt_mark_target_info_v1 *markinfo = targinfo;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1))){
- printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1)));
- return 0;
- }
-
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
-
- if (markinfo->mode != IPT_MARK_SET
- && markinfo->mode != IPT_MARK_AND
- && markinfo->mode != IPT_MARK_OR) {
- printk(KERN_WARNING "MARK: unknown mode %u\n",
- markinfo->mode);
- return 0;
- }
-
- if (markinfo->mark > 0xffffffff) {
- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_mark_reg_v0 = {
- .name = "MARK",
- .target = target_v0,
- .checkentry = checkentry_v0,
- .me = THIS_MODULE,
- .revision = 0,
-};
-
-static struct ipt_target ipt_mark_reg_v1 = {
- .name = "MARK",
- .target = target_v1,
- .checkentry = checkentry_v1,
- .me = THIS_MODULE,
- .revision = 1,
-};
-
-static int __init init(void)
-{
- int err;
-
- err = ipt_register_target(&ipt_mark_reg_v0);
- if (!err) {
- err = ipt_register_target(&ipt_mark_reg_v1);
- if (err)
- ipt_unregister_target(&ipt_mark_reg_v0);
- }
- return err;
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_mark_reg_v0);
- ipt_unregister_target(&ipt_mark_reg_v1);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 275a174c6fe..12c56d3343c 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,6 +11,7 @@
#include <linux/config.h>
#include <linux/types.h>
+#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/timer.h>
#include <linux/module.h>
@@ -18,6 +19,7 @@
#include <net/protocol.h>
#include <net/ip.h>
#include <net/checksum.h>
+#include <net/route.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -38,7 +40,7 @@ static DEFINE_RWLOCK(masq_lock);
/* FIXME: Multiple targets. --RR */
static int
masquerade_check(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index e6e7b609536..b074467fe67 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
static int
check(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c
deleted file mode 100644
index 3cedc9be880..00000000000
--- a/net/ipv4/netfilter/ipt_NFQUEUE.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* iptables module for using new netfilter netlink queue
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables NFQUEUE target");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_NFQ_info *tinfo = targinfo;
-
- return NF_QUEUE_NR(tinfo->queuenum);
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_entry *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) {
- printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_NFQ_info)));
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_NFQ_reg = {
- .name = "NFQUEUE",
- .target = target,
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&ipt_NFQ_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_NFQ_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
deleted file mode 100644
index e3c69d072c6..00000000000
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* This is a module which is used for setting up fake conntracks
- * on packets so that they are not seen by the conntrack/NAT code.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- /* Previously seen (loopback)? Ignore. */
- if ((*pskb)->nfct != NULL)
- return IPT_CONTINUE;
-
- /* Attach fake conntrack entry.
- If there is a real ct entry correspondig to this packet,
- it'll hang aroun till timing out. We don't deal with it
- for performance reasons. JK */
- nf_ct_untrack(*pskb);
- (*pskb)->nfctinfo = IP_CT_NEW;
- nf_conntrack_get((*pskb)->nfct);
-
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_entry *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- if (targinfosize != 0) {
- printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n",
- targinfosize);
- return 0;
- }
-
- if (strcmp(tablename, "raw") != 0) {
- printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename);
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_notrack_reg = {
- .name = "NOTRACK",
- .target = target,
- .checkentry = checkentry,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- if (ipt_register_target(&ipt_notrack_reg))
- return -EINVAL;
-
- return 0;
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_notrack_reg);
-}
-
-module_init(init);
-module_exit(fini);
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 5245bfd33d5..140be51f2f0 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables REDIRECT target module");
/* FIXME: Take multiple ranges --RR */
static int
redirect_check(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f057025a719..3eb47aae78c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -203,7 +203,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
sizeof(struct tcphdr), 0));
/* Adjust IP TTL, DF */
- nskb->nh.iph->ttl = MAXTTL;
+ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
/* Set DF, id = 0 */
nskb->nh.iph->frag_off = htons(IP_DF);
nskb->nh.iph->id = 0;
@@ -282,12 +282,13 @@ static unsigned int reject(struct sk_buff **pskb,
}
static int check(const char *tablename,
- const struct ipt_entry *e,
+ const void *e_void,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_reject_info *rejinfo = targinfo;
+ const struct ipt_entry *e = e_void;
if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7a0536d864a..a22de59bba0 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -49,7 +49,7 @@ MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
static int
same_check(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 8db70d6908c..c122841e182 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -210,12 +210,13 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
/* Must specify -p tcp --syn/--tcp-flags SYN */
static int
ipt_tcpmss_checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e_void,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+ const struct ipt_entry *e = e_void;
if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index deadb36d442..3a44a56db23 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -52,7 +52,7 @@ target(struct sk_buff **pskb,
static int
checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e_void,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index b9ae6a9382f..b769eb23197 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -66,7 +66,7 @@ ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
}
static int ipt_ttl_checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 2883ccd8a91..641dbc47765 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -77,15 +77,15 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
static unsigned int nlbufsiz = 4096;
-module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
+module_param(nlbufsiz, uint, 0400);
MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
static unsigned int flushtimeout = 10;
-module_param(flushtimeout, int, 0600);
+module_param(flushtimeout, uint, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-static unsigned int nflog = 1;
-module_param(nflog, int, 0400);
+static int nflog = 1;
+module_param(nflog, bool, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
/* global data structures */
@@ -330,7 +330,7 @@ static void ipt_logfn(unsigned int pf,
}
static int ipt_ulog_checkentry(const char *tablename,
- const struct ipt_entry *e,
+ const void *e,
void *targinfo,
unsigned int targinfosize,
unsigned int hookmask)
@@ -376,7 +376,7 @@ static int __init init(void)
DEBUGP("ipt_ULOG: init module\n");
- if (nlbufsiz >= 128*1024) {
+ if (nlbufsiz > 128*1024) {
printk("Netlink buffer has to be <= 128kB\n");
return -EINVAL;
}
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e19c2a52d00..d6b83a97651 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -29,7 +29,7 @@ static inline int match_type(u_int32_t addr, u_int16_t mask)
static int match(const struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, const void *matchinfo,
- int offset, int *hotdrop)
+ int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_addrtype_info *info = matchinfo;
const struct iphdr *iph = skb->nh.iph;
@@ -43,7 +43,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
return ret;
}
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
void *matchinfo, unsigned int matchsize,
unsigned int hook_mask)
{
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a0fea847cb7..144adfec13c 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -41,6 +41,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
struct ip_auth_hdr _ahdr, *ah;
@@ -50,7 +51,7 @@ match(const struct sk_buff *skb,
if (offset)
return 0;
- ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ ah = skb_header_pointer(skb, protoff,
sizeof(_ahdr), &_ahdr);
if (ah == NULL) {
/* We've been asked to examine this packet, and we
@@ -69,12 +70,13 @@ match(const struct sk_buff *skb,
/* Called when user tries to insert an entry of this type. */
static int
checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip_void,
void *matchinfo,
unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ipt_ah *ahinfo = matchinfo;
+ const struct ipt_ip *ip = ip_void;
/* Must specify proto == AH, and no unknown invflags */
if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_comment.c b/net/ipv4/netfilter/ipt_comment.c
deleted file mode 100644
index 6b76a1ea524..00000000000
--- a/net/ipv4/netfilter/ipt_comment.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Implements a dummy match to allow attaching comments to rules
- *
- * 2003-05-13 Brad Fisher (brad@info-link.net)
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_comment.h>
-
-MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
-MODULE_DESCRIPTION("iptables comment match module");
-MODULE_LICENSE("GPL");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- /* We always match */
- return 1;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- /* Check the size */
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_comment_info)))
- return 0;
- return 1;
-}
-
-static struct ipt_match comment_match = {
- .name = "comment",
- .match = match,
- .checkentry = checkentry,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&comment_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&comment_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
deleted file mode 100644
index d68a048b717..00000000000
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* Kernel module to match connection tracking byte counter.
- * GPL (C) 2002 Martin Devera (devik@cdi.cz).
- *
- * 2004-07-20 Harald Welte <laforge@netfilter.org>
- * - reimplemented to use per-connection accounting counters
- * - add functionality to match number of packets
- * - add functionality to match average packet size
- * - add support to match directions seperately
- *
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_connbytes.h>
-
-#include <asm/div64.h>
-#include <asm/bitops.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
-
-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- do_div(dividend, d);
- return dividend;
-}
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_connbytes_info *sinfo = matchinfo;
- u_int64_t what = 0; /* initialize to make gcc happy */
- const struct ip_conntrack_counter *counters;
-
- if (!(counters = nf_ct_get_counters(skb)))
- return 0; /* no match */
-
- switch (sinfo->what) {
- case IPT_CONNBYTES_PKTS:
- switch (sinfo->direction) {
- case IPT_CONNBYTES_DIR_ORIGINAL:
- what = counters[IP_CT_DIR_ORIGINAL].packets;
- break;
- case IPT_CONNBYTES_DIR_REPLY:
- what = counters[IP_CT_DIR_REPLY].packets;
- break;
- case IPT_CONNBYTES_DIR_BOTH:
- what = counters[IP_CT_DIR_ORIGINAL].packets;
- what += counters[IP_CT_DIR_REPLY].packets;
- break;
- }
- break;
- case IPT_CONNBYTES_BYTES:
- switch (sinfo->direction) {
- case IPT_CONNBYTES_DIR_ORIGINAL:
- what = counters[IP_CT_DIR_ORIGINAL].bytes;
- break;
- case IPT_CONNBYTES_DIR_REPLY:
- what = counters[IP_CT_DIR_REPLY].bytes;
- break;
- case IPT_CONNBYTES_DIR_BOTH:
- what = counters[IP_CT_DIR_ORIGINAL].bytes;
- what += counters[IP_CT_DIR_REPLY].bytes;
- break;
- }
- break;
- case IPT_CONNBYTES_AVGPKT:
- switch (sinfo->direction) {
- case IPT_CONNBYTES_DIR_ORIGINAL:
- what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
- counters[IP_CT_DIR_ORIGINAL].packets);
- break;
- case IPT_CONNBYTES_DIR_REPLY:
- what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
- counters[IP_CT_DIR_REPLY].packets);
- break;
- case IPT_CONNBYTES_DIR_BOTH:
- {
- u_int64_t bytes;
- u_int64_t pkts;
- bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
- counters[IP_CT_DIR_REPLY].bytes;
- pkts = counters[IP_CT_DIR_ORIGINAL].packets+
- counters[IP_CT_DIR_REPLY].packets;
-
- /* FIXME_THEORETICAL: what to do if sum
- * overflows ? */
-
- what = div64_64(bytes, pkts);
- }
- break;
- }
- break;
- }
-
- if (sinfo->count.to)
- return (what <= sinfo->count.to && what >= sinfo->count.from);
- else
- return (what >= sinfo->count.from);
-}
-
-static int check(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_connbytes_info *sinfo = matchinfo;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info)))
- return 0;
-
- if (sinfo->what != IPT_CONNBYTES_PKTS &&
- sinfo->what != IPT_CONNBYTES_BYTES &&
- sinfo->what != IPT_CONNBYTES_AVGPKT)
- return 0;
-
- if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL &&
- sinfo->direction != IPT_CONNBYTES_DIR_REPLY &&
- sinfo->direction != IPT_CONNBYTES_DIR_BOTH)
- return 0;
-
- return 1;
-}
-
-static struct ipt_match state_match = {
- .name = "connbytes",
- .match = &match,
- .checkentry = &check,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&state_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&state_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
deleted file mode 100644
index 5306ef293b9..00000000000
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/* This kernel module matches connection mark values set by the
- * CONNMARK target
- *
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
-MODULE_DESCRIPTION("IP tables connmark match module");
-MODULE_LICENSE("GPL");
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_connmark.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_connmark_info *info = matchinfo;
- u_int32_t ctinfo;
- const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
- if (!ctmark)
- return 0;
-
- return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_connmark_info *cm =
- (struct ipt_connmark_info *)matchinfo;
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
- return 0;
-
- if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
- printk(KERN_WARNING "connmark: only support 32bit mark\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match connmark_match = {
- .name = "connmark",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&connmark_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&connmark_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
deleted file mode 100644
index c8d18705469..00000000000
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/* Kernel module to match connection tracking information.
- * Superset of Rusty's minimalistic state match.
- *
- * (C) 2001 Marc Boucher (marc@mbsi.ca).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_conntrack.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables connection tracking match module");
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_conntrack_info *sinfo = matchinfo;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int statebit;
-
- ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
- if (ct == &ip_conntrack_untracked)
- statebit = IPT_CONNTRACK_STATE_UNTRACKED;
- else if (ct)
- statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
- else
- statebit = IPT_CONNTRACK_STATE_INVALID;
-
- if(sinfo->flags & IPT_CONNTRACK_STATE) {
- if (ct) {
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
- statebit |= IPT_CONNTRACK_STATE_SNAT;
-
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
- statebit |= IPT_CONNTRACK_STATE_DNAT;
- }
-
- if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_PROTO) {
- if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_STATUS) {
- if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
- unsigned long expires;
-
- if(!ct)
- return 0;
-
- expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
-
- if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
- return 0;
- }
-
- return 1;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_conntrack_info *sinfo = matchinfo;
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int statebit;
-
- ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
- if (ct == &nf_conntrack_untracked)
- statebit = IPT_CONNTRACK_STATE_UNTRACKED;
- else if (ct)
- statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
- else
- statebit = IPT_CONNTRACK_STATE_INVALID;
-
- if(sinfo->flags & IPT_CONNTRACK_STATE) {
- if (ct) {
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
- statebit |= IPT_CONNTRACK_STATE_SNAT;
-
- if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
- statebit |= IPT_CONNTRACK_STATE_DNAT;
- }
-
- if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_PROTO) {
- if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
- if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_STATUS) {
- if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
- return 0;
- }
-
- if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
- unsigned long expires;
-
- if(!ct)
- return 0;
-
- expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
-
- if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
- return 0;
- }
-
- return 1;
-}
-
-#endif /* CONFIG_NF_IP_CONNTRACK */
-
-static int check(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match conntrack_match = {
- .name = "conntrack",
- .match = &match,
- .checkentry = &check,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- need_ip_conntrack();
- return ipt_register_match(&conntrack_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&conntrack_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c
deleted file mode 100644
index ad3278bba6c..00000000000
--- a/net/ipv4/netfilter/ipt_dccp.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * iptables module for DCCP protocol header matching
- *
- * (C) 2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <net/ip.h>
-#include <linux/dccp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_dccp.h>
-
-#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
- || (!!((invflag) & (option)) ^ (cond)))
-
-static unsigned char *dccp_optbuf;
-static DEFINE_SPINLOCK(dccp_buflock);
-
-static inline int
-dccp_find_option(u_int8_t option,
- const struct sk_buff *skb,
- const struct dccp_hdr *dh,
- int *hotdrop)
-{
- /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
- unsigned char *op;
- unsigned int optoff = __dccp_hdr_len(dh);
- unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh);
- unsigned int i;
-
- if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) {
- *hotdrop = 1;
- return 0;
- }
-
- if (!optlen)
- return 0;
-
- spin_lock_bh(&dccp_buflock);
- op = skb_header_pointer(skb,
- skb->nh.iph->ihl*4 + optoff,
- optlen, dccp_optbuf);
- if (op == NULL) {
- /* If we don't have the whole header, drop packet. */
- spin_unlock_bh(&dccp_buflock);
- *hotdrop = 1;
- return 0;
- }
-
- for (i = 0; i < optlen; ) {
- if (op[i] == option) {
- spin_unlock_bh(&dccp_buflock);
- return 1;
- }
-
- if (op[i] < 2)
- i++;
- else
- i += op[i+1]?:1;
- }
-
- spin_unlock_bh(&dccp_buflock);
- return 0;
-}
-
-
-static inline int
-match_types(const struct dccp_hdr *dh, u_int16_t typemask)
-{
- return (typemask & (1 << dh->dccph_type));
-}
-
-static inline int
-match_option(u_int8_t option, const struct sk_buff *skb,
- const struct dccp_hdr *dh, int *hotdrop)
-{
- return dccp_find_option(option, skb, dh, hotdrop);
-}
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_dccp_info *info =
- (const struct ipt_dccp_info *)matchinfo;
- struct dccp_hdr _dh, *dh;
-
- if (offset)
- return 0;
-
- dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh);
- if (dh == NULL) {
- *hotdrop = 1;
- return 0;
- }
-
- return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0])
- && (ntohs(dh->dccph_sport) <= info->spts[1])),
- IPT_DCCP_SRC_PORTS, info->flags, info->invflags)
- && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0])
- && (ntohs(dh->dccph_dport) <= info->dpts[1])),
- IPT_DCCP_DEST_PORTS, info->flags, info->invflags)
- && DCCHECK(match_types(dh, info->typemask),
- IPT_DCCP_TYPE, info->flags, info->invflags)
- && DCCHECK(match_option(info->option, skb, dh, hotdrop),
- IPT_DCCP_OPTION, info->flags, info->invflags);
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_dccp_info *info;
-
- info = (const struct ipt_dccp_info *)matchinfo;
-
- return ip->proto == IPPROTO_DCCP
- && !(ip->invflags & IPT_INV_PROTO)
- && matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info))
- && !(info->flags & ~IPT_DCCP_VALID_FLAGS)
- && !(info->invflags & ~IPT_DCCP_VALID_FLAGS)
- && !(info->invflags & ~info->flags);
-}
-
-static struct ipt_match dccp_match =
-{
- .name = "dccp",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- int ret;
-
- /* doff is 8 bits, so the maximum option size is (4*256). Don't put
- * this in BSS since DaveM is worried about locked TLB's for kernel
- * BSS. */
- dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
- if (!dccp_optbuf)
- return -ENOMEM;
- ret = ipt_register_match(&dccp_match);
- if (ret)
- kfree(dccp_optbuf);
-
- return ret;
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&dccp_match);
- kfree(dccp_optbuf);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Match for DCCP protocol packets");
-
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
index 5df52a64a5d..92063b4f860 100644
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ b/net/ipv4/netfilter/ipt_dscp.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
static int match(const struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, const void *matchinfo,
- int offset, int *hotdrop)
+ int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_dscp_info *info = matchinfo;
const struct iphdr *iph = skb->nh.iph;
@@ -31,7 +31,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
}
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
void *matchinfo, unsigned int matchsize,
unsigned int hook_mask)
{
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b6f7181e89c..e68b0c7981f 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,7 +67,7 @@ static inline int match_tcp(const struct sk_buff *skb,
static int match(const struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, const void *matchinfo,
- int offset, int *hotdrop)
+ int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_ecn_info *info = matchinfo;
@@ -85,11 +85,12 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
return 1;
}
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip_void,
void *matchinfo, unsigned int matchsize,
unsigned int hook_mask)
{
const struct ipt_ecn_info *info = matchinfo;
+ const struct ipt_ip *ip = ip_void;
if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
return 0;
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
index e1d0dd31e11..9de191a8162 100644
--- a/net/ipv4/netfilter/ipt_esp.c
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -42,6 +42,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
struct ip_esp_hdr _esp, *eh;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
if (offset)
return 0;
- eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ eh = skb_header_pointer(skb, protoff,
sizeof(_esp), &_esp);
if (eh == NULL) {
/* We've been asked to examine this packet, and we
@@ -70,12 +71,13 @@ match(const struct sk_buff *skb,
/* Called when user tries to insert an entry of this type. */
static int
checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip_void,
void *matchinfo,
unsigned int matchinfosize,
unsigned int hook_mask)
{
const struct ipt_esp *espinfo = matchinfo;
+ const struct ipt_ip *ip = ip_void;
/* Must specify proto == ESP, and no unknown invflags */
if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 2dd1cccbdab..4fe48c1bd5f 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -429,6 +429,7 @@ hashlimit_match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
struct ipt_hashlimit_info *r =
@@ -504,7 +505,7 @@ hashlimit_match(const struct sk_buff *skb,
static int
hashlimit_checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *inf,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
deleted file mode 100644
index bf14e1c7798..00000000000
--- a/net/ipv4/netfilter/ipt_helper.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/* iptables module to match on related connections */
-/*
- * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- * - Port to newnat infrastructure
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter.h>
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#endif
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
-MODULE_DESCRIPTION("iptables helper match module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_helper_info *info = matchinfo;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- int ret = info->invert;
-
- ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
- if (!ct) {
- DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
- return ret;
- }
-
- if (!ct->master) {
- DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
- return ret;
- }
-
- read_lock_bh(&ip_conntrack_lock);
- if (!ct->master->helper) {
- DEBUGP("ipt_helper: master ct %p has no helper\n",
- exp->expectant);
- goto out_unlock;
- }
-
- DEBUGP("master's name = %s , info->name = %s\n",
- ct->master->helper->name, info->name);
-
- if (info->name[0] == '\0')
- ret ^= 1;
- else
- ret ^= !strncmp(ct->master->helper->name, info->name,
- strlen(ct->master->helper->name));
-out_unlock:
- read_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_helper_info *info = matchinfo;
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- int ret = info->invert;
-
- ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
- if (!ct) {
- DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
- return ret;
- }
-
- if (!ct->master) {
- DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
- return ret;
- }
-
- read_lock_bh(&nf_conntrack_lock);
- if (!ct->master->helper) {
- DEBUGP("ipt_helper: master ct %p has no helper\n",
- exp->expectant);
- goto out_unlock;
- }
-
- DEBUGP("master's name = %s , info->name = %s\n",
- ct->master->helper->name, info->name);
-
- if (info->name[0] == '\0')
- ret ^= 1;
- else
- ret ^= !strncmp(ct->master->helper->name, info->name,
- strlen(ct->master->helper->name));
-out_unlock:
- read_unlock_bh(&nf_conntrack_lock);
- return ret;
-}
-#endif
-
-static int check(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_helper_info *info = matchinfo;
-
- info->name[29] = '\0';
-
- /* verify size */
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_helper_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match helper_match = {
- .name = "helper",
- .match = &match,
- .checkentry = &check,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- need_ip_conntrack();
- return ipt_register_match(&helper_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&helper_match);
-}
-
-module_init(init);
-module_exit(fini);
-
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index b835b7b2e56..13fb16fb789 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -28,7 +28,7 @@ match(const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const void *matchinfo,
- int offset, int *hotdrop)
+ int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_iprange_info *info = matchinfo;
const struct iphdr *iph = skb->nh.iph;
@@ -63,7 +63,7 @@ match(const struct sk_buff *skb,
}
static int check(const char *tablename,
- const struct ipt_ip *ip,
+ const void *inf,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_length.c b/net/ipv4/netfilter/ipt_length.c
deleted file mode 100644
index 4eabcfbda9d..00000000000
--- a/net/ipv4/netfilter/ipt_length.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Kernel module to match packet length. */
-/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_length.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("IP tables packet length matching module");
-MODULE_LICENSE("GPL");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_length_info *info = matchinfo;
- u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
-
- return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_length_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match length_match = {
- .name = "length",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&length_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&length_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
deleted file mode 100644
index 0c24dcc703a..00000000000
--- a/net/ipv4/netfilter/ipt_limit.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- * `limit', removed logging. Did I mention that
- * Alexey is a fucking genius?
- * Rusty Russell (rusty@rustcorp.com.au). */
-
-/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
- * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_limit.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
-MODULE_DESCRIPTION("iptables rate limit match");
-
-/* The algorithm used is the Simple Token Bucket Filter (TBF)
- * see net/sched/sch_tbf.c in the linux source tree
- */
-
-static DEFINE_SPINLOCK(limit_lock);
-
-/* Rusty: This is my (non-mathematically-inclined) understanding of
- this algorithm. The `average rate' in jiffies becomes your initial
- amount of credit `credit' and the most credit you can ever have
- `credit_cap'. The `peak rate' becomes the cost of passing the
- test, `cost'.
-
- `prev' tracks the last packet hit: you gain one credit per jiffy.
- If you get credit balance more than this, the extra credit is
- discarded. Every time the match passes, you lose `cost' credits;
- if you don't have that many, the test fails.
-
- See Alexey's formal explanation in net/sched/sch_tbf.c.
-
- To get the maxmum range, we multiply by this factor (ie. you get N
- credits per jiffy). We want to allow a rate as low as 1 per day
- (slowest userspace tool allows), which means
- CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
-#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
-
-/* Repeated shift and or gives us all 1s, final shift and add 1 gives
- * us the power of 2 below the theoretical max, so GCC simply does a
- * shift. */
-#define _POW2_BELOW2(x) ((x)|((x)>>1))
-#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
-#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
-#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
-#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
-#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
-
-#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
-
-static int
-ipt_limit_match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master;
- unsigned long now = jiffies;
-
- spin_lock_bh(&limit_lock);
- r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
- if (r->credit > r->credit_cap)
- r->credit = r->credit_cap;
-
- if (r->credit >= r->cost) {
- /* We're not limited. */
- r->credit -= r->cost;
- spin_unlock_bh(&limit_lock);
- return 1;
- }
-
- spin_unlock_bh(&limit_lock);
- return 0;
-}
-
-/* Precision saver. */
-static u_int32_t
-user2credits(u_int32_t user)
-{
- /* If multiplying would overflow... */
- if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
- /* Divide first. */
- return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
-
- return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE;
-}
-
-static int
-ipt_limit_checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_rateinfo *r = matchinfo;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo)))
- return 0;
-
- /* Check for overflow. */
- if (r->burst == 0
- || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
- printk("Overflow in ipt_limit, try lower: %u/%u\n",
- r->avg, r->burst);
- return 0;
- }
-
- /* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies *
- 128. */
- r->prev = jiffies;
- r->credit = user2credits(r->avg * r->burst); /* Credits full. */
- r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
- r->cost = user2credits(r->avg);
-
- /* For SMP, we only want to use one set of counters. */
- r->master = r;
-
- return 1;
-}
-
-static struct ipt_match ipt_limit_reg = {
- .name = "limit",
- .match = ipt_limit_match,
- .checkentry = ipt_limit_checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- if (ipt_register_match(&ipt_limit_reg))
- return -EINVAL;
- return 0;
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&ipt_limit_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
deleted file mode 100644
index 11a459e33f2..00000000000
--- a/net/ipv4/netfilter/ipt_mac.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Kernel module to match MAC address parameters. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-
-#include <linux/netfilter_ipv4/ipt_mac.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables mac matching module");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_mac_info *info = matchinfo;
-
- /* Is mac pointer valid? */
- return (skb->mac.raw >= skb->head
- && (skb->mac.raw + ETH_HLEN) <= skb->data
- /* If so, compare... */
- && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
- == 0) ^ info->invert));
-}
-
-static int
-ipt_mac_checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- /* FORWARD isn't always valid, but it's nice to be able to do --RR */
- if (hook_mask
- & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
- | (1 << NF_IP_FORWARD))) {
- printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
- return 0;
- }
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match mac_match = {
- .name = "mac",
- .match = &match,
- .checkentry = &ipt_mac_checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&mac_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&mac_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
deleted file mode 100644
index 00bef6cdd3f..00000000000
--- a/net/ipv4/netfilter/ipt_mark.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* Kernel module to match NFMARK values. */
-
-/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_mark.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables mark matching module");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_mark_info *info = matchinfo;
-
- return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
- return 0;
-
- if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
- printk(KERN_WARNING "mark: only supports 32bit mark\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match mark_match = {
- .name = "mark",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&mark_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&mark_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 99e8188162e..2d52326553f 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -97,6 +97,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
u16 _ports[2], *pptr;
@@ -105,7 +106,7 @@ match(const struct sk_buff *skb,
if (offset)
return 0;
- pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ pptr = skb_header_pointer(skb, protoff,
sizeof(_ports), _ports);
if (pptr == NULL) {
/* We've been asked to examine this packet, and we
@@ -128,6 +129,7 @@ match_v1(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
u16 _ports[2], *pptr;
@@ -136,7 +138,7 @@ match_v1(const struct sk_buff *skb,
if (offset)
return 0;
- pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+ pptr = skb_header_pointer(skb, protoff,
sizeof(_ports), _ports);
if (pptr == NULL) {
/* We've been asked to examine this packet, and we
@@ -154,7 +156,7 @@ match_v1(const struct sk_buff *skb,
/* Called when user tries to insert an entry of this type. */
static int
checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
@@ -164,7 +166,7 @@ checkentry(const char *tablename,
static int
checkentry_v1(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 0cee2862ed8..4843d0c9734 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -27,6 +27,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
const struct ipt_owner_info *info = matchinfo;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
static int
checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
deleted file mode 100644
index 1a53924041f..00000000000
--- a/net/ipv4/netfilter/ipt_physdev.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Kernel module to match the bridge port in and
- * out device for IP packets coming into contact with a bridge. */
-
-/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ipt_physdev.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_bridge.h>
-#define MATCH 1
-#define NOMATCH 0
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("iptables bridge physical device match module");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- int i;
- static const char nulldevname[IFNAMSIZ];
- const struct ipt_physdev_info *info = matchinfo;
- unsigned int ret;
- const char *indev, *outdev;
- struct nf_bridge_info *nf_bridge;
-
- /* Not a bridged IP packet or no info available yet:
- * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
- * the destination device will be a bridge. */
- if (!(nf_bridge = skb->nf_bridge)) {
- /* Return MATCH if the invert flags of the used options are on */
- if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
- !(info->invert & IPT_PHYSDEV_OP_BRIDGED))
- return NOMATCH;
- if ((info->bitmask & IPT_PHYSDEV_OP_ISIN) &&
- !(info->invert & IPT_PHYSDEV_OP_ISIN))
- return NOMATCH;
- if ((info->bitmask & IPT_PHYSDEV_OP_ISOUT) &&
- !(info->invert & IPT_PHYSDEV_OP_ISOUT))
- return NOMATCH;
- if ((info->bitmask & IPT_PHYSDEV_OP_IN) &&
- !(info->invert & IPT_PHYSDEV_OP_IN))
- return NOMATCH;
- if ((info->bitmask & IPT_PHYSDEV_OP_OUT) &&
- !(info->invert & IPT_PHYSDEV_OP_OUT))
- return NOMATCH;
- return MATCH;
- }
-
- /* This only makes sense in the FORWARD and POSTROUTING chains */
- if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
- (!!(nf_bridge->mask & BRNF_BRIDGED) ^
- !(info->invert & IPT_PHYSDEV_OP_BRIDGED)))
- return NOMATCH;
-
- if ((info->bitmask & IPT_PHYSDEV_OP_ISIN &&
- (!nf_bridge->physindev ^ !!(info->invert & IPT_PHYSDEV_OP_ISIN))) ||
- (info->bitmask & IPT_PHYSDEV_OP_ISOUT &&
- (!nf_bridge->physoutdev ^ !!(info->invert & IPT_PHYSDEV_OP_ISOUT))))
- return NOMATCH;
-
- if (!(info->bitmask & IPT_PHYSDEV_OP_IN))
- goto match_outdev;
- indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
- ret |= (((const unsigned int *)indev)[i]
- ^ ((const unsigned int *)info->physindev)[i])
- & ((const unsigned int *)info->in_mask)[i];
- }
-
- if ((ret == 0) ^ !(info->invert & IPT_PHYSDEV_OP_IN))
- return NOMATCH;
-
-match_outdev:
- if (!(info->bitmask & IPT_PHYSDEV_OP_OUT))
- return MATCH;
- outdev = nf_bridge->physoutdev ?
- nf_bridge->physoutdev->name : nulldevname;
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
- ret |= (((const unsigned int *)outdev)[i]
- ^ ((const unsigned int *)info->physoutdev)[i])
- & ((const unsigned int *)info->out_mask)[i];
- }
-
- return (ret != 0) ^ !(info->invert & IPT_PHYSDEV_OP_OUT);
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_physdev_info *info = matchinfo;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_physdev_info)))
- return 0;
- if (!(info->bitmask & IPT_PHYSDEV_OP_MASK) ||
- info->bitmask & ~IPT_PHYSDEV_OP_MASK)
- return 0;
- return 1;
-}
-
-static struct ipt_match physdev_match = {
- .name = "physdev",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&physdev_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&physdev_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_pkttype.c b/net/ipv4/netfilter/ipt_pkttype.c
deleted file mode 100644
index 8ddb1dc5e5a..00000000000
--- a/net/ipv4/netfilter/ipt_pkttype.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-
-#include <linux/netfilter_ipv4/ipt_pkttype.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
-MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
-
-static int match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_pkttype_info *info = matchinfo;
-
- return (skb->pkt_type == info->pkttype) ^ info->invert;
-}
-
-static int checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
-/*
- if (hook_mask
- & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
- | (1 << NF_IP_FORWARD))) {
- printk("ipt_pkttype: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
- return 0;
- }
-*/
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_pkttype_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match pkttype_match = {
- .name = "pkttype",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&pkttype_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&pkttype_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 00000000000..18ca8258a1c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,173 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+static inline int
+match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
+{
+#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
+
+ return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
+ MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
+ MATCH(proto, x->id.proto) &&
+ MATCH(mode, x->props.mode) &&
+ MATCH(spi, x->id.spi) &&
+ MATCH(reqid, x->props.reqid);
+}
+
+static int
+match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+ const struct ipt_policy_elem *e;
+ struct sec_path *sp = skb->sp;
+ int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+ int i, pos;
+
+ if (sp == NULL)
+ return -1;
+ if (strict && info->len != sp->len)
+ return 0;
+
+ for (i = sp->len - 1; i >= 0; i--) {
+ pos = strict ? i - sp->len + 1 : 0;
+ if (pos >= info->len)
+ return 0;
+ e = &info->pol[pos];
+
+ if (match_xfrm_state(sp->x[i].xvec, e)) {
+ if (!strict)
+ return 1;
+ } else if (strict)
+ return 0;
+ }
+
+ return strict ? 1 : 0;
+}
+
+static int
+match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+ const struct ipt_policy_elem *e;
+ struct dst_entry *dst = skb->dst;
+ int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+ int i, pos;
+
+ if (dst->xfrm == NULL)
+ return -1;
+
+ for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+ pos = strict ? i : 0;
+ if (pos >= info->len)
+ return 0;
+ e = &info->pol[pos];
+
+ if (match_xfrm_state(dst->xfrm, e)) {
+ if (!strict)
+ return 1;
+ } else if (strict)
+ return 0;
+ }
+
+ return strict ? 1 : 0;
+}
+
+static int match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ unsigned int protoff,
+ int *hotdrop)
+{
+ const struct ipt_policy_info *info = matchinfo;
+ int ret;
+
+ if (info->flags & IPT_POLICY_MATCH_IN)
+ ret = match_policy_in(skb, info);
+ else
+ ret = match_policy_out(skb, info);
+
+ if (ret < 0)
+ ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
+ else if (info->flags & IPT_POLICY_MATCH_NONE)
+ ret = 0;
+
+ return ret;
+}
+
+static int checkentry(const char *tablename, const void *ip_void,
+ void *matchinfo, unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ struct ipt_policy_info *info = matchinfo;
+
+ if (matchsize != IPT_ALIGN(sizeof(*info))) {
+ printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
+ matchsize, IPT_ALIGN(sizeof(*info)));
+ return 0;
+ }
+ if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
+ printk(KERN_ERR "ipt_policy: neither incoming nor "
+ "outgoing policy selected\n");
+ return 0;
+ }
+ if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
+ && info->flags & IPT_POLICY_MATCH_OUT) {
+ printk(KERN_ERR "ipt_policy: output policy not valid in "
+ "PRE_ROUTING and INPUT\n");
+ return 0;
+ }
+ if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
+ && info->flags & IPT_POLICY_MATCH_IN) {
+ printk(KERN_ERR "ipt_policy: input policy not valid in "
+ "POST_ROUTING and OUTPUT\n");
+ return 0;
+ }
+ if (info->len > IPT_POLICY_MAX_ELEM) {
+ printk(KERN_ERR "ipt_policy: too many policy elements\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_match policy_match = {
+ .name = "policy",
+ .match = match,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&policy_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_realm.c b/net/ipv4/netfilter/ipt_realm.c
deleted file mode 100644
index 54a6897ebaa..00000000000
--- a/net/ipv4/netfilter/ipt_realm.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* IP tables module for matching the routing realm
- *
- * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
- *
- * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ipt_realm.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables realm match");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_realm_info *info = matchinfo;
- struct dst_entry *dst = skb->dst;
-
- return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
-}
-
-static int check(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (hook_mask
- & ~((1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
- (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN))) {
- printk("ipt_realm: only valid for POST_ROUTING, LOCAL_OUT, "
- "LOCAL_IN or FORWARD.\n");
- return 0;
- }
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_realm_info))) {
- printk("ipt_realm: invalid matchsize.\n");
- return 0;
- }
- return 1;
-}
-
-static struct ipt_match realm_match = {
- .name = "realm",
- .match = match,
- .checkentry = check,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&realm_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&realm_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 2d44b07688a..44611d6d14f 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -24,10 +24,10 @@
#define HASH_LOG 9
/* Defaults, these can be overridden on the module command-line. */
-static int ip_list_tot = 100;
-static int ip_pkt_list_tot = 20;
-static int ip_list_hash_size = 0;
-static int ip_list_perms = 0644;
+static unsigned int ip_list_tot = 100;
+static unsigned int ip_pkt_list_tot = 20;
+static unsigned int ip_list_hash_size = 0;
+static unsigned int ip_list_perms = 0644;
#ifdef DEBUG
static int debug = 1;
#endif
@@ -38,13 +38,13 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. htt
MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
MODULE_LICENSE("GPL");
-module_param(ip_list_tot, int, 0400);
-module_param(ip_pkt_list_tot, int, 0400);
-module_param(ip_list_hash_size, int, 0400);
-module_param(ip_list_perms, int, 0400);
+module_param(ip_list_tot, uint, 0400);
+module_param(ip_pkt_list_tot, uint, 0400);
+module_param(ip_list_hash_size, uint, 0400);
+module_param(ip_list_perms, uint, 0400);
#ifdef DEBUG
-module_param(debug, int, 0600);
-MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
+module_param(debug, bool, 0600);
+MODULE_PARM_DESC(debug,"enable debugging output");
#endif
MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
@@ -104,6 +104,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop);
/* Function to hash a given address into the hash table of table_size size */
@@ -317,7 +318,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
skb->nh.iph->daddr = 0;
/* Clear ttl since we have no way of knowing it */
skb->nh.iph->ttl = 0;
- match(skb,NULL,NULL,info,0,NULL);
+ match(skb,NULL,NULL,info,0,0,NULL);
kfree(skb->nh.iph);
out_free_skb:
@@ -357,6 +358,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
int pkt_count, hits_found, ans;
@@ -532,6 +534,7 @@ match(const struct sk_buff *skb,
}
if(info->seconds && info->hit_count) {
for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+ if(r_list[location].last_pkts[pkt_count] == 0) break;
if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
}
if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
@@ -653,7 +656,7 @@ match(const struct sk_buff *skb,
*/
static int
checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/ipv4/netfilter/ipt_sctp.c
deleted file mode 100644
index fe2b327bcaa..00000000000
--- a/net/ipv4/netfilter/ipt_sctp.c
+++ /dev/null
@@ -1,203 +0,0 @@
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <linux/sctp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_sctp.h>
-
-#ifdef DEBUG_SCTP
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
- || (!!((invflag) & (option)) ^ (cond)))
-
-static int
-match_flags(const struct ipt_sctp_flag_info *flag_info,
- const int flag_count,
- u_int8_t chunktype,
- u_int8_t chunkflags)
-{
- int i;
-
- for (i = 0; i < flag_count; i++) {
- if (flag_info[i].chunktype == chunktype) {
- return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag;
- }
- }
-
- return 1;
-}
-
-static int
-match_packet(const struct sk_buff *skb,
- const u_int32_t *chunkmap,
- int chunk_match_type,
- const struct ipt_sctp_flag_info *flag_info,
- const int flag_count,
- int *hotdrop)
-{
- int offset;
- u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
- sctp_chunkhdr_t _sch, *sch;
-
-#ifdef DEBUG_SCTP
- int i = 0;
-#endif
-
- if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
- SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
- }
-
- offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
- do {
- sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
- if (sch == NULL) {
- duprintf("Dropping invalid SCTP packet.\n");
- *hotdrop = 1;
- return 0;
- }
-
- duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
- ++i, offset, sch->type, htons(sch->length), sch->flags);
-
- offset += (htons(sch->length) + 3) & ~3;
-
- duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
-
- if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) {
- switch (chunk_match_type) {
- case SCTP_CHUNK_MATCH_ANY:
- if (match_flags(flag_info, flag_count,
- sch->type, sch->flags)) {
- return 1;
- }
- break;
-
- case SCTP_CHUNK_MATCH_ALL:
- if (match_flags(flag_info, flag_count,
- sch->type, sch->flags)) {
- SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
- }
- break;
-
- case SCTP_CHUNK_MATCH_ONLY:
- if (!match_flags(flag_info, flag_count,
- sch->type, sch->flags)) {
- return 0;
- }
- break;
- }
- } else {
- switch (chunk_match_type) {
- case SCTP_CHUNK_MATCH_ONLY:
- return 0;
- }
- }
- } while (offset < skb->len);
-
- switch (chunk_match_type) {
- case SCTP_CHUNK_MATCH_ALL:
- return SCTP_CHUNKMAP_IS_CLEAR(chunkmap);
- case SCTP_CHUNK_MATCH_ANY:
- return 0;
- case SCTP_CHUNK_MATCH_ONLY:
- return 1;
- }
-
- /* This will never be reached, but required to stop compiler whine */
- return 0;
-}
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_sctp_info *info;
- sctp_sctphdr_t _sh, *sh;
-
- info = (const struct ipt_sctp_info *)matchinfo;
-
- if (offset) {
- duprintf("Dropping non-first fragment.. FIXME\n");
- return 0;
- }
-
- sh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_sh), &_sh);
- if (sh == NULL) {
- duprintf("Dropping evil TCP offset=0 tinygram.\n");
- *hotdrop = 1;
- return 0;
- }
- duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
-
- return SCCHECK(((ntohs(sh->source) >= info->spts[0])
- && (ntohs(sh->source) <= info->spts[1])),
- IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
- && SCCHECK(((ntohs(sh->dest) >= info->dpts[0])
- && (ntohs(sh->dest) <= info->dpts[1])),
- IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
- && SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
- info->flag_info, info->flag_count,
- hotdrop),
- IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_sctp_info *info;
-
- info = (const struct ipt_sctp_info *)matchinfo;
-
- return ip->proto == IPPROTO_SCTP
- && !(ip->invflags & IPT_INV_PROTO)
- && matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info))
- && !(info->flags & ~IPT_SCTP_VALID_FLAGS)
- && !(info->invflags & ~IPT_SCTP_VALID_FLAGS)
- && !(info->invflags & ~info->flags)
- && ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) ||
- (info->chunk_match_type &
- (SCTP_CHUNK_MATCH_ALL
- | SCTP_CHUNK_MATCH_ANY
- | SCTP_CHUNK_MATCH_ONLY)));
-}
-
-static struct ipt_match sctp_match =
-{
- .list = { NULL, NULL},
- .name = "sctp",
- .match = &match,
- .checkentry = &checkentry,
- .destroy = NULL,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&sctp_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&sctp_match);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Match for SCTP protocol packets");
-
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
deleted file mode 100644
index 4d7f16b70ce..00000000000
--- a/net/ipv4/netfilter/ipt_state.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Kernel module to match connection tracking information. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_state.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("iptables connection tracking state match module");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_state_info *sinfo = matchinfo;
- enum ip_conntrack_info ctinfo;
- unsigned int statebit;
-
- if (nf_ct_is_untracked(skb))
- statebit = IPT_STATE_UNTRACKED;
- else if (!nf_ct_get_ctinfo(skb, &ctinfo))
- statebit = IPT_STATE_INVALID;
- else
- statebit = IPT_STATE_BIT(ctinfo);
-
- return (sinfo->statemask & statebit);
-}
-
-static int check(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match state_match = {
- .name = "state",
- .match = &match,
- .checkentry = &check,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- need_ip_conntrack();
- return ipt_register_match(&state_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&state_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c
deleted file mode 100644
index b5def204d79..00000000000
--- a/net/ipv4/netfilter/ipt_string.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/* String matching match for iptables
- *
- * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_string.h>
-#include <linux/textsearch.h>
-
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
-MODULE_DESCRIPTION("IP tables string match module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- struct ts_state state;
- struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
-
- memset(&state, 0, sizeof(struct ts_state));
-
- return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
- conf->to_offset, conf->config, &state)
- != UINT_MAX) && !conf->invert;
-}
-
-#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
-
-static int checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_string_info *conf = matchinfo;
- struct ts_config *ts_conf;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
- return 0;
-
- /* Damn, can't handle this case properly with iptables... */
- if (conf->from_offset > conf->to_offset)
- return 0;
-
- ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
- GFP_KERNEL, TS_AUTOLOAD);
- if (IS_ERR(ts_conf))
- return 0;
-
- conf->config = ts_conf;
-
- return 1;
-}
-
-static void destroy(void *matchinfo, unsigned int matchsize)
-{
- textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
-}
-
-static struct ipt_match string_match = {
- .name = "string",
- .match = match,
- .checkentry = checkentry,
- .destroy = destroy,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&string_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&string_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c
deleted file mode 100644
index 4dc9b16ab4a..00000000000
--- a/net/ipv4/netfilter/ipt_tcpmss.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Kernel module to match TCP MSS values. */
-
-/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ipt_tcpmss.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-#define TH_SYN 0x02
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables TCP MSS match module");
-
-/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
-static inline int
-mssoption_match(u_int16_t min, u_int16_t max,
- const struct sk_buff *skb,
- int invert,
- int *hotdrop)
-{
- struct tcphdr _tcph, *th;
- /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
- u8 _opt[15 * 4 - sizeof(_tcph)], *op;
- unsigned int i, optlen;
-
- /* If we don't have the whole header, drop packet. */
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- goto dropit;
-
- /* Malformed. */
- if (th->doff*4 < sizeof(*th))
- goto dropit;
-
- optlen = th->doff*4 - sizeof(*th);
- if (!optlen)
- goto out;
-
- /* Truncated options. */
- op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th),
- optlen, _opt);
- if (op == NULL)
- goto dropit;
-
- for (i = 0; i < optlen; ) {
- if (op[i] == TCPOPT_MSS
- && (optlen - i) >= TCPOLEN_MSS
- && op[i+1] == TCPOLEN_MSS) {
- u_int16_t mssval;
-
- mssval = (op[i+2] << 8) | op[i+3];
-
- return (mssval >= min && mssval <= max) ^ invert;
- }
- if (op[i] < 2) i++;
- else i += op[i+1]?:1;
- }
-out:
- return invert;
-
- dropit:
- *hotdrop = 1;
- return 0;
-}
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- int *hotdrop)
-{
- const struct ipt_tcpmss_match_info *info = matchinfo;
-
- return mssoption_match(info->mss_min, info->mss_max, skb,
- info->invert, hotdrop);
-}
-
-static int
-checkentry(const char *tablename,
- const struct ipt_ip *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)))
- return 0;
-
- /* Must specify -p tcp */
- if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
- printk("tcpmss: Only works on TCP packets\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match tcpmss_match = {
- .name = "tcpmss",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&tcpmss_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&tcpmss_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 086a1bb61e3..9ab765e126f 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -23,6 +23,7 @@ match(const struct sk_buff *skb,
const struct net_device *out,
const void *matchinfo,
int offset,
+ unsigned int protoff,
int *hotdrop)
{
const struct ipt_tos_info *info = matchinfo;
@@ -32,7 +33,7 @@ match(const struct sk_buff *skb,
static int
checkentry(const char *tablename,
- const struct ipt_ip *ip,
+ const void *ip,
void *matchinfo,
unsigned int matchsize,
unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 219aa9de88c..82da53f430a 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
static int match(const struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, const void *matchinfo,
- int offset, int *hotdrop)
+ int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_ttl_info *info = matchinfo;
@@ -47,7 +47,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
return 0;
}
-static int checkentry(const char *tablename, const struct ipt_ip *ip,
+static int checkentry(const char *tablename, const void *ip,
void *matchinfo, unsigned int matchsize,
unsigned int hook_mask)
{
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 260a4f0a2a9..212a3079085 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -78,7 +78,8 @@ static struct ipt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.lock = RW_LOCK_UNLOCKED,
- .me = THIS_MODULE
+ .me = THIS_MODULE,
+ .af = AF_INET,
};
/* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 160eb11b6e2..3212a5cc4b6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -109,6 +109,7 @@ static struct ipt_table packet_mangler = {
.valid_hooks = MANGLE_VALID_HOOKS,
.lock = RW_LOCK_UNLOCKED,
.me = THIS_MODULE,
+ .af = AF_INET,
};
/* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 47449ba83eb..fdb9e9c81e8 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -83,7 +83,8 @@ static struct ipt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
.lock = RW_LOCK_UNLOCKED,
- .me = THIS_MODULE
+ .me = THIS_MODULE,
+ .af = AF_INET,
};
/* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8202c1c0afa..167619f638c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
+#include <net/route.h>
#include <net/ip.h>
#include <linux/netfilter_ipv4.h>
@@ -180,30 +181,6 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
return NF_ACCEPT;
}
-static unsigned int ipv4_refrag(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
-
- /* We've seen it coming out the other side: confirm */
- if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
- return NF_DROP;
-
- /* Local packets are never produced too large for their
- interface. We degfragment them at LOCAL_OUT, however,
- so we have to refragment them here. */
- if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
- !skb_shinfo(*pskb)->tso_size) {
- /* No hook can be after us, so this should be OK. */
- ip_fragment(*pskb, okfn);
- return NF_STOLEN;
- }
- return NF_ACCEPT;
-}
-
static unsigned int ipv4_conntrack_in(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
@@ -283,7 +260,7 @@ static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
/* Refragmenter; last chance. */
static struct nf_hook_ops ipv4_conntrack_out_ops = {
- .hook = ipv4_refrag,
+ .hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_POST_ROUTING,
@@ -300,7 +277,7 @@ static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_proto_icmp.c */
-extern unsigned long nf_ct_icmp_timeout;
+extern unsigned int nf_ct_icmp_timeout;
static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
static ctl_table nf_ct_sysctl_table[] = {
@@ -392,6 +369,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -ENOENT;
}
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
+ &tuple->src.u3.ip);
+ NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
+ &tuple->dst.u3.ip);
+ return 0;
+
+nfattr_failure:
+ return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {
+ [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
+ [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
+};
+
+static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
+ struct nf_conntrack_tuple *t)
+{
+ if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
+ return -EINVAL;
+
+ if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+ return -EINVAL;
+
+ t->src.u3.ip =
+ *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+ t->dst.u3.ip =
+ *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+
+ return 0;
+}
+#endif
+
static struct nf_sockopt_ops so_getorigdst = {
.pf = PF_INET,
.get_optmin = SO_ORIGINAL_DST,
@@ -408,6 +427,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.print_conntrack = ipv4_print_conntrack,
.prepare = ipv4_prepare,
.get_features = ipv4_get_features,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nfattr = ipv4_tuple_to_nfattr,
+ .nfattr_to_tuple = ipv4_nfattr_to_tuple,
+#endif
.me = THIS_MODULE,
};
@@ -551,7 +575,7 @@ MODULE_LICENSE("GPL");
static int __init init(void)
{
- need_nf_conntrack();
+ need_conntrack();
return init_or_cleanup(1);
}
@@ -563,9 +587,4 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
-void need_ip_conntrack(void)
-{
-}
-
-EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7ddb5c08f7b..52dc175be39 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
return 1;
}
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+ [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
- /* Add 1; spaces filled with 0. */
- static u_int8_t invmap[]
- = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
- [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
- [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
- [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
- [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
- [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
- [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
- [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
if (orig->dst.u.icmp.type >= sizeof(invmap)
|| !invmap[orig->dst.u.icmp.type])
return 0;
@@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct,
static int icmp_new(struct nf_conn *conntrack,
const struct sk_buff *skb, unsigned int dataoff)
{
- static u_int8_t valid_new[]
- = { [ICMP_ECHO] = 1,
- [ICMP_TIMESTAMP] = 1,
- [ICMP_INFO_REQUEST] = 1,
- [ICMP_ADDRESS] = 1 };
+ static const u_int8_t valid_new[] = {
+ [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1
+ };
if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
|| !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
@@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
- innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+ innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -281,6 +283,60 @@ checksum_skipped:
return icmp_error_message(skb, ctinfo, hooknum);
}
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+ &t->src.u.icmp.id);
+ NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
+ &t->dst.u.icmp.type);
+ NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
+ &t->dst.u.icmp.code);
+
+ return 0;
+
+nfattr_failure:
+ return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+ [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
+ [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
+ [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t)
+};
+
+static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ if (!tb[CTA_PROTO_ICMP_TYPE-1]
+ || !tb[CTA_PROTO_ICMP_CODE-1]
+ || !tb[CTA_PROTO_ICMP_ID-1])
+ return -EINVAL;
+
+ if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
+ tuple->dst.u.icmp.code =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
+ tuple->src.u.icmp.id =
+ *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+
+ if (tuple->dst.u.icmp.type >= sizeof(invmap)
+ || !invmap[tuple->dst.u.icmp.type])
+ return -EINVAL;
+
+ return 0;
+}
+#endif
+
struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
{
.list = { NULL, NULL },
@@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,
- .me = NULL
+ .me = NULL,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nfattr = icmp_tuple_to_nfattr,
+ .nfattr_to_tuple = icmp_nfattr_to_tuple,
+#endif
};
EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0d7dc668db4..39d49dc333a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/udp.h>
+#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/sock.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d626..f29a12da510 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -40,12 +40,12 @@
*/
#include <linux/config.h>
+#include <linux/types.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
-#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return NET_RX_DROP;
}
+ nf_reset(skb);
skb_push(skb, skb->data - skb->nh.raw);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f701a136a6a..d82c242ea70 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,9 +240,9 @@ static unsigned rt_hash_mask;
static int rt_hash_log;
static unsigned int rt_hash_rnd;
-static struct rt_cache_stat *rt_cache_stat;
-#define RT_CACHE_STAT_INC(field) \
- (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
+static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#define RT_CACHE_STAT_INC(field) \
+ (per_cpu(rt_cache_stat, raw_smp_processor_id()).field++)
static int rt_intern_hash(unsigned hash, struct rtable *rth,
struct rtable **res);
@@ -401,7 +401,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(rt_cache_stat, cpu);
+ return &per_cpu(rt_cache_stat, cpu);
}
return NULL;
}
@@ -414,7 +414,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(rt_cache_stat, cpu);
+ return &per_cpu(rt_cache_stat, cpu);
}
return NULL;
@@ -3160,10 +3160,6 @@ int __init ip_rt_init(void)
ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
ip_rt_max_size = (rt_hash_mask + 1) * 16;
- rt_cache_stat = alloc_percpu(struct rt_cache_stat);
- if (!rt_cache_stat)
- return -ENOMEM;
-
devinet_init();
ip_fib_init();
@@ -3191,7 +3187,6 @@ int __init ip_rt_init(void)
if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
proc_net_stat))) {
- free_percpu(rt_cache_stat);
return -ENOMEM;
}
rtstat_pde->proc_fops = &rt_cpu_seq_fops;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a34e60ea48a..e20be3331f6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
- child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+ child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
if (child)
inet_csk_reqsk_queue_add(sk, req, child);
else
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 01444a02b48..16984d4a8a0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
#include <linux/sysctl.h>
#include <linux/config.h>
#include <linux/igmp.h>
+#include <linux/inetdevice.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -22,6 +23,7 @@
extern int sysctl_ip_nonlocal_bind;
#ifdef CONFIG_SYSCTL
+static int zero;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -614,6 +616,15 @@ ctl_table ipv4_table[] = {
.strategy = &sysctl_jiffies
},
{
+ .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
+ .procname = "ipfrag_max_dist",
+ .data = &sysctl_ipfrag_max_dist,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &zero
+ },
+ {
.ctl_name = NET_TCP_NO_METRICS_SAVE,
.procname = "tcp_no_metrics_save",
.data = &sysctl_tcp_nometrics_save,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef98b14ac56..00aa80e9324 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
int err = 0;
if (level != SOL_TCP)
- return tp->af_specific->setsockopt(sk, level, optname,
- optval, optlen);
+ return icsk->icsk_af_ops->setsockopt(sk, level, optname,
+ optval, optlen);
/* This is a string value all the others are int's */
if (optname == TCP_CONGESTION) {
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
- info->tcpi_pmtu = tp->pmtu_cookie;
+ info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int val, len;
if (level != SOL_TCP)
- return tp->af_specific->getsockopt(sk, level, optname,
- optval, optlen);
+ return icsk->icsk_af_ops->getsockopt(sk, level, optname,
+ optval, optlen);
if (get_user(len, optlen))
return -EFAULT;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 1d0cd86621b..035f2092d73 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -30,8 +30,6 @@ static int fast_convergence = 1;
static int max_increment = 16;
static int low_window = 14;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
-static int low_utilization_threshold = 153;
-static int low_utilization_period = 2;
static int initial_ssthresh = 100;
static int smooth_part = 20;
@@ -43,10 +41,6 @@ module_param(low_window, int, 0644);
MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)");
module_param(beta, int, 0644);
MODULE_PARM_DESC(beta, "beta for multiplicative increase");
-module_param(low_utilization_threshold, int, 0644);
-MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode");
-module_param(low_utilization_period, int, 0644);
-MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)");
module_param(initial_ssthresh, int, 0644);
MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
module_param(smooth_part, int, 0644);
@@ -60,11 +54,6 @@ struct bictcp {
u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
- u32 delay_min; /* min delay */
- u32 delay_max; /* max delay */
- u32 last_delay;
- u8 low_utilization;/* 0: high; 1: low */
- u32 low_utilization_start; /* starting time of low utilization detection*/
u32 epoch_start; /* beginning of an epoch */
#define ACK_RATIO_SHIFT 4
u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
@@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->loss_cwnd = 0;
ca->last_cwnd = 0;
ca->last_time = 0;
- ca->delay_min = 0;
- ca->delay_max = 0;
- ca->last_delay = 0;
- ca->low_utilization = 0;
- ca->low_utilization_start = 0;
ca->epoch_start = 0;
ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
}
@@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
}
/* if in slow start or link utilization is very low */
- if ( ca->loss_cwnd == 0 ||
- (cwnd > ca->loss_cwnd && ca->low_utilization)) {
+ if (ca->loss_cwnd == 0) {
if (ca->cnt > 20) /* increase cwnd 5% per RTT */
ca->cnt = 20;
}
@@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->cnt = 1;
}
-
-/* Detect low utilization in congestion avoidance */
-static inline void bictcp_low_utilization(struct sock *sk, int flag)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
- u32 dist, delay;
-
- /* No time stamp */
- if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
- /* Discard delay samples right after fast recovery */
- tcp_time_stamp < ca->epoch_start + HZ ||
- /* this delay samples may not be accurate */
- flag == 0) {
- ca->last_delay = 0;
- goto notlow;
- }
-
- delay = ca->last_delay<<3; /* use the same scale as tp->srtt*/
- ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
- if (delay == 0) /* no previous delay sample */
- goto notlow;
-
- /* first time call or link delay decreases */
- if (ca->delay_min == 0 || ca->delay_min > delay) {
- ca->delay_min = ca->delay_max = delay;
- goto notlow;
- }
-
- if (ca->delay_max < delay)
- ca->delay_max = delay;
-
- /* utilization is low, if avg delay < dist*threshold
- for checking_period time */
- dist = ca->delay_max - ca->delay_min;
- if (dist <= ca->delay_min>>6 ||
- tp->srtt - ca->delay_min >= (dist*low_utilization_threshold)>>10)
- goto notlow;
-
- if (ca->low_utilization_start == 0) {
- ca->low_utilization = 0;
- ca->low_utilization_start = tcp_time_stamp;
- } else if ((s32)(tcp_time_stamp - ca->low_utilization_start)
- > low_utilization_period*HZ) {
- ca->low_utilization = 1;
- }
-
- return;
-
- notlow:
- ca->low_utilization = 0;
- ca->low_utilization_start = 0;
-
-}
-
static void bictcp_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int data_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- bictcp_low_utilization(sk, data_acked);
-
if (!tcp_is_cwnd_limited(sk, in_flight))
return;
@@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
- /* in case of wrong delay_max*/
- if (ca->delay_min > 0 && ca->delay_max > ca->delay_min)
- ca->delay_max = ca->delay_min
- + ((ca->delay_max - ca->delay_min)* 90) / 100;
-
/* Wmax and fast convergence */
if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
@@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state)
bictcp_reset(inet_csk_ca(sk));
}
-/* Track delayed acknowledgement ratio using sliding window
+/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
static void bictcp_acked(struct sock *sk, u32 cnt)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
+ if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
struct bictcp *ca = inet_csk_ca(sk);
cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ca->delayed_ack += cnt;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c7cc62c8dc1..e688c687d62 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
return err;
}
+
+/*
+ * Linear increase during slow start
+ */
+void tcp_slow_start(struct tcp_sock *tp)
+{
+ if (sysctl_tcp_abc) {
+ /* RFC3465: Slow Start
+ * TCP sender SHOULD increase cwnd by the number of
+ * previously unacknowledged bytes ACKed by each incoming
+ * acknowledgment, provided the increase is not more than L
+ */
+ if (tp->bytes_acked < tp->mss_cache)
+ return;
+
+ /* We MAY increase by 2 if discovered delayed ack */
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ }
+ tp->bytes_acked = 0;
+
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+}
+EXPORT_SYMBOL_GPL(tcp_slow_start);
+
/*
* TCP Reno congestion control
* This is special case used for fallback as well.
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
new file mode 100644
index 00000000000..31a4986dfbf
--- /dev/null
+++ b/net/ipv4/tcp_cubic.c
@@ -0,0 +1,411 @@
+/*
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ *
+ * This is from the implementation of CUBIC TCP in
+ * Injong Rhee, Lisong Xu.
+ * "CUBIC: A New TCP-Friendly High-Speed TCP Variant
+ * in PFLDnet 2005
+ * Available from:
+ * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+ *
+ * Unless CUBIC is enabled and congestion window is large
+ * this behaves the same as the original Reno.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <net/tcp.h>
+#include <asm/div64.h>
+
+#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
+ * max_cwnd = snd_cwnd * beta
+ */
+#define BICTCP_B 4 /*
+ * In binary search,
+ * go to point (max+min)/N
+ */
+#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
+
+static int fast_convergence = 1;
+static int max_increment = 16;
+static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh = 100;
+static int bic_scale = 41;
+static int tcp_friendliness = 1;
+
+static u32 cube_rtt_scale;
+static u32 beta_scale;
+static u64 cube_factor;
+
+/* Note parameters that are used for precomputing scale factors are read-only */
+module_param(fast_convergence, int, 0644);
+MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
+module_param(max_increment, int, 0644);
+MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
+module_param(beta, int, 0444);
+MODULE_PARM_DESC(beta, "beta for multiplicative increase");
+module_param(initial_ssthresh, int, 0644);
+MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
+module_param(bic_scale, int, 0444);
+MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
+module_param(tcp_friendliness, int, 0644);
+MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
+
+#include <asm/div64.h>
+
+/* BIC TCP Parameters */
+struct bictcp {
+ u32 cnt; /* increase cwnd by 1 after ACKs */
+ u32 last_max_cwnd; /* last maximum snd_cwnd */
+ u32 loss_cwnd; /* congestion window at last loss */
+ u32 last_cwnd; /* the last snd_cwnd */
+ u32 last_time; /* time when updated last_cwnd */
+ u32 bic_origin_point;/* origin point of bic function */
+ u32 bic_K; /* time to origin point from the beginning of the current epoch */
+ u32 delay_min; /* min delay */
+ u32 epoch_start; /* beginning of an epoch */
+ u32 ack_cnt; /* number of acks */
+ u32 tcp_cwnd; /* estimated tcp cwnd */
+#define ACK_RATIO_SHIFT 4
+ u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
+};
+
+static inline void bictcp_reset(struct bictcp *ca)
+{
+ ca->cnt = 0;
+ ca->last_max_cwnd = 0;
+ ca->loss_cwnd = 0;
+ ca->last_cwnd = 0;
+ ca->last_time = 0;
+ ca->bic_origin_point = 0;
+ ca->bic_K = 0;
+ ca->delay_min = 0;
+ ca->epoch_start = 0;
+ ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
+ ca->ack_cnt = 0;
+ ca->tcp_cwnd = 0;
+}
+
+static void bictcp_init(struct sock *sk)
+{
+ bictcp_reset(inet_csk_ca(sk));
+ if (initial_ssthresh)
+ tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+}
+
+/* 64bit divisor, dividend and result. dynamic precision */
+static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
+{
+ u_int32_t d = divisor;
+
+ if (divisor > 0xffffffffULL) {
+ unsigned int shift = fls(divisor >> 32);
+
+ d = divisor >> shift;
+ dividend >>= shift;
+ }
+
+ /* avoid 64 bit division if possible */
+ if (dividend >> 32)
+ do_div(dividend, d);
+ else
+ dividend = (uint32_t) dividend / d;
+
+ return dividend;
+}
+
+/*
+ * calculate the cubic root of x using Newton-Raphson
+ */
+static u32 cubic_root(u64 a)
+{
+ u32 x, x1;
+
+ /* Initial estimate is based on:
+ * cbrt(x) = exp(log(x) / 3)
+ */
+ x = 1u << (fls64(a)/3);
+
+ /*
+ * Iteration based on:
+ * 2
+ * x = ( 2 * x + a / x ) / 3
+ * k+1 k k
+ */
+ do {
+ x1 = x;
+ x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
+ } while (abs(x1 - x) > 1);
+
+ return x;
+}
+
+/*
+ * Compute congestion window to use.
+ */
+static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
+{
+ u64 offs;
+ u32 delta, t, bic_target, min_cnt, max_cnt;
+
+ ca->ack_cnt++; /* count the number of ACKs */
+
+ if (ca->last_cwnd == cwnd &&
+ (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+ return;
+
+ ca->last_cwnd = cwnd;
+ ca->last_time = tcp_time_stamp;
+
+ if (ca->epoch_start == 0) {
+ ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */
+ ca->ack_cnt = 1; /* start counting */
+ ca->tcp_cwnd = cwnd; /* syn with cubic */
+
+ if (ca->last_max_cwnd <= cwnd) {
+ ca->bic_K = 0;
+ ca->bic_origin_point = cwnd;
+ } else {
+ /* Compute new K based on
+ * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
+ */
+ ca->bic_K = cubic_root(cube_factor
+ * (ca->last_max_cwnd - cwnd));
+ ca->bic_origin_point = ca->last_max_cwnd;
+ }
+ }
+
+ /* cubic function - calc*/
+ /* calculate c * time^3 / rtt,
+ * while considering overflow in calculation of time^3
+ * (so time^3 is done by using 64 bit)
+ * and without the support of division of 64bit numbers
+ * (so all divisions are done by using 32 bit)
+ * also NOTE the unit of those veriables
+ * time = (t - K) / 2^bictcp_HZ
+ * c = bic_scale >> 10
+ * rtt = (srtt >> 3) / HZ
+ * !!! The following code does not have overflow problems,
+ * if the cwnd < 1 million packets !!!
+ */
+
+ /* change the unit from HZ to bictcp_HZ */
+ t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
+ << BICTCP_HZ) / HZ;
+
+ if (t < ca->bic_K) /* t - K */
+ offs = ca->bic_K - t;
+ else
+ offs = t - ca->bic_K;
+
+ /* c/rtt * (t-K)^3 */
+ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
+ if (t < ca->bic_K) /* below origin*/
+ bic_target = ca->bic_origin_point - delta;
+ else /* above origin*/
+ bic_target = ca->bic_origin_point + delta;
+
+ /* cubic function - calc bictcp_cnt*/
+ if (bic_target > cwnd) {
+ ca->cnt = cwnd / (bic_target - cwnd);
+ } else {
+ ca->cnt = 100 * cwnd; /* very small increment*/
+ }
+
+ if (ca->delay_min > 0) {
+ /* max increment = Smax * rtt / 0.1 */
+ min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
+ if (ca->cnt < min_cnt)
+ ca->cnt = min_cnt;
+ }
+
+ /* slow start and low utilization */
+ if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
+ ca->cnt = 50;
+
+ /* TCP Friendly */
+ if (tcp_friendliness) {
+ u32 scale = beta_scale;
+ delta = (cwnd * scale) >> 3;
+ while (ca->ack_cnt > delta) { /* update tcp cwnd */
+ ca->ack_cnt -= delta;
+ ca->tcp_cwnd++;
+ }
+
+ if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
+ delta = ca->tcp_cwnd - cwnd;
+ max_cnt = cwnd / delta;
+ if (ca->cnt > max_cnt)
+ ca->cnt = max_cnt;
+ }
+ }
+
+ ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
+ if (ca->cnt == 0) /* cannot be zero */
+ ca->cnt = 1;
+}
+
+
+/* Keep track of minimum rtt */
+static inline void measure_delay(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+ u32 delay;
+
+ /* No time stamp */
+ if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
+ /* Discard delay samples right after fast recovery */
+ (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ return;
+
+ delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ if (delay == 0)
+ delay = 1;
+
+ /* first time call or link delay decreases */
+ if (ca->delay_min == 0 || ca->delay_min > delay)
+ ca->delay_min = delay;
+}
+
+static void bictcp_cong_avoid(struct sock *sk, u32 ack,
+ u32 seq_rtt, u32 in_flight, int data_acked)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ if (data_acked)
+ measure_delay(sk);
+
+ if (!tcp_is_cwnd_limited(sk, in_flight))
+ return;
+
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
+ bictcp_update(ca, tp->snd_cwnd);
+
+ /* In dangerous area, increase slowly.
+ * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
+ */
+ if (tp->snd_cwnd_cnt >= ca->cnt) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ } else
+ tp->snd_cwnd_cnt++;
+ }
+
+}
+
+static u32 bictcp_recalc_ssthresh(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ ca->epoch_start = 0; /* end of epoch */
+
+ /* Wmax and fast convergence */
+ if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ / (2 * BICTCP_BETA_SCALE);
+ else
+ ca->last_max_cwnd = tp->snd_cwnd;
+
+ ca->loss_cwnd = tp->snd_cwnd;
+
+ return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+}
+
+static u32 bictcp_undo_cwnd(struct sock *sk)
+{
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
+}
+
+static u32 bictcp_min_cwnd(struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+static void bictcp_state(struct sock *sk, u8 new_state)
+{
+ if (new_state == TCP_CA_Loss)
+ bictcp_reset(inet_csk_ca(sk));
+}
+
+/* Track delayed acknowledgment ratio using sliding window
+ * ratio = (15*ratio + sample) / 16
+ */
+static void bictcp_acked(struct sock *sk, u32 cnt)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
+ struct bictcp *ca = inet_csk_ca(sk);
+ cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+ ca->delayed_ack += cnt;
+ }
+}
+
+
+static struct tcp_congestion_ops cubictcp = {
+ .init = bictcp_init,
+ .ssthresh = bictcp_recalc_ssthresh,
+ .cong_avoid = bictcp_cong_avoid,
+ .set_state = bictcp_state,
+ .undo_cwnd = bictcp_undo_cwnd,
+ .min_cwnd = bictcp_min_cwnd,
+ .pkts_acked = bictcp_acked,
+ .owner = THIS_MODULE,
+ .name = "cubic",
+};
+
+static int __init cubictcp_register(void)
+{
+ BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+
+ /* Precompute a bunch of the scaling factors that are used per-packet
+ * based on SRTT of 100ms
+ */
+
+ beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+
+ cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
+
+ /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+ * so K = cubic_root( (wmax-cwnd)*rtt/c )
+ * the unit of K is bictcp_HZ=2^10, not HZ
+ *
+ * c = bic_scale >> 10
+ * rtt = 100ms
+ *
+ * the following code has been designed and tested for
+ * cwnd < 1 million packets
+ * RTT < 100 seconds
+ * HZ < 1,000,00 (corresponding to 10 nano-second)
+ */
+
+ /* 1/c * 2^2*bictcp_HZ * srtt */
+ cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
+
+ /* divide by bic_scale and by constant Srtt (100ms) */
+ do_div(cube_factor, bic_scale * 10);
+
+ return tcp_register_congestion_control(&cubictcp);
+}
+
+static void __exit cubictcp_unregister(void)
+{
+ tcp_unregister_congestion_control(&cubictcp);
+}
+
+module_init(cubictcp_register);
+module_exit(cubictcp_unregister);
+
+MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CUBIC TCP");
+MODULE_VERSION("2.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf2e23086bc..a97ed5416c2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1;
/* Adapt the MSS value used to make delayed ack decision to the
* real world.
*/
-static inline void tcp_measure_rcv_mss(struct sock *sk,
- const struct sk_buff *skb)
+static void tcp_measure_rcv_mss(struct sock *sk,
+ const struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
return 0;
}
-static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+ struct sk_buff *skb)
{
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
}
+
+/* Initialize RCV_MSS value.
+ * RCV_MSS is an our guess about MSS used by the peer.
+ * We haven't any direct information about the MSS.
+ * It's better to underestimate the RCV_MSS rather than overestimate.
+ * Overestimations make us ACKing less frequently than needed.
+ * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
+ */
+void tcp_initialize_rcv_mss(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
+
+ hint = min(hint, tp->rcv_wnd/2);
+ hint = min(hint, TCP_MIN_RCVMSS);
+ hint = max(hint, TCP_MIN_MSS);
+
+ inet_csk(sk)->icsk_ack.rcv_mss = hint;
+}
+
/* Receiver "autotuning" code.
*
* The algorithm for RTT estimation w/o timestamps is based on
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
+/* Set slow start threshold and cwnd not falling to slow start */
+void tcp_enter_cwr(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->prior_ssthresh = 0;
+ tp->bytes_acked = 0;
+ if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+ tp->undo_marker = 0;
+ tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ tp->snd_cwnd = min(tp->snd_cwnd,
+ tcp_packets_in_flight(tp) + 1U);
+ tp->snd_cwnd_cnt = 0;
+ tp->high_seq = tp->snd_nxt;
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+ TCP_ECN_queue_cwr(tp);
+
+ tcp_set_ca_state(sk, TCP_CA_CWR);
+ }
+}
+
/* Initialize metrics on socket. */
static void tcp_init_metrics(struct sock *sk)
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
tcp_ack_no_tstamp(sk, seq_rtt, flag);
}
-static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+ u32 in_flight, int good)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
* RFC2988 recommends to restart timer to now+rto.
*/
-static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
{
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
return acked;
}
-static inline u32 tcp_usrtt(const struct sk_buff *skb)
+static u32 tcp_usrtt(const struct sk_buff *skb)
{
struct timeval tv, now;
@@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
if (nwin > tp->max_window) {
tp->max_window = nwin;
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
}
}
}
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
/* Fast parse options. This hopes to only see timestamps.
* If it is wrong it falls back on tcp_parse_options().
*/
-static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
- struct tcp_sock *tp)
+static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
+ struct tcp_sock *tp)
{
if (th->doff == sizeof(struct tcphdr)>>2) {
tp->rx_opt.saw_tstamp = 0;
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
}
}
-static __inline__ int
-tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
{
if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
if (before(seq, sp->start_seq))
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
return 0;
}
-static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
{
if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
if (before(seq, tp->rcv_nxt))
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
}
}
-static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
{
if (!tp->rx_opt.dsack)
tcp_dsack_set(tp, seq, end_seq);
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
}
}
-static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
{
__u32 tmp;
@@ -3307,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
int offset = start - TCP_SKB_CB(skb)->seq;
int size = TCP_SKB_CB(skb)->end_seq - start;
- if (offset < 0) BUG();
+ BUG_ON(offset < 0);
if (size > 0) {
size = min(copy, size);
if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
{
/* If the user specified a specific send buffer setting, do
* not modify it.
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk)
sk->sk_write_space(sk);
}
-static inline void tcp_check_space(struct sock *sk)
+static void tcp_check_space(struct sock *sk)
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk)
}
}
-static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
{
tcp_push_pending_frames(sk, tp);
tcp_check_space(sk);
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
}
}
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
+static inline void tcp_ack_snd_check(struct sock *sk)
{
if (!inet_csk_ack_scheduled(sk)) {
/* We sent a data segment already. */
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
return result;
}
-static __inline__ int
-tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
{
return skb->ip_summed != CHECKSUM_UNNECESSARY &&
__tcp_checksum_complete_user(sk, skb);
@@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, 0);
if (th->ack) {
- struct inet_connection_sock *icsk;
/* rfc793:
* "If the state is SYN-SENT then
* first check the ACK bit
@@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
tp->rx_opt.sack_ok |= 2;
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
/* Remember, tcp_poll() does not lock socket!
@@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_set_state(sk, TCP_ESTABLISHED);
/* Make sure socket is routed, for correct metrics. */
- tp->af_specific->rebuild_header(sk);
+ icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
@@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
sk_wake_async(sk, 0, POLL_OUT);
}
- icsk = inet_csk(sk);
-
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
@@ -4173,7 +4210,7 @@ discard:
if (tp->ecn_flags&TCP_ECN_OK)
sock_set_flag(sk, SOCK_NO_LARGESEND);
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
int queued = 0;
tp->rx_opt.saw_tstamp = 0;
@@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
if(th->syn) {
- if(tp->af_specific->conn_request(sk, skb) < 0)
+ if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
/* Now we have several options: In theory there is
@@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* Make sure socket is routed, for
* correct metrics.
*/
- tp->af_specific->rebuild_header(sk);
+ icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
@@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
+EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d5021e1929..6ea353907af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -69,6 +69,7 @@
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
+#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <linux/inet.h>
@@ -86,8 +87,7 @@ int sysctl_tcp_low_latency;
/* Socket used for sending RSTs */
static struct socket *tcp_socket;
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
- struct sk_buff *skb);
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
.lhash_lock = RW_LOCK_UNLOCKED,
@@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
- return inet_csk_get_port(&tcp_hashinfo, sk, snum);
+ return inet_csk_get_port(&tcp_hashinfo, sk, snum,
+ inet_csk_bind_conflict);
}
static void tcp_v4_hash(struct sock *sk)
@@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
skb->h.th->source);
}
-/* called with local bh disabled */
-static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
- struct inet_timewait_sock **twp)
+int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
- struct inet_sock *inet = inet_sk(sk);
- u32 daddr = inet->rcv_saddr;
- u32 saddr = inet->daddr;
- int dif = sk->sk_bound_dev_if;
- INET_ADDR_COOKIE(acookie, saddr, daddr)
- const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
- struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
- struct sock *sk2;
- const struct hlist_node *node;
- struct inet_timewait_sock *tw;
-
- prefetch(head->chain.first);
- write_lock(&head->lock);
-
- /* Check TIME-WAIT sockets first. */
- sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
- tw = inet_twsk(sk2);
-
- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
- const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
- struct tcp_sock *tp = tcp_sk(sk);
-
- /* With PAWS, it is safe from the viewpoint
- of data integrity. Even without PAWS it
- is safe provided sequence spaces do not
- overlap i.e. at data rates <= 80Mbit/sec.
-
- Actually, the idea is close to VJ's one,
- only timestamp cache is held not per host,
- but per port pair and TW bucket is used
- as state holder.
+ const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+ struct tcp_sock *tp = tcp_sk(sk);
- If TW bucket has been already destroyed we
- fall back to VJ's scheme and use initial
- timestamp retrieved from peer table.
- */
- if (tcptw->tw_ts_recent_stamp &&
- (!twp || (sysctl_tcp_tw_reuse &&
- xtime.tv_sec -
- tcptw->tw_ts_recent_stamp > 1))) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
- sock_hold(sk2);
- goto unique;
- } else
- goto not_unique;
- }
- }
- tw = NULL;
+ /* With PAWS, it is safe from the viewpoint
+ of data integrity. Even without PAWS it is safe provided sequence
+ spaces do not overlap i.e. at data rates <= 80Mbit/sec.
- /* And established part... */
- sk_for_each(sk2, node, &head->chain) {
- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
- goto not_unique;
- }
+ Actually, the idea is close to VJ's one, only timestamp cache is
+ held not per host, but per port pair and TW bucket is used as state
+ holder.
-unique:
- /* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
- inet->num = lport;
- inet->sport = htons(lport);
- sk->sk_hash = hash;
- BUG_TRAP(sk_unhashed(sk));
- __sk_add_node(sk, &head->chain);
- sock_prot_inc_use(sk->sk_prot);
- write_unlock(&head->lock);
-
- if (twp) {
- *twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
- } else if (tw) {
- /* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, &tcp_death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
- inet_twsk_put(tw);
+ If TW bucket has been already destroyed we fall back to VJ's scheme
+ and use initial timestamp retrieved from peer table.
+ */
+ if (tcptw->tw_ts_recent_stamp &&
+ (twp == NULL || (sysctl_tcp_tw_reuse &&
+ xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+ tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+ if (tp->write_seq == 0)
+ tp->write_seq = 1;
+ tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
+ tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ sock_hold(sktw);
+ return 1;
}
return 0;
-
-not_unique:
- write_unlock(&head->lock);
- return -EADDRNOTAVAIL;
}
-static inline u32 connect_port_offset(const struct sock *sk)
-{
- const struct inet_sock *inet = inet_sk(sk);
-
- return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
- inet->dport);
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static inline int tcp_v4_hash_connect(struct sock *sk)
-{
- const unsigned short snum = inet_sk(sk)->num;
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
-
- if (!snum) {
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int range = high - low;
- int i;
- int port;
- static u32 hint;
- u32 offset = hint + connect_port_offset(sk);
- struct hlist_node *node;
- struct inet_timewait_sock *tw = NULL;
-
- local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
- head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
- * because the established check is already
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, node, &head->chain) {
- if (tb->port == port) {
- BUG_TRAP(!hlist_empty(&tb->owners));
- if (tb->fastreuse >= 0)
- goto next_port;
- if (!__tcp_v4_check_established(sk,
- port,
- &tw))
- goto ok;
- goto next_port;
- }
- }
-
- tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
- }
- tb->fastreuse = -1;
- goto ok;
-
- next_port:
- spin_unlock(&head->lock);
- }
- local_bh_enable();
-
- return -EADDRNOTAVAIL;
-
-ok:
- hint += i;
-
- /* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
- if (sk_unhashed(sk)) {
- inet_sk(sk)->sport = htons(port);
- __inet_hash(&tcp_hashinfo, sk, 0);
- }
- spin_unlock(&head->lock);
-
- if (tw) {
- inet_twsk_deschedule(tw, &tcp_death_row);;
- inet_twsk_put(tw);
- }
-
- ret = 0;
- goto out;
- }
-
- head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- __inet_hash(&tcp_hashinfo, sk, 0);
- spin_unlock_bh(&head->lock);
- return 0;
- } else {
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
- ret = __tcp_v4_check_established(sk, snum, NULL);
-out:
- local_bh_enable();
- return ret;
- }
-}
+EXPORT_SYMBOL_GPL(tcp_twsk_unique);
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->dport = usin->sin_port;
inet->daddr = daddr;
- tp->ext_header_len = 0;
+ inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet->opt)
- tp->ext_header_len = inet->opt->optlen;
+ inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
tp->rx_opt.mss_clamp = 536;
@@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* complete initialization after this.
*/
tcp_set_state(sk, TCP_SYN_SENT);
- err = tcp_v4_hash_connect(sk);
+ err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
@@ -433,12 +270,10 @@ failure:
/*
* This routine does path mtu discovery as defined in RFC1191.
*/
-static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
- u32 mtu)
+static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
* send out by Linux are always <576bytes so they should go through
@@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
mtu = dst_mtu(dst);
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
- tp->pmtu_cookie > mtu) {
+ inet_csk(sk)->icsk_pmtu_cookie > mtu) {
tcp_sync_mss(sk, mtu);
/* Resend the TCP packet because it's
@@ -644,10 +479,10 @@ out:
}
/* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
- struct sk_buff *skb)
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(sk);
+ struct tcphdr *th = skb->h.th;
if (skb->ip_summed == CHECKSUM_HW) {
th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
@@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
kfree(inet_rsk(req)->opt);
}
-static inline void syn_flood_warning(struct sk_buff *skb)
+#ifdef CONFIG_SYN_COOKIES
+static void syn_flood_warning(struct sk_buff *skb)
{
static unsigned long warntime;
@@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb)
ntohs(skb->h.th->dest));
}
}
+#endif
/*
* Save and compile IPv4 options into the request_sock if needed.
*/
-static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
- struct sk_buff *skb)
+static struct ip_options *tcp_v4_save_options(struct sock *sk,
+ struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
struct ip_options *dopt = NULL;
@@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = {
.send_reset = tcp_v4_send_reset,
};
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct tcp_timewait_sock),
+ .twsk_unique = tcp_twsk_unique,
+};
+
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct inet_request_sock *ireq;
@@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = skb->nh.iph->ttl;
- newtp->ext_header_len = 0;
+ inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newinet->opt)
- newtp->ext_header_len = newinet->opt->optlen;
+ inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
newinet->id = newtp->write_seq ^ jiffies;
tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1238,6 +1080,7 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+ nf_reset(skb);
if (sk_filter(sk, skb, 0))
goto discard_and_relse;
@@ -1314,16 +1157,6 @@ do_time_wait:
goto discard_it;
}
-static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
- struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
- struct inet_sock *inet = inet_sk(sk);
-
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = inet->daddr;
- sin->sin_port = inet->dport;
-}
-
/* VJ's idea. Save last timestamp seen from this destination
* and hold it at least for normal timewait interval to use for duplicate
* segment detection in subsequent connections, before they enter synchronized
@@ -1382,7 +1215,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
return 0;
}
-struct tcp_func ipv4_specific = {
+struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
@@ -1392,7 +1225,7 @@ struct tcp_func ipv4_specific = {
.net_header_len = sizeof(struct iphdr),
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
- .addr2sockaddr = v4_addr2sockaddr,
+ .addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
};
@@ -1433,7 +1266,8 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
- tp->af_specific = &ipv4_specific;
+ icsk->icsk_af_ops = &ipv4_specific;
+ icsk->icsk_sync_mss = tcp_sync_mss;
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1989,7 +1823,7 @@ struct proto tcp_prot = {
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
- .twsk_obj_size = sizeof(struct tcp_timewait_sock),
+ .twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
};
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1b66a2ac432..2b9b7f6c7f7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,18 +274,18 @@ kill:
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
struct inet_timewait_sock *tw = NULL;
+ const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
int recycle_ok = 0;
if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
- recycle_ok = tp->af_specific->remember_stamp(sk);
+ recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
tw = inet_twsk_alloc(sk, state);
if (tw != NULL) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
- const struct inet_connection_sock *icsk = inet_csk(sk);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+ struct inet6_timewait_sock *tw6;
- ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr);
- ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr);
+ tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
+ tw6 = inet6_twsk((struct sock *)tw);
+ ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
+ ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
tw->tw_ipv6only = np->ipv6only;
}
#endif
@@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock **prev)
{
struct tcphdr *th = skb->h.th;
- struct tcp_sock *tp = tcp_sk(sk);
u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
int paws_reject = 0;
struct tcp_options_received tmp_opt;
@@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
* ESTABLISHED STATE. If it will be dropped after
* socket is created, wait for troubles.
*/
- child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
+ req, NULL);
if (child == NULL)
goto listen_overflow;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 029c70dfb58..a7623ead39a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1;
*/
int sysctl_tcp_tso_win_divisor = 3;
-static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct tcp_sock *tp,
+ struct sk_buff *skb)
{
sk->sk_send_head = skb->next;
if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
tp->snd_cwnd_used = 0;
}
-static inline void tcp_event_data_sent(struct tcp_sock *tp,
- struct sk_buff *skb, struct sock *sk)
+static void tcp_event_data_sent(struct tcp_sock *tp,
+ struct sk_buff *skb, struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
icsk->icsk_ack.pingpong = 1;
}
-static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
* value can be stuffed directly into th->window for an outgoing
* frame.
*/
-static __inline__ u16 tcp_select_window(struct sock *sk)
+static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 cur_win = tcp_receive_window(tp);
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
return new_win;
}
+static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
+ __u32 tstamp)
+{
+ if (tp->rx_opt.tstamp_ok) {
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *ptr++ = htonl(tstamp);
+ *ptr++ = htonl(tp->rx_opt.ts_recent);
+ }
+ if (tp->rx_opt.eff_sacks) {
+ struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+ int this_sack;
+
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_SACK << 8) |
+ (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+ TCPOLEN_SACK_PERBLOCK)));
+ for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+ *ptr++ = htonl(sp[this_sack].start_seq);
+ *ptr++ = htonl(sp[this_sack].end_seq);
+ }
+ if (tp->rx_opt.dsack) {
+ tp->rx_opt.dsack = 0;
+ tp->rx_opt.eff_sacks--;
+ }
+ }
+}
+
+/* Construct a tcp options header for a SYN or SYN_ACK packet.
+ * If this is every changed make sure to change the definition of
+ * MAX_SYN_SIZE to match the new maximum number of options that you
+ * can generate.
+ */
+static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
+ int offer_wscale, int wscale, __u32 tstamp,
+ __u32 ts_recent)
+{
+ /* We always get an MSS option.
+ * The option bytes which will be seen in normal data
+ * packets should timestamps be used, must be in the MSS
+ * advertised. But we subtract them from tp->mss_cache so
+ * that calculations in tcp_sendmsg are simpler etc.
+ * So account for this fact here if necessary. If we
+ * don't do this correctly, as a receiver we won't
+ * recognize data packets as being full sized when we
+ * should, and thus we won't abide by the delayed ACK
+ * rules correctly.
+ * SACKs don't matter, we never delay an ACK when we
+ * have any of those going out.
+ */
+ *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+ if (ts) {
+ if(sack)
+ *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+ else
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+ *ptr++ = htonl(tstamp); /* TSVAL */
+ *ptr++ = htonl(ts_recent); /* TSECR */
+ } else if(sack)
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
+ if (offer_wscale)
+ *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
+}
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
@@ -262,122 +331,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
{
- if (skb != NULL) {
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- int tcp_header_size = tp->tcp_header_len;
- struct tcphdr *th;
- int sysctl_flags;
- int err;
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet;
+ struct tcp_sock *tp;
+ struct tcp_skb_cb *tcb;
+ int tcp_header_size;
+ struct tcphdr *th;
+ int sysctl_flags;
+ int err;
+
+ BUG_ON(!skb || !tcp_skb_pcount(skb));
+
+ /* If congestion control is doing timestamping, we must
+ * take such a timestamp before we potentially clone/copy.
+ */
+ if (icsk->icsk_ca_ops->rtt_sample)
+ __net_timestamp(skb);
+
+ if (likely(clone_it)) {
+ if (unlikely(skb_cloned(skb)))
+ skb = pskb_copy(skb, gfp_mask);
+ else
+ skb = skb_clone(skb, gfp_mask);
+ if (unlikely(!skb))
+ return -ENOBUFS;
+ }
- BUG_ON(!tcp_skb_pcount(skb));
+ inet = inet_sk(sk);
+ tp = tcp_sk(sk);
+ tcb = TCP_SKB_CB(skb);
+ tcp_header_size = tp->tcp_header_len;
#define SYSCTL_FLAG_TSTAMPS 0x1
#define SYSCTL_FLAG_WSCALE 0x2
#define SYSCTL_FLAG_SACK 0x4
- /* If congestion control is doing timestamping */
- if (icsk->icsk_ca_ops->rtt_sample)
- __net_timestamp(skb);
-
- sysctl_flags = 0;
- if (tcb->flags & TCPCB_FLAG_SYN) {
- tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if(sysctl_tcp_timestamps) {
- tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
- }
- if(sysctl_tcp_window_scaling) {
- tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_WSCALE;
- }
- if(sysctl_tcp_sack) {
- sysctl_flags |= SYSCTL_FLAG_SACK;
- if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
- tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
- }
- } else if (tp->rx_opt.eff_sacks) {
- /* A SACK is 2 pad bytes, a 2 byte header, plus
- * 2 32-bit sequence numbers for each SACK block.
- */
- tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+ sysctl_flags = 0;
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+ if(sysctl_tcp_timestamps) {
+ tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
}
-
- if (tcp_packets_in_flight(tp) == 0)
- tcp_ca_event(sk, CA_EVENT_TX_START);
-
- th = (struct tcphdr *) skb_push(skb, tcp_header_size);
- skb->h.th = th;
- skb_set_owner_w(skb, sk);
-
- /* Build TCP header and checksum it. */
- th->source = inet->sport;
- th->dest = inet->dport;
- th->seq = htonl(tcb->seq);
- th->ack_seq = htonl(tp->rcv_nxt);
- *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
- if (tcb->flags & TCPCB_FLAG_SYN) {
- /* RFC1323: The window in SYN & SYN/ACK segments
- * is never scaled.
- */
- th->window = htons(tp->rcv_wnd);
- } else {
- th->window = htons(tcp_select_window(sk));
+ if (sysctl_tcp_window_scaling) {
+ tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_WSCALE;
}
- th->check = 0;
- th->urg_ptr = 0;
-
- if (tp->urg_mode &&
- between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
- th->urg_ptr = htons(tp->snd_up-tcb->seq);
- th->urg = 1;
+ if (sysctl_tcp_sack) {
+ sysctl_flags |= SYSCTL_FLAG_SACK;
+ if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+ tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
}
+ } else if (unlikely(tp->rx_opt.eff_sacks)) {
+ /* A SACK is 2 pad bytes, a 2 byte header, plus
+ * 2 32-bit sequence numbers for each SACK block.
+ */
+ tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+ (tp->rx_opt.eff_sacks *
+ TCPOLEN_SACK_PERBLOCK));
+ }
+
+ if (tcp_packets_in_flight(tp) == 0)
+ tcp_ca_event(sk, CA_EVENT_TX_START);
+
+ th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+ skb->h.th = th;
+ skb_set_owner_w(skb, sk);
+
+ /* Build TCP header and checksum it. */
+ th->source = inet->sport;
+ th->dest = inet->dport;
+ th->seq = htonl(tcb->seq);
+ th->ack_seq = htonl(tp->rcv_nxt);
+ *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
+ tcb->flags);
+
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ /* RFC1323: The window in SYN & SYN/ACK segments
+ * is never scaled.
+ */
+ th->window = htons(tp->rcv_wnd);
+ } else {
+ th->window = htons(tcp_select_window(sk));
+ }
+ th->check = 0;
+ th->urg_ptr = 0;
- if (tcb->flags & TCPCB_FLAG_SYN) {
- tcp_syn_build_options((__u32 *)(th + 1),
- tcp_advertise_mss(sk),
- (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
- (sysctl_flags & SYSCTL_FLAG_SACK),
- (sysctl_flags & SYSCTL_FLAG_WSCALE),
- tp->rx_opt.rcv_wscale,
- tcb->when,
- tp->rx_opt.ts_recent);
- } else {
- tcp_build_and_update_options((__u32 *)(th + 1),
- tp, tcb->when);
+ if (unlikely(tp->urg_mode &&
+ between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+ th->urg_ptr = htons(tp->snd_up-tcb->seq);
+ th->urg = 1;
+ }
- TCP_ECN_send(sk, tp, skb, tcp_header_size);
- }
- tp->af_specific->send_check(sk, th, skb->len, skb);
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ tcp_syn_build_options((__u32 *)(th + 1),
+ tcp_advertise_mss(sk),
+ (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+ (sysctl_flags & SYSCTL_FLAG_SACK),
+ (sysctl_flags & SYSCTL_FLAG_WSCALE),
+ tp->rx_opt.rcv_wscale,
+ tcb->when,
+ tp->rx_opt.ts_recent);
+ } else {
+ tcp_build_and_update_options((__u32 *)(th + 1),
+ tp, tcb->when);
+ TCP_ECN_send(sk, tp, skb, tcp_header_size);
+ }
- if (tcb->flags & TCPCB_FLAG_ACK)
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+ icsk->icsk_af_ops->send_check(sk, skb->len, skb);
- if (skb->len != tcp_header_size)
- tcp_event_data_sent(tp, skb, sk);
+ if (likely(tcb->flags & TCPCB_FLAG_ACK))
+ tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ if (skb->len != tcp_header_size)
+ tcp_event_data_sent(tp, skb, sk);
- err = tp->af_specific->queue_xmit(skb, 0);
- if (err <= 0)
- return err;
+ TCP_INC_STATS(TCP_MIB_OUTSEGS);
- tcp_enter_cwr(sk);
+ err = icsk->icsk_af_ops->queue_xmit(skb, 0);
+ if (unlikely(err <= 0))
+ return err;
+
+ tcp_enter_cwr(sk);
+
+ /* NET_XMIT_CN is special. It does not guarantee,
+ * that this packet is lost. It tells that device
+ * is about to start to drop packets or already
+ * drops some packets of the same priority and
+ * invokes us to send less aggressively.
+ */
+ return err == NET_XMIT_CN ? 0 : err;
- /* NET_XMIT_CN is special. It does not guarantee,
- * that this packet is lost. It tells that device
- * is about to start to drop packets or already
- * drops some packets of the same priority and
- * invokes us to send less aggressively.
- */
- return err == NET_XMIT_CN ? 0 : err;
- }
- return -ENOBUFS;
#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
@@ -604,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
It is minimum of user_mss and mss received with SYN.
It also does not include TCP options.
- tp->pmtu_cookie is last pmtu, seen by this function.
+ inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
tp->mss_cache is current effective sending mss, including
all tcp options except for SACKs. It is evaluated,
@@ -614,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
NOTE1. rfc1122 clearly states that advertised MSS
DOES NOT include either tcp or ip options.
- NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
- this function. --ANK (980731)
+ NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
+ are READ ONLY outside this function. --ANK (980731)
*/
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
struct tcp_sock *tp = tcp_sk(sk);
- int mss_now;
-
+ struct inet_connection_sock *icsk = inet_csk(sk);
/* Calculate base mss without TCP options:
It is MMS_S - sizeof(tcphdr) of rfc1122
*/
- mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+ int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
+ sizeof(struct tcphdr));
/* Clamp it (mss_clamp does not include tcp options) */
if (mss_now > tp->rx_opt.mss_clamp)
mss_now = tp->rx_opt.mss_clamp;
/* Now subtract optional transport overhead */
- mss_now -= tp->ext_header_len;
+ mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
if (mss_now < 48)
@@ -647,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
/* And store cached results */
- tp->pmtu_cookie = pmtu;
+ icsk->icsk_pmtu_cookie = pmtu;
tp->mss_cache = mss_now;
return mss_now;
@@ -677,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
if (dst) {
u32 mtu = dst_mtu(dst);
- if (mtu != tp->pmtu_cookie)
+ if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
mss_now = tcp_sync_mss(sk, mtu);
}
@@ -688,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
xmit_size_goal = mss_now;
if (doing_tso) {
- xmit_size_goal = 65535 -
- tp->af_specific->net_header_len -
- tp->ext_header_len - tp->tcp_header_len;
+ xmit_size_goal = (65535 -
+ inet_csk(sk)->icsk_af_ops->net_header_len -
+ inet_csk(sk)->icsk_ext_hdr_len -
+ tp->tcp_header_len);
if (tp->max_window &&
(xmit_size_goal > (tp->max_window >> 1)))
@@ -706,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
/* Congestion window validation. (RFC2861) */
-static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
{
__u32 packets_out = tp->packets_out;
@@ -755,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
/* This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
@@ -1036,7 +1123,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+ if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
break;
/* Advance the send_head. This one is sent out.
@@ -1109,7 +1196,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
/* Send it out now. */
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+ if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
update_send_head(sk, tp, skb);
tcp_cwnd_validate(sk, tp);
return;
@@ -1405,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
(sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
- if(tp->af_specific->rebuild_header(sk))
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
/* Some Solaris stacks overoptimize and ignore the FIN on a
@@ -1429,9 +1516,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
- pskb_copy(skb, GFP_ATOMIC):
- skb_clone(skb, GFP_ATOMIC)));
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (err == 0) {
/* Update global TCP statistics. */
@@ -1665,7 +1750,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (tcp_transmit_skb(sk, skb))
+ if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
}
@@ -1700,7 +1785,7 @@ int tcp_send_synack(struct sock *sk)
TCP_ECN_send_synack(tcp_sk(sk), skb);
}
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+ return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
/*
@@ -1778,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
/*
* Do all connect socket setups that can be done AF independent.
*/
-static inline void tcp_connect_init(struct sock *sk)
+static void tcp_connect_init(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -1861,7 +1946,7 @@ int tcp_connect(struct sock *sk)
__skb_queue_tail(&sk->sk_write_queue, buff);
sk_charge_skb(sk, buff);
tp->packets_out += tcp_skb_pcount(buff);
- tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+ tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
@@ -1957,7 +2042,7 @@ void tcp_send_ack(struct sock *sk)
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
TCP_SKB_CB(buff)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, buff);
+ tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}
}
@@ -1997,7 +2082,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- return tcp_transmit_skb(sk, skb);
+ return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
int tcp_write_wakeup(struct sock *sk)
@@ -2030,7 +2115,7 @@ int tcp_write_wakeup(struct sock *sk)
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err) {
update_send_head(sk, tp, skb);
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index b7d296a8ac6..3b740349505 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
vegas->beg_snd_nxt = tp->snd_nxt;
vegas->beg_snd_cwnd = tp->snd_cwnd;
- /* Take into account the current RTT sample too, to
- * decrease the impact of delayed acks. This double counts
- * this sample since we count it for the next window as well,
- * but that's not too awful, since we're taking the min,
- * rather than averaging.
- */
- tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
-
/* We do the Vegas calculations only if we got enough RTT
* samples that we can be reasonably sure that we got
* at least one RTT sample that wasn't from a delayed ACK.
@@ -333,11 +325,15 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
tp->snd_cwnd = tp->snd_cwnd_clamp;
}
- }
- /* Wipe the slate clean for the next RTT. */
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
+ /* Wipe the slate clean for the next RTT. */
+ vegas->cntRTT = 0;
+ vegas->minRTT = 0x7fffffff;
+ }
+ /* Use normal slow start */
+ else if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2422a5f7195..00840474a44 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -86,6 +86,7 @@
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sockios.h>
+#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
@@ -846,20 +847,7 @@ out:
csum_copy_err:
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
- /* Clear queue. */
- if (flags&MSG_PEEK) {
- int clear = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- clear = 1;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- if (clear)
- kfree_skb(skb);
- }
-
- skb_free_datagram(sk, skb);
+ skb_kill_datagram(sk, skb, flags);
if (noblock)
return -EAGAIN;
@@ -1001,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
kfree_skb(skb);
return -1;
}
+ nf_reset(skb);
if (up->encap_type) {
/*
@@ -1094,7 +1083,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
* Otherwise, csum completion requires chacksumming packet body,
* including udp header and folding it to skb->csum.
*/
-static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
+static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
unsigned short ulen, u32 saddr, u32 daddr)
{
if (uh->check == 0) {
@@ -1108,7 +1097,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
- return 0;
}
/*
@@ -1141,8 +1129,7 @@ int udp_rcv(struct sk_buff *skb)
if (pskb_trim_rcsum(skb, ulen))
goto short_packet;
- if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
- goto csum_error;
+ udp_checksum_init(skb, uh, ulen, saddr, daddr);
if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
@@ -1163,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
+ nf_reset(skb);
/* No socket. Drop packet silently, if checksum is wrong */
if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0..850d919591d 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
return xfrm_parse_spi(skb, nexthdr, spi, seq);
}
+#ifdef CONFIG_NETFILTER
+static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+
+ if (skb->dst == NULL) {
+ if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+ skb->dev))
+ goto drop;
+ }
+ return dst_input(skb);
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+#endif
+
int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
{
int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
skb->sp->len += xfrm_nr;
+ nf_reset(skb);
+
if (decaps) {
if (!(skb->dev->flags&IFF_LOOPBACK)) {
dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
netif_rx(skb);
return 0;
} else {
+#ifdef CONFIG_NETFILTER
+ __skb_push(skb, skb->data - skb->nh.raw);
+ skb->nh.iph->tot_len = htons(skb->len);
+ ip_send_check(skb->nh.iph);
+
+ NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+ xfrm4_rcv_encap_finish);
+ return 0;
+#else
return -skb->nh.iph->protocol;
+#endif
}
drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942..d4df0ddd424 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/compiler.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
+#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
return ret;
}
-int xfrm4_output(struct sk_buff *skb)
+static int xfrm4_output_one(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
goto error_nolock;
}
- spin_lock_bh(&x->lock);
- err = xfrm_state_check(x, skb);
- if (err)
- goto error;
+ do {
+ spin_lock_bh(&x->lock);
+ err = xfrm_state_check(x, skb);
+ if (err)
+ goto error;
- xfrm4_encap(skb);
+ xfrm4_encap(skb);
- err = x->type->output(x, skb);
- if (err)
- goto error;
+ err = x->type->output(x, skb);
+ if (err)
+ goto error;
- x->curlft.bytes += skb->len;
- x->curlft.packets++;
+ x->curlft.bytes += skb->len;
+ x->curlft.packets++;
- spin_unlock_bh(&x->lock);
+ spin_unlock_bh(&x->lock);
- if (!(skb->dst = dst_pop(dst))) {
- err = -EHOSTUNREACH;
- goto error_nolock;
- }
- err = NET_XMIT_BYPASS;
+ if (!(skb->dst = dst_pop(dst))) {
+ err = -EHOSTUNREACH;
+ goto error_nolock;
+ }
+ dst = skb->dst;
+ x = dst->xfrm;
+ } while (x && !x->props.mode);
+
+ IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+ err = 0;
out_exit:
return err;
@@ -143,3 +151,33 @@ error_nolock:
kfree_skb(skb);
goto out_exit;
}
+
+int xfrm4_output_finish(struct sk_buff *skb)
+{
+ int err;
+
+ while (likely((err = xfrm4_output_one(skb)) == 0)) {
+ nf_reset(skb);
+
+ err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+ skb->dst->dev, dst_output);
+ if (unlikely(err != 1))
+ break;
+
+ if (!skb->dst->xfrm)
+ return dst_output(skb);
+
+ err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+ skb->dst->dev, xfrm4_output_finish);
+ if (unlikely(err != 1))
+ break;
+ }
+
+ return err;
+}
+
+int xfrm4_output(struct sk_buff *skb)
+{
+ return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+ xfrm4_output_finish);
+}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cd..42196ba3b0b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_UDP:
case IPPROTO_TCP:
case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
u16 *ports = (u16 *)xprth;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d23e07fc81f..dbabf81a9b7 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,6 +42,21 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->props.saddr = tmpl->saddr;
if (x->props.saddr.a4 == 0)
x->props.saddr.a4 = saddr->a4;
+ if (tmpl->mode && x->props.saddr.a4 == 0) {
+ struct rtable *rt;
+ struct flowi fl_tunnel = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = x->id.daddr.a4,
+ }
+ }
+ };
+ if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
+ &fl_tunnel, AF_INET)) {
+ x->props.saddr.a4 = rt->rt_src;
+ dst_release(&rt->u.dst);
+ }
+ }
x->props.mode = tmpl->mode;
x->props.reqid = tmpl->reqid;
x->props.family = AF_INET;