summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/af_inet.c72
-rw-r--r--net/ipv4/arp.c65
-rw-r--r--net/ipv4/cipso_ipv4.c1
-rw-r--r--net/ipv4/devinet.c36
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_frontend.c20
-rw-r--r--net/ipv4/fib_hash.c15
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/fib_trie.c247
-rw-r--r--net/ipv4/icmp.c227
-rw-r--r--net/ipv4/igmp.c45
-rw-r--r--net/ipv4/inet_connection_sock.c45
-rw-r--r--net/ipv4/inet_fragment.c13
-rw-r--r--net/ipv4/inet_hashtables.c43
-rw-r--r--net/ipv4/inet_timewait_sock.c4
-rw-r--r--net/ipv4/ip_forward.c4
-rw-r--r--net/ipv4/ip_fragment.c26
-rw-r--r--net/ipv4/ip_gre.c254
-rw-r--r--net/ipv4/ip_input.c21
-rw-r--r--net/ipv4/ip_options.c63
-rw-r--r--net/ipv4/ip_output.c28
-rw-r--r--net/ipv4/ip_sockglue.c17
-rw-r--r--net/ipv4/ipcomp.c8
-rw-r--r--net/ipv4/ipconfig.c56
-rw-r--r--net/ipv4/ipip.c246
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c4
-rw-r--r--net/ipv4/netfilter.c37
-rw-r--r--net/ipv4/netfilter/Kconfig16
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/arp_tables.c89
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c14
-rw-r--r--net/ipv4/netfilter/arptable_filter.c7
-rw-r--r--net/ipv4/netfilter/ip_queue.c22
-rw-r--r--net/ipv4/netfilter/ip_tables.c53
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c23
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c2
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c9
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c18
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c6
-rw-r--r--net/ipv4/netfilter/ipt_recent.c11
-rw-r--r--net/ipv4/netfilter/iptable_filter.c21
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c51
-rw-r--r--net/ipv4/netfilter/iptable_raw.c8
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c70
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c22
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c27
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c63
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c120
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c108
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c45
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c19
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c96
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c80
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c77
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c25
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c25
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c556
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c29
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c76
-rw-r--r--net/ipv4/proc.c71
-rw-r--r--net/ipv4/raw.c47
-rw-r--r--net/ipv4/route.c283
-rw-r--r--net/ipv4/syncookies.c102
-rw-r--r--net/ipv4/sysctl_net_ipv4.c162
-rw-r--r--net/ipv4/tcp.c22
-rw-r--r--net/ipv4/tcp_bic.c5
-rw-r--r--net/ipv4/tcp_cubic.c35
-rw-r--r--net/ipv4/tcp_input.c233
-rw-r--r--net/ipv4/tcp_ipv4.c195
-rw-r--r--net/ipv4/tcp_minisocks.c36
-rw-r--r--net/ipv4/tcp_output.c28
-rw-r--r--net/ipv4/tcp_probe.c2
-rw-r--r--net/ipv4/tcp_timer.c19
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c142
-rw-r--r--net/ipv4/udp_impl.h6
-rw-r--r--net/ipv4/udplite.c62
-rw-r--r--net/ipv4/xfrm4_mode_beet.c11
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv4/xfrm4_policy.c2
-rw-r--r--net/ipv4/xfrm4_state.c2
90 files changed, 3060 insertions, 1942 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 19880b086e7..4670683b468 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -160,7 +160,7 @@ config IP_PNP_DHCP
If unsure, say Y. Note that if you want to use DHCP, a DHCP server
must be operating on your network. Read
- <file:Documentation/nfsroot.txt> for details.
+ <file:Documentation/filesystems/nfsroot.txt> for details.
config IP_PNP_BOOTP
bool "IP: BOOTP support"
@@ -175,7 +175,7 @@ config IP_PNP_BOOTP
does BOOTP itself, providing all necessary information on the kernel
command line, you can say N here. If unsure, say Y. Note that if you
want to use BOOTP, a BOOTP server must be operating on your network.
- Read <file:Documentation/nfsroot.txt> for details.
+ Read <file:Documentation/filesystems/nfsroot.txt> for details.
config IP_PNP_RARP
bool "IP: RARP support"
@@ -187,8 +187,8 @@ config IP_PNP_RARP
discovered automatically at boot time using the RARP protocol (an
older protocol which is being obsoleted by BOOTP and DHCP), say Y
here. Note that if you want to use RARP, a RARP server must be
- operating on your network. Read <file:Documentation/nfsroot.txt> for
- details.
+ operating on your network. Read
+ <file:Documentation/filesystems/nfsroot.txt> for details.
# not yet ready..
# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
@@ -343,7 +343,7 @@ config INET_ESP
tristate "IP: ESP transformation"
select XFRM
select CRYPTO
- select CRYPTO_AEAD
+ select CRYPTO_AUTHENC
select CRYPTO_HMAC
select CRYPTO_MD5
select CRYPTO_CBC
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 09ca5293d08..f2b5270efda 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -243,6 +243,23 @@ void build_ehash_secret(void)
}
EXPORT_SYMBOL(build_ehash_secret);
+static inline int inet_netns_ok(struct net *net, int protocol)
+{
+ int hash;
+ struct net_protocol *ipprot;
+
+ if (net == &init_net)
+ return 1;
+
+ hash = protocol & (MAX_INET_PROTOS - 1);
+ ipprot = rcu_dereference(inet_protos[hash]);
+
+ if (ipprot == NULL)
+ /* raw IP is OK */
+ return 1;
+ return ipprot->netns_ok;
+}
+
/*
* Create an inet socket.
*/
@@ -259,9 +276,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
int try_loading_module = 0;
int err;
- if (net != &init_net)
- return -EAFNOSUPPORT;
-
if (sock->type != SOCK_RAW &&
sock->type != SOCK_DGRAM &&
!inet_ehash_secret)
@@ -320,6 +334,10 @@ lookup_protocol:
if (answer->capability > 0 && !capable(answer->capability))
goto out_rcu_unlock;
+ err = -EAFNOSUPPORT;
+ if (!inet_netns_ok(net, protocol))
+ goto out_rcu_unlock;
+
sock->ops = answer->ops;
answer_prot = answer->prot;
answer_no_check = answer->no_check;
@@ -446,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in))
goto out;
- chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
+ chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
@@ -458,7 +476,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
err = -EADDRNOTAVAIL;
if (!sysctl_ip_nonlocal_bind &&
!inet->freebind &&
- addr->sin_addr.s_addr != INADDR_ANY &&
+ addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
chk_addr_ret != RTN_BROADCAST)
@@ -784,6 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
int err = 0;
+ struct net *net = sock_net(sk);
switch (cmd) {
case SIOCGSTAMP:
@@ -795,12 +814,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCADDRT:
case SIOCDELRT:
case SIOCRTMSG:
- err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg);
+ err = ip_rt_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCDARP:
case SIOCGARP:
case SIOCSARP:
- err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg);
+ err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
case SIOCSIFADDR:
@@ -813,7 +832,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFPFLAGS:
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
- err = devinet_ioctl(cmd, (void __user *)arg);
+ err = devinet_ioctl(net, cmd, (void __user *)arg);
break;
default:
if (sk->sk_prot->ioctl)
@@ -1058,8 +1077,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
if (sysctl_ip_dynaddr > 1) {
printk(KERN_INFO "%s(): shifting inet->"
- "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
- __FUNCTION__,
+ "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
+ __func__,
NIPQUAD(old_saddr),
NIPQUAD(new_saddr));
}
@@ -1113,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk)
};
security_sk_classify_flow(sk, &fl);
- err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0);
+ err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
}
if (!err)
sk_setup_caps(sk, &rt->u.dst);
@@ -1231,6 +1250,29 @@ out:
return segs;
}
+int inet_ctl_sock_create(struct sock **sk, unsigned short family,
+ unsigned short type, unsigned char protocol,
+ struct net *net)
+{
+ struct socket *sock;
+ int rc = sock_create_kern(family, type, protocol, &sock);
+
+ if (rc == 0) {
+ *sk = sock->sk;
+ (*sk)->sk_allocation = GFP_ATOMIC;
+ /*
+ * Unhash it so that IP input processing does not even see it,
+ * we do not wish this socket to see incoming packets.
+ */
+ (*sk)->sk_prot->unhash(*sk);
+
+ sk_change_net(*sk, net);
+ }
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
+
unsigned long snmp_fold_field(void *mib[], int offt)
{
unsigned long res = 0;
@@ -1283,17 +1325,20 @@ static struct net_protocol tcp_protocol = {
.gso_send_check = tcp_v4_gso_send_check,
.gso_segment = tcp_tso_segment,
.no_policy = 1,
+ .netns_ok = 1,
};
static struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
+ .netns_ok = 1,
};
static struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.no_policy = 1,
+ .netns_ok = 1,
};
static int __init init_ipv4_mibs(void)
@@ -1414,7 +1459,7 @@ static int __init inet_init(void)
ip_init();
- tcp_v4_init(&inet_family_ops);
+ tcp_v4_init();
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -1429,7 +1474,8 @@ static int __init inet_init(void)
* Set the ICMP layer up
*/
- icmp_init(&inet_family_ops);
+ if (icmp_init() < 0)
+ panic("Failed to create the ICMP control socket.\n");
/*
* Initialise the multicast router
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c663fa5339e..68b72a7a180 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh)
return -EINVAL;
}
- neigh->type = inet_addr_type(&init_net, addr);
+ neigh->type = inet_addr_type(dev_net(dev), addr);
parms = in_dev->arp_parms;
__neigh_parms_put(neigh->parms);
@@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
default:
case 0: /* By default announce any local IP */
- if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL)
+ if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL)
saddr = ip_hdr(skb)->saddr;
break;
case 1: /* Restrict announcements of saddr in same subnet */
if (!skb)
break;
saddr = ip_hdr(skb)->saddr;
- if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) {
+ if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
/* saddr should be known to target */
if (inet_addr_onlink(in_dev, target, saddr))
break;
@@ -368,6 +368,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
if (!(neigh->nud_state&NUD_VALID))
printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
dst_ha = neigh->ha;
+ read_lock_bh(&neigh->lock);
} else if ((probes -= neigh->parms->app_probes) < 0) {
#ifdef CONFIG_ARPD
neigh_app_ns(neigh);
@@ -377,6 +378,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
dst_ha, dev->dev_addr, NULL);
+ if (dst_ha)
+ read_unlock_bh(&neigh->lock);
}
static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
@@ -421,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
int flag = 0;
/*unsigned long now; */
- if (ip_route_output_key(&init_net, &rt, &fl) < 0)
+ if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
return 1;
if (rt->u.dst.dev != dev) {
NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
@@ -472,9 +475,9 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
return 1;
}
- paddr = ((struct rtable*)skb->dst)->rt_gateway;
+ paddr = skb->rtable->rt_gateway;
- if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev))
+ if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev))
return 0;
n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -567,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
* Allocate a buffer
*/
- skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
- + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
if (skb == NULL)
return NULL;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
skb_reset_network_header(skb);
- arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
+ arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
skb->dev = dev;
skb->protocol = htons(ETH_P_ARP);
if (src_hw == NULL)
@@ -707,6 +709,7 @@ static int arp_process(struct sk_buff *skb)
u16 dev_type = dev->type;
int addr_type;
struct neighbour *n;
+ struct net *net = dev_net(dev);
/* arp_rcv below verifies the ARP header and verifies the device
* is ARP'able.
@@ -802,7 +805,7 @@ static int arp_process(struct sk_buff *skb)
/* Special case: IPv4 duplicate address detection packet (RFC2131) */
if (sip == 0) {
if (arp->ar_op == htons(ARPOP_REQUEST) &&
- inet_addr_type(&init_net, tip) == RTN_LOCAL &&
+ inet_addr_type(net, tip) == RTN_LOCAL &&
!arp_ignore(in_dev, sip, tip))
arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
dev->dev_addr, sha);
@@ -812,7 +815,7 @@ static int arp_process(struct sk_buff *skb)
if (arp->ar_op == htons(ARPOP_REQUEST) &&
ip_route_input(skb, tip, sip, 0, dev) == 0) {
- rt = (struct rtable*)skb->dst;
+ rt = skb->rtable;
addr_type = rt->rt_type;
if (addr_type == RTN_LOCAL) {
@@ -832,7 +835,7 @@ static int arp_process(struct sk_buff *skb)
goto out;
} else if (IN_DEV_FORWARD(in_dev)) {
if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
- (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) {
+ (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
@@ -855,14 +858,14 @@ static int arp_process(struct sk_buff *skb)
n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
- if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) {
+ if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
if (n == NULL &&
arp->ar_op == htons(ARPOP_REPLY) &&
- inet_addr_type(&init_net, sip) == RTN_UNICAST)
+ inet_addr_type(net, sip) == RTN_UNICAST)
n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
}
@@ -909,13 +912,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct arphdr *arp;
- if (dev->nd_net != &init_net)
- goto freeskb;
-
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
- (2 * dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, arp_hdr_len(dev)))
goto freeskb;
arp = arp_hdr(skb);
@@ -1198,9 +1196,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
{
struct net_device *dev = ptr;
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&arp_tbl, dev);
@@ -1315,7 +1310,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
}
#endif
- sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key));
+ sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key));
seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
read_unlock(&n->lock);
@@ -1328,7 +1323,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq,
int hatype = dev ? dev->type : 0;
char tbuf[16];
- sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->key));
+ sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key));
seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
dev ? dev->name : "*");
@@ -1382,13 +1377,29 @@ static const struct file_operations arp_seq_fops = {
.release = seq_release_net,
};
-static int __init arp_proc_init(void)
+
+static int __net_init arp_net_init(struct net *net)
{
- if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops))
+ if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops))
return -ENOMEM;
return 0;
}
+static void __net_exit arp_net_exit(struct net *net)
+{
+ proc_net_remove(net, "arp");
+}
+
+static struct pernet_operations arp_net_ops = {
+ .init = arp_net_init,
+ .exit = arp_net_exit,
+};
+
+static int __init arp_proc_init(void)
+{
+ return register_pernet_subsys(&arp_net_ops);
+}
+
#else /* CONFIG_PROC_FS */
static int __init arp_proc_init(void)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8cd357f4128..4637ded3dba 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1800,7 +1800,6 @@ int cipso_v4_sock_setattr(struct sock *sk,
}
memcpy(opt->__data, buf, buf_len);
opt->optlen = opt_len;
- opt->is_data = 1;
opt->cipso = sizeof(struct iphdr);
kfree(buf);
buf = NULL;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f282b26f63e..6848e4760f3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -165,7 +165,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
if (!in_dev)
goto out;
INIT_RCU_HEAD(&in_dev->rcu_head);
- memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
+ memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
sizeof(in_dev->cnf));
in_dev->cnf.sysctl = NULL;
in_dev->dev = dev;
@@ -437,7 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
struct in_device *in_dev;
struct ifaddrmsg *ifm;
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
ASSERT_RTNL();
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
if (err < 0)
goto errout;
@@ -555,14 +552,11 @@ errout:
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
ASSERT_RTNL();
- if (net != &init_net)
- return -EINVAL;
-
ifa = rtm_to_ifaddr(net, nlh);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
@@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr)
}
-int devinet_ioctl(unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct ifreq ifr;
struct sockaddr_in sin_orig;
@@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
*colon = 0;
#ifdef CONFIG_KMOD
- dev_load(&init_net, ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
#endif
switch (cmd) {
@@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
rtnl_lock();
ret = -ENODEV;
- if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
+ if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
goto done;
if (colon)
@@ -752,6 +746,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_broadcast = 0;
ifa->ifa_anycast = 0;
+ ifa->ifa_scope = 0;
}
ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
@@ -877,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
__be32 addr = 0;
struct in_device *in_dev;
+ struct net *net = dev_net(dev);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
@@ -905,7 +901,7 @@ no_in_dev:
*/
read_lock(&dev_base_lock);
rcu_read_lock();
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(net, dev) {
if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
@@ -978,7 +974,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
if (scope != RT_SCOPE_LINK)
return confirm_addr_indev(in_dev, dst, local, scope);
- net = in_dev->dev->nd_net;
+ net = dev_net(in_dev->dev);
read_lock(&dev_base_lock);
rcu_read_lock();
for_each_netdev(net, dev) {
@@ -1044,9 +1040,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
ASSERT_RTNL();
if (!in_dev) {
@@ -1165,16 +1158,13 @@ nla_put_failure:
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
int idx, ip_idx;
struct net_device *dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
int s_ip_idx, s_idx = cb->args[0];
- if (net != &init_net)
- return 0;
-
s_ip_idx = ip_idx = cb->args[1];
idx = 0;
for_each_netdev(net, dev) {
@@ -1213,7 +1203,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
int err = -ENOBUFS;
struct net *net;
- net = ifa->ifa_dev->dev->nd_net;
+ net = dev_net(ifa->ifa_dev->dev);
skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -1527,7 +1517,7 @@ static void devinet_sysctl_register(struct in_device *idev)
{
neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
NET_IPV4_NEIGH, "ipv4", NULL, NULL);
- __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
+ __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev->dev->ifindex, &idev->cnf);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 091e6709f83..4e73e5708e7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -168,7 +168,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_encap_tmpl *encap = x->encap;
struct udphdr *uh;
__be32 *udpdata32;
- unsigned int sport, dport;
+ __be16 sport, dport;
int encap_type;
spin_lock_bh(&x->lock);
@@ -336,7 +336,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
struct scatterlist *asg;
int err = -EINVAL;
- if (!pskb_may_pull(skb, sizeof(*esph)))
+ if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
goto out;
if (elen <= 0)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 86ff2711fc9..0f1557a4ac7 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -257,7 +257,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
if (in_dev == NULL)
goto e_inval;
- net = dev->nd_net;
+ net = dev_net(dev);
if (fib_lookup(net, &fl, &res))
goto last_resort;
if (res.type != RTN_UNICAST)
@@ -583,7 +583,7 @@ errout:
static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct fib_config cfg;
struct fib_table *tb;
int err;
@@ -605,7 +605,7 @@ errout:
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
struct fib_config cfg;
struct fib_table *tb;
int err;
@@ -627,7 +627,7 @@ errout:
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
@@ -674,7 +674,7 @@ out:
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
- struct net *net = ifa->ifa_dev->dev->nd_net;
+ struct net *net = dev_net(ifa->ifa_dev->dev);
struct fib_table *tb;
struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
@@ -801,15 +801,15 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
/* Check, that this local address finally disappeared. */
- if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
+ if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
/* And the last, but not the least thing.
We must flush stray FIB entries.
First of all, we scan fib_info list searching
for stray nexthop entries, then ignite fib_flush.
*/
- if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local))
- fib_flush(dev->nd_net);
+ if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+ fib_flush(dev_net(dev));
}
}
#undef LOCAL_OK
@@ -857,7 +857,7 @@ static void nl_fib_input(struct sk_buff *skb)
struct fib_table *tb;
u32 pid;
- net = skb->sk->sk_net;
+ net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
@@ -899,7 +899,7 @@ static void nl_fib_lookup_exit(struct net *net)
static void fib_disable_ip(struct net_device *dev, int force)
{
if (fib_sync_down_dev(dev, force))
- fib_flush(dev->nd_net);
+ fib_flush(dev_net(dev));
rt_cache_flush(0);
arp_ifdown(dev);
}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 76b9c684ccc..02088deb046 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -372,7 +372,8 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash *) tb->tb_data;
- struct fib_node *new_f, *f;
+ struct fib_node *new_f = NULL;
+ struct fib_node *f;
struct fib_alias *fa, *new_fa;
struct fn_zone *fz;
struct fib_info *fi;
@@ -496,7 +497,6 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
err = -ENOBUFS;
- new_f = NULL;
if (!f) {
new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
if (new_f == NULL)
@@ -512,7 +512,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
if (new_fa->fa_info != NULL) {
new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
if (new_fa == NULL)
- goto out_free_new_f;
+ goto out;
}
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
@@ -540,9 +540,9 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
&cfg->fc_nlinfo, 0);
return 0;
-out_free_new_f:
- kmem_cache_free(fn_hash_kmem, new_f);
out:
+ if (new_f)
+ kmem_cache_free(fn_hash_kmem, new_f);
fib_release_info(fi);
return err;
}
@@ -821,7 +821,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
struct fib_table *main_table;
struct fn_hash *table;
- main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN);
+ main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
table = (struct fn_hash *)main_table->tb_data;
iter->bucket = 0;
@@ -959,11 +959,10 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(fib_hash_lock)
{
- struct fib_iter_state *iter = seq->private;
void *v = NULL;
read_lock(&fib_hash_lock);
- if (fib_get_table(iter->p.net, RT_TABLE_MAIN))
+ if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
return v;
}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 19274d01afa..1fb56876be5 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -137,7 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
- struct net *net = skb->sk->sk_net;
+ struct net *net = sock_net(skb->sk);
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a13c84763d4..3b83c34019f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -152,6 +152,7 @@ void free_fib_info(struct fib_info *fi)
nh->nh_dev = NULL;
} endfor_nexthops(fi);
fib_info_cnt--;
+ release_net(fi->fib_net);
kfree(fi);
}
@@ -730,7 +731,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
fib_info_cnt++;
- fi->fib_net = net;
+ fi->fib_net = hold_net(net);
fi->fib_protocol = cfg->fc_protocol;
fi->fib_flags = cfg->fc_flags;
fi->fib_priority = cfg->fc_priority;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1ff446d0fa8..ea294fffb9c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -122,7 +122,10 @@ struct tnode {
unsigned char bits; /* 2log(KEYLENGTH) bits needed */
unsigned int full_children; /* KEYLENGTH bits needed */
unsigned int empty_children; /* KEYLENGTH bits needed */
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
struct node *child[0];
};
@@ -160,7 +163,6 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
static struct node *resize(struct trie *t, struct tnode *tn);
static struct tnode *inflate(struct trie *t, struct tnode *tn);
static struct tnode *halve(struct trie *t, struct tnode *tn);
-static void tnode_free(struct tnode *tn);
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -177,10 +179,13 @@ static inline struct tnode *node_parent_rcu(struct node *node)
return rcu_dereference(ret);
}
+/* Same as rcu_assign_pointer
+ * but that macro() assumes that value is a pointer.
+ */
static inline void node_set_parent(struct node *node, struct tnode *ptr)
{
- rcu_assign_pointer(node->parent,
- (unsigned long)ptr | NODE_TYPE(node));
+ smp_wmb();
+ node->parent = (unsigned long)ptr | NODE_TYPE(node);
}
static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
@@ -331,6 +336,11 @@ static void __leaf_free_rcu(struct rcu_head *head)
kmem_cache_free(trie_leaf_kmem, l);
}
+static inline void free_leaf(struct leaf *l)
+{
+ call_rcu_bh(&l->rcu, __leaf_free_rcu);
+}
+
static void __leaf_info_free_rcu(struct rcu_head *head)
{
kfree(container_of(head, struct leaf_info, rcu));
@@ -343,16 +353,16 @@ static inline void free_leaf_info(struct leaf_info *leaf)
static struct tnode *tnode_alloc(size_t size)
{
- struct page *pages;
-
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
+ else
+ return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+}
- pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
- if (!pages)
- return NULL;
-
- return page_address(pages);
+static void __tnode_vfree(struct work_struct *arg)
+{
+ struct tnode *tn = container_of(arg, struct tnode, work);
+ vfree(tn);
}
static void __tnode_free_rcu(struct rcu_head *head)
@@ -363,16 +373,17 @@ static void __tnode_free_rcu(struct rcu_head *head)
if (size <= PAGE_SIZE)
kfree(tn);
- else
- free_pages((unsigned long)tn, get_order(size));
+ else {
+ INIT_WORK(&tn->work, __tnode_vfree);
+ schedule_work(&tn->work);
+ }
}
static inline void tnode_free(struct tnode *tn)
{
- if (IS_LEAF(tn)) {
- struct leaf *l = (struct leaf *) tn;
- call_rcu_bh(&l->rcu, __leaf_free_rcu);
- } else
+ if (IS_LEAF(tn))
+ free_leaf((struct leaf *) tn);
+ else
call_rcu(&tn->rcu, __tnode_free_rcu);
}
@@ -1083,7 +1094,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
li = leaf_info_new(plen);
if (!li) {
- tnode_free((struct tnode *) l);
+ free_leaf(l);
return NULL;
}
@@ -1119,7 +1130,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
if (!tn) {
free_leaf_info(li);
- tnode_free((struct tnode *) l);
+ free_leaf(l);
return NULL;
}
@@ -1575,7 +1586,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
} else
rcu_assign_pointer(t->trie, NULL);
- tnode_free((struct tnode *) l);
+ free_leaf(l);
}
/*
@@ -1662,7 +1673,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
return 0;
}
-static int trie_flush_list(struct trie *t, struct list_head *head)
+static int trie_flush_list(struct list_head *head)
{
struct fib_alias *fa, *fa_node;
int found = 0;
@@ -1680,7 +1691,7 @@ static int trie_flush_list(struct trie *t, struct list_head *head)
return found;
}
-static int trie_flush_leaf(struct trie *t, struct leaf *l)
+static int trie_flush_leaf(struct leaf *l)
{
int found = 0;
struct hlist_head *lih = &l->list;
@@ -1688,7 +1699,7 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
struct leaf_info *li = NULL;
hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
- found += trie_flush_list(t, &li->falh);
+ found += trie_flush_list(&li->falh);
if (list_empty(&li->falh)) {
hlist_del_rcu(&li->hlist);
@@ -1779,7 +1790,7 @@ static int fn_trie_flush(struct fib_table *tb)
int found = 0;
for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
- found += trie_flush_leaf(t, l);
+ found += trie_flush_leaf(l);
if (ll && hlist_empty(&ll->list))
trie_leaf_remove(t, ll);
@@ -2026,9 +2037,8 @@ struct fib_table *fib_hash_table(u32 id)
/* Depth first Trie walk iterator */
struct fib_trie_iter {
struct seq_net_private p;
- struct trie *trie_local, *trie_main;
+ struct fib_table *tb;
struct tnode *tnode;
- struct trie *trie;
unsigned index;
unsigned depth;
};
@@ -2081,31 +2091,26 @@ rescan:
static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
struct trie *t)
{
- struct node *n ;
+ struct node *n;
if (!t)
return NULL;
n = rcu_dereference(t->trie);
-
- if (!iter)
+ if (!n)
return NULL;
- if (n) {
- if (IS_TNODE(n)) {
- iter->tnode = (struct tnode *) n;
- iter->trie = t;
- iter->index = 0;
- iter->depth = 1;
- } else {
- iter->tnode = NULL;
- iter->trie = t;
- iter->index = 0;
- iter->depth = 0;
- }
- return n;
+ if (IS_TNODE(n)) {
+ iter->tnode = (struct tnode *) n;
+ iter->index = 0;
+ iter->depth = 1;
+ } else {
+ iter->tnode = NULL;
+ iter->index = 0;
+ iter->depth = 0;
}
- return NULL;
+
+ return n;
}
static void trie_collect_stats(struct trie *t, struct trie_stat *s)
@@ -2116,8 +2121,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
memset(s, 0, sizeof(*s));
rcu_read_lock();
- for (n = fib_trie_get_first(&iter, t); n;
- n = fib_trie_get_next(&iter)) {
+ for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
if (IS_LEAF(n)) {
struct leaf *l = (struct leaf *)n;
struct leaf_info *li;
@@ -2206,36 +2210,48 @@ static void trie_show_usage(struct seq_file *seq,
}
#endif /* CONFIG_IP_FIB_TRIE_STATS */
-static void fib_trie_show(struct seq_file *seq, const char *name,
- struct trie *trie)
+static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
{
- struct trie_stat stat;
-
- trie_collect_stats(trie, &stat);
- seq_printf(seq, "%s:\n", name);
- trie_show_stats(seq, &stat);
-#ifdef CONFIG_IP_FIB_TRIE_STATS
- trie_show_usage(seq, &trie->stats);
-#endif
+ if (tb->tb_id == RT_TABLE_LOCAL)
+ seq_puts(seq, "Local:\n");
+ else if (tb->tb_id == RT_TABLE_MAIN)
+ seq_puts(seq, "Main:\n");
+ else
+ seq_printf(seq, "Id %d:\n", tb->tb_id);
}
+
static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
- struct fib_table *tb;
+ unsigned int h;
seq_printf(seq,
"Basic info: size of leaf:"
" %Zd bytes, size of tnode: %Zd bytes.\n",
sizeof(struct leaf), sizeof(struct tnode));
- tb = fib_get_table(net, RT_TABLE_LOCAL);
- if (tb)
- fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct hlist_node *node;
+ struct fib_table *tb;
+
+ hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+ struct trie *t = (struct trie *) tb->tb_data;
+ struct trie_stat stat;
+
+ if (!t)
+ continue;
+
+ fib_table_print(seq, tb);
- tb = fib_get_table(net, RT_TABLE_MAIN);
- if (tb)
- fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
+ trie_collect_stats(t, &stat);
+ trie_show_stats(seq, &stat);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+ trie_show_usage(seq, &t->stats);
+#endif
+ }
+ }
return 0;
}
@@ -2271,67 +2287,79 @@ static const struct file_operations fib_triestat_fops = {
.release = fib_triestat_seq_release,
};
-static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
- loff_t pos)
+static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct fib_trie_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
loff_t idx = 0;
- struct node *n;
+ unsigned int h;
- for (n = fib_trie_get_first(iter, iter->trie_local);
- n; ++idx, n = fib_trie_get_next(iter)) {
- if (pos == idx)
- return n;
- }
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct hlist_node *node;
+ struct fib_table *tb;
- for (n = fib_trie_get_first(iter, iter->trie_main);
- n; ++idx, n = fib_trie_get_next(iter)) {
- if (pos == idx)
- return n;
+ hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+ struct node *n;
+
+ for (n = fib_trie_get_first(iter,
+ (struct trie *) tb->tb_data);
+ n; n = fib_trie_get_next(iter))
+ if (pos == idx++) {
+ iter->tb = tb;
+ return n;
+ }
+ }
}
+
return NULL;
}
static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct fib_trie_iter *iter = seq->private;
- struct fib_table *tb;
-
- if (!iter->trie_local) {
- tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
- if (tb)
- iter->trie_local = (struct trie *) tb->tb_data;
- }
- if (!iter->trie_main) {
- tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
- if (tb)
- iter->trie_main = (struct trie *) tb->tb_data;
- }
rcu_read_lock();
- if (*pos == 0)
- return SEQ_START_TOKEN;
- return fib_trie_get_idx(iter, *pos - 1);
+ return fib_trie_get_idx(seq, *pos);
}
static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct fib_trie_iter *iter = seq->private;
- void *l = v;
+ struct net *net = seq_file_net(seq);
+ struct fib_table *tb = iter->tb;
+ struct hlist_node *tb_node;
+ unsigned int h;
+ struct node *n;
++*pos;
- if (v == SEQ_START_TOKEN)
- return fib_trie_get_idx(iter, 0);
-
- v = fib_trie_get_next(iter);
- BUG_ON(v == l);
- if (v)
- return v;
+ /* next node in same table */
+ n = fib_trie_get_next(iter);
+ if (n)
+ return n;
- /* continue scan in next trie */
- if (iter->trie == iter->trie_local)
- return fib_trie_get_first(iter, iter->trie_main);
+ /* walk rest of this hash chain */
+ h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
+ while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
+ tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
+ n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
+ if (n)
+ goto found;
+ }
+ /* new hash chain */
+ while (++h < FIB_TABLE_HASHSZ) {
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
+ n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
+ if (n)
+ goto found;
+ }
+ }
return NULL;
+
+found:
+ iter->tb = tb;
+ return n;
}
static void fib_trie_seq_stop(struct seq_file *seq, void *v)
@@ -2388,22 +2416,15 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
const struct fib_trie_iter *iter = seq->private;
struct node *n = v;
- if (v == SEQ_START_TOKEN)
- return 0;
-
- if (!node_parent_rcu(n)) {
- if (iter->trie == iter->trie_local)
- seq_puts(seq, "<local>:\n");
- else
- seq_puts(seq, "<main>:\n");
- }
+ if (!node_parent_rcu(n))
+ fib_table_print(seq, iter->tb);
if (IS_TNODE(n)) {
struct tnode *tn = (struct tnode *) n;
__be32 prf = htonl(mask_pfx(tn->key, tn->pos));
seq_indent(seq, iter->depth-1);
- seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
+ seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n",
NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
tn->empty_children);
@@ -2414,7 +2435,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
__be32 val = htonl(l->key);
seq_indent(seq, iter->depth);
- seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
+ seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val));
hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
struct fib_alias *fa;
@@ -2499,7 +2520,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
struct fib_table *tb;
rcu_read_lock();
- tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
+ tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
if (!tb)
return NULL;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a13c074dac0..f064031f203 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
#include <asm/uaccess.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <net/inet_common.h>
/*
* Build xmit assembly blocks
@@ -188,29 +189,6 @@ struct icmp_err icmp_err_convert[] = {
},
};
-/* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all __read_mostly;
-int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
-
-/* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
-
-/*
- * Configurable global rate limit.
- *
- * ratelimit defines tokens/packet consumed for dst->rate_token bucket
- * ratemask defines which icmp types are ratelimited by setting
- * it's bit position.
- *
- * default:
- * dest unreachable (3), source quench (4),
- * time exceeded (11), parameter problem (12)
- */
-
-int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
-int sysctl_icmp_ratemask __read_mostly = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
-
/*
* ICMP control array. This specifies what to do with each ICMP.
*/
@@ -229,14 +207,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
*
* On SMP we have one ICMP socket per-cpu.
*/
-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
-#define icmp_socket __get_cpu_var(__icmp_socket)
+static struct sock *icmp_sk(struct net *net)
+{
+ return net->ipv4.icmp_sk[smp_processor_id()];
+}
-static inline int icmp_xmit_lock(void)
+static inline int icmp_xmit_lock(struct sock *sk)
{
local_bh_disable();
- if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
+ if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path signals a
* dst_link_failure() for an outgoing ICMP packet.
*/
@@ -246,9 +226,9 @@ static inline int icmp_xmit_lock(void)
return 0;
}
-static inline void icmp_xmit_unlock(void)
+static inline void icmp_xmit_unlock(struct sock *sk)
{
- spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
+ spin_unlock_bh(&sk->sk_lock.slock);
}
/*
@@ -291,7 +271,8 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
return rc;
}
-static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
+static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+ int type, int code)
{
struct dst_entry *dst = &rt->u.dst;
int rc = 1;
@@ -308,8 +289,8 @@ static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
goto out;
/* Limit if icmp type is enabled in ratemask. */
- if ((1 << type) & sysctl_icmp_ratemask)
- rc = xrlim_allow(dst, sysctl_icmp_ratelimit);
+ if ((1 << type) & net->ipv4.sysctl_icmp_ratemask)
+ rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit);
out:
return rc;
}
@@ -346,19 +327,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
static void icmp_push_reply(struct icmp_bxm *icmp_param,
struct ipcm_cookie *ipc, struct rtable *rt)
{
+ struct sock *sk;
struct sk_buff *skb;
- if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+ sk = icmp_sk(dev_net(rt->u.dst.dev));
+ if (ip_append_data(sk, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
ipc, rt, MSG_DONTWAIT) < 0)
- ip_flush_pending_frames(icmp_socket->sk);
- else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+ ip_flush_pending_frames(sk);
+ else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
struct icmphdr *icmph = icmp_hdr(skb);
__wsum csum = 0;
struct sk_buff *skb1;
- skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
+ skb_queue_walk(&sk->sk_write_queue, skb1) {
csum = csum_add(csum, skb1->csum);
}
csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
@@ -366,7 +349,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
icmp_param->head_len, csum);
icmph->checksum = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
- ip_push_pending_frames(icmp_socket->sk);
+ ip_push_pending_frames(sk);
}
}
@@ -376,16 +359,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
- struct sock *sk = icmp_socket->sk;
- struct inet_sock *inet = inet_sk(sk);
struct ipcm_cookie ipc;
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
+ struct net *net = dev_net(rt->u.dst.dev);
+ struct sock *sk = icmp_sk(net);
+ struct inet_sock *inet = inet_sk(sk);
__be32 daddr;
if (ip_options_echo(&icmp_param->replyopts, skb))
return;
- if (icmp_xmit_lock())
+ if (icmp_xmit_lock(sk))
return;
icmp_param->data.icmph.checksum = 0;
@@ -405,15 +389,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
.tos = RT_TOS(ip_hdr(skb)->tos) } },
.proto = IPPROTO_ICMP };
security_skb_classify_flow(skb, &fl);
- if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl))
+ if (ip_route_output_key(net, &rt, &fl))
goto out_unlock;
}
- if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
+ if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
icmp_param->data.icmph.code))
icmp_push_reply(icmp_param, &ipc, rt);
ip_rt_put(rt);
out_unlock:
- icmp_xmit_unlock();
+ icmp_xmit_unlock(sk);
}
@@ -433,15 +417,17 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct iphdr *iph;
int room;
struct icmp_bxm icmp_param;
- struct rtable *rt = (struct rtable *)skb_in->dst;
+ struct rtable *rt = skb_in->rtable;
struct ipcm_cookie ipc;
__be32 saddr;
u8 tos;
struct net *net;
+ struct sock *sk;
if (!rt)
goto out;
- net = rt->u.dst.dev->nd_net;
+ net = dev_net(rt->u.dst.dev);
+ sk = icmp_sk(net);
/*
* Find the original header. It is expected to be valid, of course.
@@ -505,7 +491,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
}
}
- if (icmp_xmit_lock())
+ if (icmp_xmit_lock(sk))
return;
/*
@@ -516,7 +502,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
if (!(rt->rt_flags & RTCF_LOCAL)) {
struct net_device *dev = NULL;
- if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
+ if (rt->fl.iif &&
+ net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
dev = dev_get_by_index(net, rt->fl.iif);
if (dev) {
@@ -544,7 +531,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.data.icmph.checksum = 0;
icmp_param.skb = skb_in;
icmp_param.offset = skb_network_offset(skb_in);
- inet_sk(icmp_socket->sk)->tos = tos;
+ inet_sk(sk)->tos = tos;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts;
@@ -591,7 +578,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
}
if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
- goto out_unlock;
+ goto relookup_failed;
if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
err = __ip_route_output_key(net, &rt2, &fl);
@@ -601,7 +588,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
fl2.fl4_dst = fl.fl4_src;
if (ip_route_output_key(net, &rt2, &fl2))
- goto out_unlock;
+ goto relookup_failed;
/* Ugh! */
odst = skb_in->dst;
@@ -609,30 +596,32 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
RT_TOS(tos), rt2->u.dst.dev);
dst_release(&rt2->u.dst);
- rt2 = (struct rtable *)skb_in->dst;
+ rt2 = skb_in->rtable;
skb_in->dst = odst;
}
if (err)
- goto out_unlock;
+ goto relookup_failed;
err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
XFRM_LOOKUP_ICMP);
- if (err == -ENOENT) {
+ switch (err) {
+ case 0:
+ dst_release(&rt->u.dst);
+ rt = rt2;
+ break;
+ case -EPERM:
+ goto ende;
+ default:
+relookup_failed:
if (!rt)
goto out_unlock;
- goto route_done;
+ break;
}
-
- dst_release(&rt->u.dst);
- rt = rt2;
-
- if (err)
- goto out_unlock;
}
route_done:
- if (!icmpv4_xrlim_allow(rt, type, code))
+ if (!icmpv4_xrlim_allow(net, rt, type, code))
goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */
@@ -652,7 +641,7 @@ route_done:
ende:
ip_rt_put(rt);
out_unlock:
- icmp_xmit_unlock();
+ icmp_xmit_unlock(sk);
out:;
}
@@ -670,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb)
u32 info = 0;
struct net *net;
- net = skb->dst->dev->nd_net;
+ net = dev_net(skb->dst->dev);
/*
* Incomplete header ?
@@ -696,7 +685,7 @@ static void icmp_unreach(struct sk_buff *skb)
break;
case ICMP_FRAG_NEEDED:
if (ipv4_config.no_pmtu_disc) {
- LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
+ LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": "
"fragmentation needed "
"and DF set.\n",
NIPQUAD(iph->daddr));
@@ -708,7 +697,7 @@ static void icmp_unreach(struct sk_buff *skb)
}
break;
case ICMP_SR_FAILED:
- LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
+ LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": Source "
"Route Failed.\n",
NIPQUAD(iph->daddr));
break;
@@ -738,12 +727,12 @@ static void icmp_unreach(struct sk_buff *skb)
* get the other vendor to fix their kit.
*/
- if (!sysctl_icmp_ignore_bogus_error_responses &&
+ if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
if (net_ratelimit())
- printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
+ printk(KERN_WARNING NIPQUAD_FMT " sent an invalid ICMP "
"type %u, code %u "
- "error to a broadcast: %u.%u.%u.%u on %s\n",
+ "error to a broadcast: " NIPQUAD_FMT " on %s\n",
NIPQUAD(ip_hdr(skb)->saddr),
icmph->type, icmph->code,
NIPQUAD(iph->daddr),
@@ -833,7 +822,10 @@ out_err:
static void icmp_echo(struct sk_buff *skb)
{
- if (!sysctl_icmp_echo_ignore_all) {
+ struct net *net;
+
+ net = dev_net(skb->dst->dev);
+ if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
struct icmp_bxm icmp_param;
icmp_param.data.icmph = *icmp_hdr(skb);
@@ -936,7 +928,7 @@ static void icmp_address(struct sk_buff *skb)
static void icmp_address_reply(struct sk_buff *skb)
{
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
struct net_device *dev = skb->dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
@@ -961,8 +953,8 @@ static void icmp_address_reply(struct sk_buff *skb)
break;
}
if (!ifa && net_ratelimit()) {
- printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from "
- "%s/%u.%u.%u.%u\n",
+ printk(KERN_INFO "Wrong address mask " NIPQUAD_FMT " from "
+ "%s/" NIPQUAD_FMT "\n",
NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
}
}
@@ -981,7 +973,7 @@ static void icmp_discard(struct sk_buff *skb)
int icmp_rcv(struct sk_buff *skb)
{
struct icmphdr *icmph;
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
int nh;
@@ -1036,6 +1028,9 @@ int icmp_rcv(struct sk_buff *skb)
*/
if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
+ struct net *net;
+
+ net = dev_net(rt->u.dst.dev);
/*
* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
* silently ignored (we let user decide with a sysctl).
@@ -1044,7 +1039,7 @@ int icmp_rcv(struct sk_buff *skb)
*/
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
- sysctl_icmp_echo_ignore_broadcasts) {
+ net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
goto error;
}
if (icmph->type != ICMP_ECHO &&
@@ -1139,38 +1134,84 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
},
};
-void __init icmp_init(struct net_proto_family *ops)
+static void __net_exit icmp_sk_exit(struct net *net)
{
- struct inet_sock *inet;
int i;
- for_each_possible_cpu(i) {
- int err;
+ for_each_possible_cpu(i)
+ inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
+ kfree(net->ipv4.icmp_sk);
+ net->ipv4.icmp_sk = NULL;
+}
- err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
- &per_cpu(__icmp_socket, i));
+int __net_init icmp_sk_init(struct net *net)
+{
+ int i, err;
+
+ net->ipv4.icmp_sk =
+ kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
+ if (net->ipv4.icmp_sk == NULL)
+ return -ENOMEM;
+ for_each_possible_cpu(i) {
+ struct sock *sk;
+
+ err = inet_ctl_sock_create(&sk, PF_INET,
+ SOCK_RAW, IPPROTO_ICMP, net);
if (err < 0)
- panic("Failed to create the ICMP control socket.\n");
+ goto fail;
- per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
+ net->ipv4.icmp_sk[i] = sk;
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
- per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
+ sk->sk_sndbuf =
(2 * ((64 * 1024) + sizeof(struct sk_buff)));
- inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
- inet->uc_ttl = -1;
- inet->pmtudisc = IP_PMTUDISC_DONT;
-
- /* Unhash it so that IP input processing does not even
- * see it, we do not wish this socket to see incoming
- * packets.
- */
- per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
}
+
+ /* Control parameters for ECHO replies. */
+ net->ipv4.sysctl_icmp_echo_ignore_all = 0;
+ net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
+
+ /* Control parameter - ignore bogus broadcast responses? */
+ net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
+
+ /*
+ * Configurable global rate limit.
+ *
+ * ratelimit defines tokens/packet consumed for dst->rate_token
+ * bucket ratemask defines which icmp types are ratelimited by
+ * setting it's bit position.
+ *
+ * default:
+ * dest unreachable (3), source quench (4),
+ * time exceeded (11), parameter problem (12)
+ */
+
+ net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
+ net->ipv4.sysctl_icmp_ratemask = 0x1818;
+ net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
+
+ return 0;
+
+fail:
+ for_each_possible_cpu(i)
+ inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
+ kfree(net->ipv4.icmp_sk);
+ return err;
+}
+
+static struct pernet_operations __net_initdata icmp_sk_ops = {
+ .init = icmp_sk_init,
+ .exit = icmp_sk_exit,
+};
+
+int __init icmp_init(void)
+{
+ return register_pernet_device(&icmp_sk_ops);
}
EXPORT_SYMBOL(icmp_err_convert);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 732cd07e607..6250f4239b6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -130,12 +130,12 @@
*/
#define IGMP_V1_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
+ (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
((in_dev)->mr_v1_seen && \
time_before(jiffies, (in_dev)->mr_v1_seen)))
#define IGMP_V2_SEEN(in_dev) \
- (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
+ (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
((in_dev)->mr_v2_seen && \
time_before(jiffies, (in_dev)->mr_v2_seen)))
@@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb)
case IGMPV2_HOST_MEMBERSHIP_REPORT:
case IGMPV3_HOST_MEMBERSHIP_REPORT:
/* Is it our report looped back? */
- if (((struct rtable*)skb->dst)->fl.iif == 0)
+ if (skb->rtable->fl.iif == 0)
break;
/* don't rely on MC router hearing unicast reports */
if (skb->pkt_type == PACKET_MULTICAST ||
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
ASSERT_RTNL();
+ if (dev_net(in_dev->dev) != &init_net)
+ return;
+
for (im=in_dev->mc_list; im; im=im->next) {
if (im->multiaddr == addr) {
im->users++;
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
ASSERT_RTNL();
+ if (dev_net(in_dev->dev) != &init_net)
+ return;
+
for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
if (i->multiaddr==addr) {
if (--i->users == 0) {
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev)
ASSERT_RTNL();
+ if (dev_net(in_dev->dev) != &init_net)
+ return;
+
for (i=in_dev->mc_list; i; i=i->next)
igmp_group_dropped(i);
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
{
ASSERT_RTNL();
+ if (dev_net(in_dev->dev) != &init_net)
+ return;
+
in_dev->mc_tomb = NULL;
#ifdef CONFIG_IP_MULTICAST
in_dev->mr_gq_running = 0;
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev)
ASSERT_RTNL();
+ if (dev_net(in_dev->dev) != &init_net)
+ return;
+
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
for (i=in_dev->mc_list; i; i=i->next)
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
ASSERT_RTNL();
+ if (dev_net(in_dev->dev) != &init_net)
+ return;
+
/* Deactivate timers */
ip_mc_down(in_dev);
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
if (!ipv4_is_multicast(addr))
return -EINVAL;
+ if (sock_net(sk) != &init_net)
+ return -EPROTONOSUPPORT;
+
rtnl_lock();
in_dev = ip_mc_find_dev(imr);
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
u32 ifindex;
int ret = -EADDRNOTAVAIL;
+ if (sock_net(sk) != &init_net)
+ return -EPROTONOSUPPORT;
+
rtnl_lock();
in_dev = ip_mc_find_dev(imr);
ifindex = imr->imr_ifindex;
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
if (!ipv4_is_multicast(addr))
return -EINVAL;
+ if (sock_net(sk) != &init_net)
+ return -EPROTONOSUPPORT;
+
rtnl_lock();
imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
msf->imsf_fmode != MCAST_EXCLUDE)
return -EINVAL;
+ if (sock_net(sk) != &init_net)
+ return -EPROTONOSUPPORT;
+
rtnl_lock();
imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
+ if (sock_net(sk) != &init_net)
+ return -EPROTONOSUPPORT;
+
rtnl_lock();
imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
+ if (sock_net(sk) != &init_net)
+ return -EPROTONOSUPPORT;
+
rtnl_lock();
err = -EADDRNOTAVAIL;
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk)
if (inet->mc_list == NULL)
return;
+ if (sock_net(sk) != &init_net)
+ return;
+
rtnl_lock();
while ((iml = inet->mc_list) != NULL) {
struct in_device *in_dev;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b189278c7bc..828ea211ff2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -55,6 +55,13 @@ int inet_csk_bind_conflict(const struct sock *sk,
struct hlist_node *node;
int reuse = sk->sk_reuse;
+ /*
+ * Unlike other sk lookup places we do not check
+ * for sk_net here, since _all_ the socks listed
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
+ */
+
sk_for_each_bound(sk2, node, &tb->owners) {
if (sk != sk2 &&
!inet_v6_ipv6only(sk2) &&
@@ -80,12 +87,12 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
*/
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_bind_hashbucket *head;
struct hlist_node *node;
struct inet_bind_bucket *tb;
int ret;
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
local_bh_disable();
if (!snum) {
@@ -133,8 +140,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
goto tb_not_found;
tb_found:
if (!hlist_empty(&tb->owners)) {
- if (sk->sk_reuse > 1)
- goto success;
if (tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
goto success;
@@ -333,7 +338,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
.dport = ireq->rmt_port } } };
security_req_classify_flow(req, &fl);
- if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
+ if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -414,8 +419,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
struct inet_connection_sock *icsk = inet_csk(parent);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct listen_sock *lopt = queue->listen_opt;
- int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
- int thresh = max_retries;
+ int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
unsigned long now = jiffies;
struct request_sock **reqp, *req;
int i, budget;
@@ -451,9 +455,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
}
}
- if (queue->rskq_defer_accept)
- max_retries = queue->rskq_defer_accept;
-
budget = 2 * (lopt->nr_table_entries / (timeout / interval));
i = lopt->clock_hand;
@@ -461,9 +462,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
reqp=&lopt->syn_table[i];
while ((req = *reqp) != NULL) {
if (time_after_eq(now, req->expires)) {
- if ((req->retrans < thresh ||
- (inet_rsk(req)->acked && req->retrans < max_retries))
- && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+ if (req->retrans < thresh &&
+ !req->rsk_ops->rtx_syn_ack(parent, req)) {
unsigned long timeo;
if (req->retrans++ == 0)
@@ -656,25 +656,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
-int inet_csk_ctl_sock_create(struct socket **sock, unsigned short family,
- unsigned short type, unsigned char protocol)
-{
- int rc = sock_create_kern(family, type, protocol, sock);
-
- if (rc == 0) {
- (*sock)->sk->sk_allocation = GFP_ATOMIC;
- inet_sk((*sock)->sk)->uc_ttl = -1;
- /*
- * Unhash it so that IP input processing does not even see it,
- * we do not wish this socket to see incoming packets.
- */
- (*sock)->sk->sk_prot->unhash((*sock)->sk);
- }
- return rc;
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_ctl_sock_create);
-
#ifdef CONFIG_COMPAT
int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 724d69aed03..4ed429bd595 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -86,7 +86,10 @@ EXPORT_SYMBOL(inet_frags_fini);
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
nf->low_thresh = 0;
+
+ local_bh_disable();
inet_frag_evictor(nf, f);
+ local_bh_enable();
}
EXPORT_SYMBOL(inet_frags_exit_net);
@@ -104,10 +107,10 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt);
- if (!(fq->last_in & COMPLETE)) {
+ if (!(fq->last_in & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f);
atomic_dec(&fq->refcnt);
- fq->last_in |= COMPLETE;
+ fq->last_in |= INET_FRAG_COMPLETE;
}
}
@@ -131,7 +134,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
struct sk_buff *fp;
struct netns_frags *nf;
- BUG_TRAP(q->last_in & COMPLETE);
+ BUG_TRAP(q->last_in & INET_FRAG_COMPLETE);
BUG_TRAP(del_timer(&q->timer) == 0);
/* Release all fragment data. */
@@ -174,7 +177,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
read_unlock(&f->lock);
spin_lock(&q->lock);
- if (!(q->last_in & COMPLETE))
+ if (!(q->last_in & INET_FRAG_COMPLETE))
inet_frag_kill(q, f);
spin_unlock(&q->lock);
@@ -206,7 +209,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
write_unlock(&f->lock);
- qp_in->last_in |= COMPLETE;
+ qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1aba606f6bb..2023d37b270 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
if (tb != NULL) {
- tb->ib_net = net;
+ tb->ib_net = hold_net(net);
tb->port = snum;
tb->fastreuse = 0;
INIT_HLIST_HEAD(&tb->owners);
@@ -51,6 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
{
if (hlist_empty(&tb->owners)) {
__hlist_del(&tb->node);
+ release_net(tb->ib_net);
kmem_cache_free(cachep, tb);
}
}
@@ -68,7 +69,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
*/
static void __inet_put_port(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -91,6 +92,22 @@ void inet_put_port(struct sock *sk)
EXPORT_SYMBOL(inet_put_port);
+void __inet_inherit_port(struct sock *sk, struct sock *child)
+{
+ struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
+ const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+ struct inet_bind_hashbucket *head = &table->bhash[bhash];
+ struct inet_bind_bucket *tb;
+
+ spin_lock(&head->lock);
+ tb = inet_csk(sk)->icsk_bind_hash;
+ sk_add_bind_node(child, &tb->owners);
+ inet_csk(child)->icsk_bind_hash = tb;
+ spin_unlock(&head->lock);
+}
+
+EXPORT_SYMBOL_GPL(__inet_inherit_port);
+
/*
* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
* Look, when several writers sleep and reader wakes them up, all but one
@@ -139,7 +156,7 @@ static struct sock *inet_lookup_listener_slow(struct net *net,
sk_for_each(sk, node, head) {
const struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_net == net && inet->num == hnum &&
+ if (net_eq(sock_net(sk), net) && inet->num == hnum &&
!ipv6_only_sock(sk)) {
const __be32 rcv_saddr = inet->rcv_saddr;
int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -182,7 +199,7 @@ struct sock *__inet_lookup_listener(struct net *net,
if (inet->num == hnum && !sk->sk_node.next &&
(!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
- !sk->sk_bound_dev_if && sk->sk_net == net)
+ !sk->sk_bound_dev_if && net_eq(sock_net(sk), net))
goto sherry_cache;
sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
}
@@ -254,7 +271,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
@@ -288,7 +305,7 @@ unique:
sk->sk_hash = hash;
BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain);
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock(lock);
if (twp) {
@@ -318,7 +335,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
void __inet_hash_nolisten(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_head *list;
rwlock_t *lock;
struct inet_ehash_bucket *head;
@@ -332,14 +349,14 @@ void __inet_hash_nolisten(struct sock *sk)
write_lock(lock);
__sk_add_node(sk, list);
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock(lock);
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
static void __inet_hash(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_head *list;
rwlock_t *lock;
@@ -354,7 +371,7 @@ static void __inet_hash(struct sock *sk)
inet_listen_wlock(hashinfo);
__sk_add_node(sk, list);
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock(lock);
wake_up(&hashinfo->lhash_wait);
}
@@ -372,7 +389,7 @@ EXPORT_SYMBOL_GPL(inet_hash);
void inet_unhash(struct sock *sk)
{
rwlock_t *lock;
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
if (sk_unhashed(sk))
goto out;
@@ -387,7 +404,7 @@ void inet_unhash(struct sock *sk)
}
if (__sk_del_node_init(sk))
- sock_prot_inuse_add(sk->sk_prot, -1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
write_unlock_bh(lock);
out:
if (sk->sk_state == TCP_LISTEN)
@@ -406,7 +423,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_hashbucket *head;
struct inet_bind_bucket *tb;
int ret;
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
if (!snum) {
int i, remaining, low, high, port;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 876169f3a52..ce16e9ac24c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -57,6 +57,7 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
printk(KERN_DEBUG "%s timewait_sock %p released\n",
tw->tw_prot->name, tw);
#endif
+ release_net(twsk_net(tw));
kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
module_put(owner);
}
@@ -91,7 +92,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
/* Step 2: Remove SK from established hash. */
if (__sk_del_node_init(sk))
- sock_prot_inuse_add(sk->sk_prot, -1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
/* Step 3: Hash TW into TIMEWAIT chain. */
inet_twsk_add_node(tw, &ehead->twchain);
@@ -124,6 +125,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
tw->tw_prot = sk->sk_prot_creator;
+ twsk_net_set(tw, hold_net(sock_net(sk)));
atomic_set(&tw->tw_refcnt, 1);
inet_twsk_dead_node_init(tw);
__module_get(tw->tw_prot->owner);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 0b3b328d82d..4813c39b438 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -80,12 +80,12 @@ int ip_forward(struct sk_buff *skb)
if (!xfrm4_route_forward(skb))
goto drop;
- rt = (struct rtable*)skb->dst;
+ rt = skb->rtable;
if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
goto sr_failed;
- if (unlikely(skb->len > dst_mtu(&rt->u.dst) &&
+ if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
(ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a2e92f9709d..cd6ce6ac635 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -194,7 +194,7 @@ static void ip_expire(unsigned long arg)
spin_lock(&qp->q.lock);
- if (qp->q.last_in & COMPLETE)
+ if (qp->q.last_in & INET_FRAG_COMPLETE)
goto out;
ipq_kill(qp);
@@ -202,10 +202,13 @@ static void ip_expire(unsigned long arg)
IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+ if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
struct sk_buff *head = qp->q.fragments;
+ struct net *net;
+
+ net = container_of(qp->q.net, struct net, ipv4.frags);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
- if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
+ if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
dev_put(head->dev);
}
@@ -298,7 +301,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
int ihl, end;
int err = -ENOENT;
- if (qp->q.last_in & COMPLETE)
+ if (qp->q.last_in & INET_FRAG_COMPLETE)
goto err;
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -324,9 +327,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* or have different end, the segment is corrrupted.
*/
if (end < qp->q.len ||
- ((qp->q.last_in & LAST_IN) && end != qp->q.len))
+ ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
goto err;
- qp->q.last_in |= LAST_IN;
+ qp->q.last_in |= INET_FRAG_LAST_IN;
qp->q.len = end;
} else {
if (end&7) {
@@ -336,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
- if (qp->q.last_in & LAST_IN)
+ if (qp->q.last_in & INET_FRAG_LAST_IN)
goto err;
qp->q.len = end;
}
@@ -435,9 +438,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.meat += skb->len;
atomic_add(skb->truesize, &qp->q.net->mem);
if (offset == 0)
- qp->q.last_in |= FIRST_IN;
+ qp->q.last_in |= INET_FRAG_FIRST_IN;
- if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
+ if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ qp->q.meat == qp->q.len)
return ip_frag_reasm(qp, prev, dev);
write_lock(&ip4_frags.lock);
@@ -553,7 +557,7 @@ out_nomem:
out_oversize:
if (net_ratelimit())
printk(KERN_INFO
- "Oversized IP packet from %d.%d.%d.%d.\n",
+ "Oversized IP packet from " NIPQUAD_FMT ".\n",
NIPQUAD(qp->saddr));
out_fail:
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -568,7 +572,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
- net = skb->dev->nd_net;
+ net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
/* Start by cleaning up the memory. */
if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
ip_evictor(net);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 63f69171935..2ada033406d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -39,6 +39,8 @@
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#ifdef CONFIG_IPV6
#include <net/ipv6.h>
@@ -122,7 +124,14 @@ static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_fb_tunnel_init(struct net_device *dev);
-static struct net_device *ipgre_fb_tunnel_dev;
+#define HASH_SIZE 16
+
+static int ipgre_net_id;
+struct ipgre_net {
+ struct ip_tunnel *tunnels[4][HASH_SIZE];
+
+ struct net_device *fb_tunnel_dev;
+};
/* Tunnel hash table */
@@ -142,39 +151,38 @@ static struct net_device *ipgre_fb_tunnel_dev;
will match fallback tunnel.
*/
-#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
-static struct ip_tunnel *tunnels[4][HASH_SIZE];
-
-#define tunnels_r_l (tunnels[3])
-#define tunnels_r (tunnels[2])
-#define tunnels_l (tunnels[1])
-#define tunnels_wc (tunnels[0])
+#define tunnels_r_l tunnels[3]
+#define tunnels_r tunnels[2]
+#define tunnels_l tunnels[1]
+#define tunnels_wc tunnels[0]
static DEFINE_RWLOCK(ipgre_lock);
/* Given src, dst and key, find appropriate for input tunnel. */
-static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
+static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
+ __be32 remote, __be32 local, __be32 key)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(key);
struct ip_tunnel *t;
+ struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
return t;
}
}
- for (t = tunnels_r[h0^h1]; t; t = t->next) {
+ for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
if (remote == t->parms.iph.daddr) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
return t;
}
}
- for (t = tunnels_l[h1]; t; t = t->next) {
+ for (t = ign->tunnels_l[h1]; t; t = t->next) {
if (local == t->parms.iph.saddr ||
(local == t->parms.iph.daddr &&
ipv4_is_multicast(local))) {
@@ -182,17 +190,18 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
return t;
}
}
- for (t = tunnels_wc[h1]; t; t = t->next) {
+ for (t = ign->tunnels_wc[h1]; t; t = t->next) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
return t;
}
- if (ipgre_fb_tunnel_dev->flags&IFF_UP)
- return netdev_priv(ipgre_fb_tunnel_dev);
+ if (ign->fb_tunnel_dev->flags&IFF_UP)
+ return netdev_priv(ign->fb_tunnel_dev);
return NULL;
}
-static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
+static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
+ struct ip_tunnel_parm *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -207,17 +216,18 @@ static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
h ^= HASH(remote);
}
- return &tunnels[prio][h];
+ return &ign->tunnels[prio][h];
}
-static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
+ struct ip_tunnel *t)
{
- return __ipgre_bucket(&t->parms);
+ return __ipgre_bucket(ign, &t->parms);
}
-static void ipgre_tunnel_link(struct ip_tunnel *t)
+static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
- struct ip_tunnel **tp = ipgre_bucket(t);
+ struct ip_tunnel **tp = ipgre_bucket(ign, t);
t->next = *tp;
write_lock_bh(&ipgre_lock);
@@ -225,11 +235,11 @@ static void ipgre_tunnel_link(struct ip_tunnel *t)
write_unlock_bh(&ipgre_lock);
}
-static void ipgre_tunnel_unlink(struct ip_tunnel *t)
+static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
struct ip_tunnel **tp;
- for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
+ for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
if (t == *tp) {
write_lock_bh(&ipgre_lock);
*tp = t->next;
@@ -239,7 +249,8 @@ static void ipgre_tunnel_unlink(struct ip_tunnel *t)
}
}
-static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
+ struct ip_tunnel_parm *parms, int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -247,8 +258,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
char name[IFNAMSIZ];
+ struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
if (key == t->parms.i_key)
return t;
@@ -259,41 +271,42 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
if (parms->name[0])
strlcpy(name, parms->name, IFNAMSIZ);
- else {
- int i;
- for (i=1; i<100; i++) {
- sprintf(name, "gre%d", i);
- if (__dev_get_by_name(&init_net, name) == NULL)
- break;
- }
- if (i==100)
- goto failed;
- }
+ else
+ sprintf(name, "gre%%d");
dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
if (!dev)
return NULL;
+ dev_net_set(dev, net);
+
+ if (strchr(name, '%')) {
+ if (dev_alloc_name(dev, name) < 0)
+ goto failed_free;
+ }
+
dev->init = ipgre_tunnel_init;
nt = netdev_priv(dev);
nt->parms = *parms;
- if (register_netdevice(dev) < 0) {
- free_netdev(dev);
- goto failed;
- }
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
dev_hold(dev);
- ipgre_tunnel_link(nt);
+ ipgre_tunnel_link(ign, nt);
return nt;
-failed:
+failed_free:
+ free_netdev(dev);
return NULL;
}
static void ipgre_tunnel_uninit(struct net_device *dev)
{
- ipgre_tunnel_unlink(netdev_priv(dev));
+ struct net *net = dev_net(dev);
+ struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+
+ ipgre_tunnel_unlink(ign, netdev_priv(dev));
dev_put(dev);
}
@@ -367,7 +380,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
}
read_lock(&ipgre_lock);
- t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
+ t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
+ (flags&GRE_KEY) ?
+ *(((__be32*)p) + (grehlen>>2) - 1) : 0);
if (t == NULL || t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
goto out;
@@ -480,7 +495,7 @@ out:
fl.fl4_dst = eiph->saddr;
fl.fl4_tos = RT_TOS(eiph->tos);
fl.proto = IPPROTO_GRE;
- if (ip_route_output_key(&init_net, &rt, &fl)) {
+ if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
kfree_skb(skb2);
return;
}
@@ -493,7 +508,7 @@ out:
fl.fl4_dst = eiph->daddr;
fl.fl4_src = eiph->saddr;
fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(&init_net, &rt, &fl) ||
+ if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
rt->u.dst.dev->type != ARPHRD_IPGRE) {
ip_rt_put(rt);
kfree_skb(skb2);
@@ -600,7 +615,8 @@ static int ipgre_rcv(struct sk_buff *skb)
}
read_lock(&ipgre_lock);
- if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
+ if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
+ iph->saddr, iph->daddr, key)) != NULL) {
secpath_reset(skb);
skb->protocol = *(__be16*)(h + 2);
@@ -623,7 +639,7 @@ static int ipgre_rcv(struct sk_buff *skb)
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
/* Looped back packet, drop it! */
- if (((struct rtable*)skb->dst)->fl.iif == 0)
+ if (skb->rtable->fl.iif == 0)
goto drop;
tunnel->stat.multicast++;
skb->pkt_type = PACKET_BROADCAST;
@@ -703,7 +719,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
if (skb->protocol == htons(ETH_P_IP)) {
- rt = (struct rtable*)skb->dst;
+ rt = skb->rtable;
if ((dst = rt->rt_gateway) == 0)
goto tx_error_icmp;
}
@@ -748,7 +764,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.saddr = tiph->saddr,
.tos = RT_TOS(tos) } },
.proto = IPPROTO_GRE };
- if (ip_route_output_key(&init_net, &rt, &fl)) {
+ if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
tunnel->stat.tx_carrier_errors++;
goto tx_error;
}
@@ -921,7 +937,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
.tos = RT_TOS(iph->tos) } },
.proto = IPPROTO_GRE };
struct rtable *rt;
- if (!ip_route_output_key(&init_net, &rt, &fl)) {
+ if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
@@ -929,7 +945,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
if (tdev) {
hlen = tdev->hard_header_len;
@@ -958,16 +974,18 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct ipgre_net *ign = net_generic(net, ipgre_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
t = NULL;
- if (dev == ipgre_fb_tunnel_dev) {
+ if (dev == ign->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
- t = ipgre_tunnel_locate(&p, 0);
+ t = ipgre_tunnel_locate(net, &p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
@@ -999,9 +1017,9 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (!(p.o_flags&GRE_KEY))
p.o_key = 0;
- t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+ t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
- if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -1021,14 +1039,14 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
break;
}
- ipgre_tunnel_unlink(t);
+ ipgre_tunnel_unlink(ign, t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
t->parms.i_key = p.i_key;
t->parms.o_key = p.o_key;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipgre_tunnel_link(t);
+ ipgre_tunnel_link(ign, t);
netdev_state_change(dev);
}
}
@@ -1056,15 +1074,15 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
goto done;
- if (dev == ipgre_fb_tunnel_dev) {
+ if (dev == ign->fb_tunnel_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
goto done;
err = -ENOENT;
- if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
+ if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
goto done;
err = -EPERM;
- if (t == netdev_priv(ipgre_fb_tunnel_dev))
+ if (t == netdev_priv(ign->fb_tunnel_dev))
goto done;
dev = t->dev;
}
@@ -1177,7 +1195,7 @@ static int ipgre_open(struct net_device *dev)
.tos = RT_TOS(t->parms.iph.tos) } },
.proto = IPPROTO_GRE };
struct rtable *rt;
- if (ip_route_output_key(&init_net, &rt, &fl))
+ if (ip_route_output_key(dev_net(dev), &rt, &fl))
return -EADDRNOTAVAIL;
dev = rt->u.dst.dev;
ip_rt_put(rt);
@@ -1194,7 +1212,7 @@ static int ipgre_close(struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev;
- in_dev = inetdev_by_index(dev->nd_net, t->mlink);
+ in_dev = inetdev_by_index(dev_net(dev), t->mlink);
if (in_dev) {
ip_mc_dec_group(in_dev, t->parms.iph.daddr);
in_dev_put(in_dev);
@@ -1220,6 +1238,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
static int ipgre_tunnel_init(struct net_device *dev)
@@ -1255,10 +1274,11 @@ static int ipgre_tunnel_init(struct net_device *dev)
return 0;
}
-static int __init ipgre_fb_tunnel_init(struct net_device *dev)
+static int ipgre_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
+ struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@@ -1269,7 +1289,7 @@ static int __init ipgre_fb_tunnel_init(struct net_device *dev)
tunnel->hlen = sizeof(struct iphdr) + 4;
dev_hold(dev);
- tunnels_wc[0] = tunnel;
+ ign->tunnels_wc[0] = tunnel;
return 0;
}
@@ -1277,56 +1297,98 @@ static int __init ipgre_fb_tunnel_init(struct net_device *dev)
static struct net_protocol ipgre_protocol = {
.handler = ipgre_rcv,
.err_handler = ipgre_err,
+ .netns_ok = 1,
};
+static void ipgre_destroy_tunnels(struct ipgre_net *ign)
+{
+ int prio;
-/*
- * And now the modules code and kernel interface.
- */
+ for (prio = 0; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ while ((t = ign->tunnels[prio][h]) != NULL)
+ unregister_netdevice(t->dev);
+ }
+ }
+}
-static int __init ipgre_init(void)
+static int ipgre_init_net(struct net *net)
{
int err;
+ struct ipgre_net *ign;
- printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+ err = -ENOMEM;
+ ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
+ if (ign == NULL)
+ goto err_alloc;
- if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
- printk(KERN_INFO "ipgre init: can't add protocol\n");
- return -EAGAIN;
- }
+ err = net_assign_generic(net, ipgre_net_id, ign);
+ if (err < 0)
+ goto err_assign;
- ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
+ ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
ipgre_tunnel_setup);
- if (!ipgre_fb_tunnel_dev) {
+ if (!ign->fb_tunnel_dev) {
err = -ENOMEM;
- goto err1;
+ goto err_alloc_dev;
}
- ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
+ ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
+ dev_net_set(ign->fb_tunnel_dev, net);
- if ((err = register_netdev(ipgre_fb_tunnel_dev)))
- goto err2;
-out:
+ if ((err = register_netdev(ign->fb_tunnel_dev)))
+ goto err_reg_dev;
+
+ return 0;
+
+err_reg_dev:
+ free_netdev(ign->fb_tunnel_dev);
+err_alloc_dev:
+ /* nothing */
+err_assign:
+ kfree(ign);
+err_alloc:
return err;
-err2:
- free_netdev(ipgre_fb_tunnel_dev);
-err1:
- inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
- goto out;
}
-static void __exit ipgre_destroy_tunnels(void)
+static void ipgre_exit_net(struct net *net)
{
- int prio;
+ struct ipgre_net *ign;
- for (prio = 0; prio < 4; prio++) {
- int h;
- for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
- while ((t = tunnels[prio][h]) != NULL)
- unregister_netdevice(t->dev);
- }
+ ign = net_generic(net, ipgre_net_id);
+ rtnl_lock();
+ ipgre_destroy_tunnels(ign);
+ rtnl_unlock();
+ kfree(ign);
+}
+
+static struct pernet_operations ipgre_net_ops = {
+ .init = ipgre_init_net,
+ .exit = ipgre_exit_net,
+};
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+static int __init ipgre_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+
+ if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
+ printk(KERN_INFO "ipgre init: can't add protocol\n");
+ return -EAGAIN;
}
+
+ err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
+ if (err < 0)
+ inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+
+ return err;
}
static void __exit ipgre_fini(void)
@@ -1334,9 +1396,7 @@ static void __exit ipgre_fini(void)
if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
printk(KERN_INFO "ipgre close: can't remove protocol\n");
- rtnl_lock();
- ipgre_destroy_tunnels();
- rtnl_unlock();
+ unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
}
module_init(ipgre_init);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 65631391d47..7b4bad6d572 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -160,6 +160,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
struct ip_ra_chain *ra;
u8 protocol = ip_hdr(skb)->protocol;
struct sock *last = NULL;
+ struct net_device *dev = skb->dev;
read_lock(&ip_ra_lock);
for (ra = ip_ra_chain; ra; ra = ra->next) {
@@ -170,7 +171,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
*/
if (sk && inet_sk(sk)->num == protocol &&
(!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == skb->dev->ifindex)) {
+ sk->sk_bound_dev_if == dev->ifindex) &&
+ sock_net(sk) == dev_net(dev)) {
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
read_unlock(&ip_ra_lock);
@@ -197,6 +199,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
static int ip_local_deliver_finish(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
+
__skb_pull(skb, ip_hdrlen(skb));
/* Point into the IP datagram, just past the header. */
@@ -212,7 +216,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
raw = raw_local_deliver(skb, protocol);
hash = protocol & (MAX_INET_PROTOS - 1);
- if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
+ ipprot = rcu_dereference(inet_protos[hash]);
+ if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) {
int ret;
if (!ipprot->no_policy) {
@@ -283,13 +288,14 @@ static inline int ip_rcv_options(struct sk_buff *skb)
}
iph = ip_hdr(skb);
+ opt = &(IPCB(skb)->opt);
+ opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
- if (ip_options_compile(NULL, skb)) {
+ if (ip_options_compile(dev_net(dev), opt, skb)) {
IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
goto drop;
}
- opt = &(IPCB(skb)->opt);
if (unlikely(opt->srr)) {
struct in_device *in_dev = in_dev_get(dev);
if (in_dev) {
@@ -297,7 +303,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
if (IN_DEV_LOG_MARTIANS(in_dev) &&
net_ratelimit())
printk(KERN_INFO "source route option "
- "%u.%u.%u.%u -> %u.%u.%u.%u\n",
+ NIPQUAD_FMT " -> " NIPQUAD_FMT "\n",
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
in_dev_put(in_dev);
@@ -351,7 +357,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
if (iph->ihl > 5 && ip_rcv_options(skb))
goto drop;
- rt = (struct rtable*)skb->dst;
+ rt = skb->rtable;
if (rt->rt_type == RTN_MULTICAST)
IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
@@ -372,9 +378,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
struct iphdr *iph;
u32 len;
- if (dev->nd_net != &init_net)
- goto drop;
-
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d315158fd3..d107543d3f8 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -45,7 +45,6 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
opt = &(IPCB(skb)->opt);
- opt->is_data = 0;
if (opt->srr)
memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
@@ -95,8 +94,6 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
memset(dopt, 0, sizeof(struct ip_options));
- dopt->is_data = 1;
-
sopt = &(IPCB(skb)->opt);
if (sopt->optlen == 0) {
@@ -107,10 +104,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
sptr = skb_network_header(skb);
dptr = dopt->__data;
- if (skb->dst)
- daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
- else
- daddr = ip_hdr(skb)->daddr;
+ daddr = skb->rtable->rt_spec_dst;
if (sopt->rr) {
optlen = sptr[sopt->rr+1];
@@ -151,7 +145,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
__be32 addr;
memcpy(&addr, sptr+soffset-1, 4);
- if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
+ if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) {
dopt->ts_needtime = 1;
soffset += 8;
}
@@ -254,26 +248,22 @@ void ip_options_fragment(struct sk_buff * skb)
* If opt == NULL, then skb->data should point to IP header.
*/
-int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
+int ip_options_compile(struct net *net,
+ struct ip_options * opt, struct sk_buff * skb)
{
int l;
unsigned char * iph;
unsigned char * optptr;
int optlen;
unsigned char * pp_ptr = NULL;
- struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL;
-
- if (!opt) {
- opt = &(IPCB(skb)->opt);
- iph = skb_network_header(skb);
- opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
- optptr = iph + sizeof(struct iphdr);
- opt->is_data = 0;
- } else {
- optptr = opt->is_data ? opt->__data :
- (unsigned char *)&(ip_hdr(skb)[1]);
- iph = optptr - sizeof(struct iphdr);
- }
+ struct rtable *rt = NULL;
+
+ if (skb != NULL) {
+ rt = skb->rtable;
+ optptr = (unsigned char *)&(ip_hdr(skb)[1]);
+ } else
+ optptr = opt->__data;
+ iph = optptr - sizeof(struct iphdr);
for (l = opt->optlen; l > 0; ) {
switch (*optptr) {
@@ -400,7 +390,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
{
__be32 addr;
memcpy(&addr, &optptr[optptr[2]-1], 4);
- if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
+ if (inet_addr_type(net, addr) == RTN_UNICAST)
break;
if (skb)
timeptr = (__be32*)&optptr[optptr[2]+3];
@@ -517,14 +507,13 @@ static struct ip_options *ip_options_get_alloc(const int optlen)
GFP_KERNEL);
}
-static int ip_options_get_finish(struct ip_options **optp,
+static int ip_options_get_finish(struct net *net, struct ip_options **optp,
struct ip_options *opt, int optlen)
{
while (optlen & 3)
opt->__data[optlen++] = IPOPT_END;
opt->optlen = optlen;
- opt->is_data = 1;
- if (optlen && ip_options_compile(opt, NULL)) {
+ if (optlen && ip_options_compile(net, opt, NULL)) {
kfree(opt);
return -EINVAL;
}
@@ -533,7 +522,8 @@ static int ip_options_get_finish(struct ip_options **optp,
return 0;
}
-int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen)
+int ip_options_get_from_user(struct net *net, struct ip_options **optp,
+ unsigned char __user *data, int optlen)
{
struct ip_options *opt = ip_options_get_alloc(optlen);
@@ -543,10 +533,11 @@ int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *dat
kfree(opt);
return -EFAULT;
}
- return ip_options_get_finish(optp, opt, optlen);
+ return ip_options_get_finish(net, optp, opt, optlen);
}
-int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
+int ip_options_get(struct net *net, struct ip_options **optp,
+ unsigned char *data, int optlen)
{
struct ip_options *opt = ip_options_get_alloc(optlen);
@@ -554,14 +545,14 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
return -ENOMEM;
if (optlen)
memcpy(opt->__data, data, optlen);
- return ip_options_get_finish(optp, opt, optlen);
+ return ip_options_get_finish(net, optp, opt, optlen);
}
void ip_forward_options(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
unsigned char * optptr;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
unsigned char *raw = skb_network_header(skb);
if (opt->rr_needaddr) {
@@ -609,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
__be32 nexthop;
struct iphdr *iph = ip_hdr(skb);
unsigned char *optptr = skb_network_header(skb) + opt->srr;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
struct rtable *rt2;
int err;
@@ -634,13 +625,13 @@ int ip_options_rcv_srr(struct sk_buff *skb)
}
memcpy(&nexthop, &optptr[srrptr-1], 4);
- rt = (struct rtable*)skb->dst;
- skb->dst = NULL;
+ rt = skb->rtable;
+ skb->rtable = NULL;
err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
- rt2 = (struct rtable*)skb->dst;
+ rt2 = skb->rtable;
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
ip_rt_put(rt2);
- skb->dst = &rt->u.dst;
+ skb->rtable = rt;
return -EINVAL;
}
ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 341779e685d..08349267ceb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -142,7 +142,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
__be32 saddr, __be32 daddr, struct ip_options *opt)
{
struct inet_sock *inet = inet_sk(sk);
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
struct iphdr *iph;
/* Build the IP header. */
@@ -240,7 +240,7 @@ static int ip_finish_output(struct sk_buff *skb)
int ip_mc_output(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
struct net_device *dev = rt->u.dst.dev;
/*
@@ -321,7 +321,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
/* Skip all of this if the packet is already routed,
* f.e. by something like SCTP.
*/
- rt = (struct rtable *) skb->dst;
+ rt = skb->rtable;
if (rt != NULL)
goto packet_routed;
@@ -351,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
* itself out.
*/
security_sk_classify_flow(sk, &fl);
- if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0))
+ if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
goto no_route;
}
sk_setup_caps(sk, &rt->u.dst);
@@ -441,7 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
unsigned int mtu, hlen, left, len, ll_rs, pad;
int offset;
__be16 not_last_frag;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
int err = 0;
dev = rt->u.dst.dev;
@@ -825,7 +825,7 @@ int ip_append_data(struct sock *sk,
inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
rt->u.dst.dev->mtu :
dst_mtu(rt->u.dst.path);
- inet->cork.rt = rt;
+ inet->cork.dst = &rt->u.dst;
inet->cork.length = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
@@ -834,7 +834,7 @@ int ip_append_data(struct sock *sk,
transhdrlen += exthdrlen;
}
} else {
- rt = inet->cork.rt;
+ rt = (struct rtable *)inet->cork.dst;
if (inet->cork.flags & IPCORK_OPT)
opt = inet->cork.opt;
@@ -1083,7 +1083,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if (skb_queue_empty(&sk->sk_write_queue))
return -EINVAL;
- rt = inet->cork.rt;
+ rt = (struct rtable *)inet->cork.dst;
if (inet->cork.flags & IPCORK_OPT)
opt = inet->cork.opt;
@@ -1208,10 +1208,8 @@ static void ip_cork_release(struct inet_sock *inet)
inet->cork.flags &= ~IPCORK_OPT;
kfree(inet->cork.opt);
inet->cork.opt = NULL;
- if (inet->cork.rt) {
- ip_rt_put(inet->cork.rt);
- inet->cork.rt = NULL;
- }
+ dst_release(inet->cork.dst);
+ inet->cork.dst = NULL;
}
/*
@@ -1224,7 +1222,7 @@ int ip_push_pending_frames(struct sock *sk)
struct sk_buff **tail_skb;
struct inet_sock *inet = inet_sk(sk);
struct ip_options *opt = NULL;
- struct rtable *rt = inet->cork.rt;
+ struct rtable *rt = (struct rtable *)inet->cork.dst;
struct iphdr *iph;
__be16 df = 0;
__u8 ttl;
@@ -1357,7 +1355,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
} replyopts;
struct ipcm_cookie ipc;
__be32 daddr;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
if (ip_options_echo(&replyopts.opt, skb))
return;
@@ -1384,7 +1382,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
.dport = tcp_hdr(skb)->source } },
.proto = sk->sk_protocol };
security_skb_classify_flow(skb, &fl);
- if (ip_route_output_key(sk->sk_net, &rt, &fl))
+ if (ip_route_output_key(sock_net(sk), &rt, &fl))
return;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index de0572c8885..d8adfd4972e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -57,7 +57,7 @@
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
struct in_pktinfo info;
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
if (rt) {
@@ -163,7 +163,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
ip_cmsg_recv_security(msg, skb);
}
-int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
+int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
{
int err;
struct cmsghdr *cmsg;
@@ -176,7 +176,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
- err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
+ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
if (err)
return err;
break;
@@ -449,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
struct ip_options * opt = NULL;
if (optlen > 40 || optlen < 0)
goto e_inval;
- err = ip_options_get_from_user(&opt, optval, optlen);
+ err = ip_options_get_from_user(sock_net(sk), &opt,
+ optval, optlen);
if (err)
break;
if (inet->is_icsk) {
@@ -583,19 +584,19 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
if (!mreq.imr_ifindex) {
- if (mreq.imr_address.s_addr == INADDR_ANY) {
+ if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
inet->mc_index = 0;
inet->mc_addr = 0;
err = 0;
break;
}
- dev = ip_dev_find(&init_net, mreq.imr_address.s_addr);
+ dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
if (dev) {
mreq.imr_ifindex = dev->ifindex;
dev_put(dev);
}
} else
- dev = __dev_get_by_index(&init_net, mreq.imr_ifindex);
+ dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
err = -EADDRNOTAVAIL;
@@ -1132,7 +1133,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
}
release_sock(sk);
- if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
+ if (len < sizeof(int) && len > 0 && val>=0 && val<=255) {
unsigned char ucval = (unsigned char)val;
len = 1;
if (put_user(len, optlen))
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index ae1f45fc23b..a75807b971b 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -14,7 +14,6 @@
* - Adaptive compression.
*/
#include <linux/module.h>
-#include <asm/semaphore.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/pfkeyv2.h>
@@ -108,8 +107,11 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
const int cpu = get_cpu();
u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
- int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+ int err;
+ local_bh_disable();
+ err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+ local_bh_enable();
if (err)
goto out;
@@ -176,7 +178,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
spi, IPPROTO_COMP, AF_INET);
if (!x)
return;
- NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
+ NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n",
spi, NIPQUAD(iph->daddr));
xfrm_state_put(x);
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index a52b5853aaa..0f42d1c1f69 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -103,6 +103,7 @@
- '3' from resolv.h */
#define NONE __constant_htonl(INADDR_NONE)
+#define ANY __constant_htonl(INADDR_ANY)
/*
* Public IP configuration
@@ -291,7 +292,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
mm_segment_t oldfs = get_fs();
set_fs(get_ds());
- res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
+ res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
set_fs(oldfs);
return res;
}
@@ -375,7 +376,7 @@ static int __init ic_defaults(void)
*/
if (!ic_host_name_set)
- sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+ sprintf(init_utsname()->nodename, NIPQUAD_FMT, NIPQUAD(ic_myaddr));
if (root_server_addr == NONE)
root_server_addr = ic_servaddr;
@@ -388,11 +389,11 @@ static int __init ic_defaults(void)
else if (IN_CLASSC(ntohl(ic_myaddr)))
ic_netmask = htonl(IN_CLASSC_NET);
else {
- printk(KERN_ERR "IP-Config: Unable to guess netmask for address %u.%u.%u.%u\n",
+ printk(KERN_ERR "IP-Config: Unable to guess netmask for address " NIPQUAD_FMT "\n",
NIPQUAD(ic_myaddr));
return -1;
}
- printk("IP-Config: Guessing netmask %u.%u.%u.%u\n", NIPQUAD(ic_netmask));
+ printk("IP-Config: Guessing netmask " NIPQUAD_FMT "\n", NIPQUAD(ic_netmask));
}
return 0;
@@ -433,7 +434,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
unsigned char *sha, *tha; /* s for "source", t for "target" */
struct ic_device *d;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
goto drop;
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -459,10 +460,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (rarp->ar_pro != htons(ETH_P_IP))
goto drop;
- if (!pskb_may_pull(skb,
- sizeof(struct arphdr) +
- (2 * dev->addr_len) +
- (2 * 4)))
+ if (!pskb_may_pull(skb, arp_hdr_len(dev)))
goto drop;
/* OK, it is all there and looks valid, process... */
@@ -753,9 +751,9 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name);
b->htype = dev->type; /* can cause undefined behavior */
}
+
+ /* server_ip and your_ip address are both already zero per RFC2131 */
b->hlen = dev->addr_len;
- b->your_ip = NONE;
- b->server_ip = NONE;
memcpy(b->hw_addr, dev->dev_addr, dev->addr_len);
b->secs = htons(jiffies_diff / HZ);
b->xid = d->xid;
@@ -856,7 +854,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
struct ic_device *d;
int len, ext_len;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
goto drop;
/* Perform verifications before taking the lock. */
@@ -983,9 +981,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
ic_myaddr = b->your_ip;
ic_servaddr = server_id;
#ifdef IPCONFIG_DEBUG
- printk("DHCP: Offered address %u.%u.%u.%u",
+ printk("DHCP: Offered address " NIPQUAD_FMT,
NIPQUAD(ic_myaddr));
- printk(" by server %u.%u.%u.%u\n",
+ printk(" by server " NIPQUAD_FMT "\n",
NIPQUAD(ic_servaddr));
#endif
/* The DHCP indicated server address takes
@@ -1181,11 +1179,11 @@ static int __init ic_dynamic(void)
return -1;
}
- printk("IP-Config: Got %s answer from %u.%u.%u.%u, ",
+ printk("IP-Config: Got %s answer from " NIPQUAD_FMT ", ",
((ic_got_reply & IC_RARP) ? "RARP"
: (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
NIPQUAD(ic_servaddr));
- printk("my address is %u.%u.%u.%u\n", NIPQUAD(ic_myaddr));
+ printk("my address is " NIPQUAD_FMT "\n", NIPQUAD(ic_myaddr));
return 0;
}
@@ -1211,12 +1209,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v)
for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
if (ic_nameservers[i] != NONE)
seq_printf(seq,
- "nameserver %u.%u.%u.%u\n",
+ "nameserver " NIPQUAD_FMT "\n",
NIPQUAD(ic_nameservers[i]));
}
if (ic_servaddr != NONE)
seq_printf(seq,
- "bootserver %u.%u.%u.%u\n",
+ "bootserver " NIPQUAD_FMT "\n",
NIPQUAD(ic_servaddr));
return 0;
}
@@ -1390,14 +1388,14 @@ static int __init ip_auto_config(void)
* Clue in the operator.
*/
printk("IP-Config: Complete:");
- printk("\n device=%s", ic_dev->name);
- printk(", addr=%u.%u.%u.%u", NIPQUAD(ic_myaddr));
- printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
- printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
+ printk("\n device=%s", ic_dev->name);
+ printk(", addr=" NIPQUAD_FMT, NIPQUAD(ic_myaddr));
+ printk(", mask=" NIPQUAD_FMT, NIPQUAD(ic_netmask));
+ printk(", gw=" NIPQUAD_FMT, NIPQUAD(ic_gateway));
printk(",\n host=%s, domain=%s, nis-domain=%s",
utsname()->nodename, ic_domain, utsname()->domainname);
- printk(",\n bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
- printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
+ printk(",\n bootserver=" NIPQUAD_FMT, NIPQUAD(ic_servaddr));
+ printk(", rootserver=" NIPQUAD_FMT, NIPQUAD(root_server_addr));
printk(", rootpath=%s", root_server_path);
printk("\n");
#endif /* !SILENT */
@@ -1410,7 +1408,7 @@ late_initcall(ip_auto_config);
/*
* Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- * command line parameter. See Documentation/nfsroot.txt.
+ * command line parameter. See Documentation/filesystems/nfsroot.txt.
*/
static int __init ic_proto_name(char *name)
{
@@ -1479,19 +1477,19 @@ static int __init ip_auto_config_setup(char *addrs)
DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip));
switch (num) {
case 0:
- if ((ic_myaddr = in_aton(ip)) == INADDR_ANY)
+ if ((ic_myaddr = in_aton(ip)) == ANY)
ic_myaddr = NONE;
break;
case 1:
- if ((ic_servaddr = in_aton(ip)) == INADDR_ANY)
+ if ((ic_servaddr = in_aton(ip)) == ANY)
ic_servaddr = NONE;
break;
case 2:
- if ((ic_gateway = in_aton(ip)) == INADDR_ANY)
+ if ((ic_gateway = in_aton(ip)) == ANY)
ic_gateway = NONE;
break;
case 3:
- if ((ic_netmask = in_aton(ip)) == INADDR_ANY)
+ if ((ic_netmask = in_aton(ip)) == ANY)
ic_netmask = NONE;
break;
case 4:
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index da281581692..149111f08e8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -115,49 +115,57 @@
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+static int ipip_net_id;
+struct ipip_net {
+ struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+ struct ip_tunnel *tunnels_r[HASH_SIZE];
+ struct ip_tunnel *tunnels_l[HASH_SIZE];
+ struct ip_tunnel *tunnels_wc[1];
+ struct ip_tunnel **tunnels[4];
+
+ struct net_device *fb_tunnel_dev;
+};
+
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
-static struct net_device *ipip_fb_tunnel_dev;
-
-static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_r[HASH_SIZE];
-static struct ip_tunnel *tunnels_l[HASH_SIZE];
-static struct ip_tunnel *tunnels_wc[1];
-static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
-
static DEFINE_RWLOCK(ipip_lock);
-static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
+static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
+ __be32 remote, __be32 local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
struct ip_tunnel *t;
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
- for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
}
- for (t = tunnels_r[h0]; t; t = t->next) {
+ for (t = ipn->tunnels_r[h0]; t; t = t->next) {
if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
return t;
}
- for (t = tunnels_l[h1]; t; t = t->next) {
+ for (t = ipn->tunnels_l[h1]; t; t = t->next) {
if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
return t;
}
- if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+ if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
return t;
return NULL;
}
-static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
+static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
+ struct ip_tunnel_parm *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -172,19 +180,20 @@ static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
prio |= 1;
h ^= HASH(local);
}
- return &tunnels[prio][h];
+ return &ipn->tunnels[prio][h];
}
-static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
+ struct ip_tunnel *t)
{
- return __ipip_bucket(&t->parms);
+ return __ipip_bucket(ipn, &t->parms);
}
-static void ipip_tunnel_unlink(struct ip_tunnel *t)
+static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
struct ip_tunnel **tp;
- for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
+ for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
if (t == *tp) {
write_lock_bh(&ipip_lock);
*tp = t->next;
@@ -194,9 +203,9 @@ static void ipip_tunnel_unlink(struct ip_tunnel *t)
}
}
-static void ipip_tunnel_link(struct ip_tunnel *t)
+static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
- struct ip_tunnel **tp = ipip_bucket(t);
+ struct ip_tunnel **tp = ipip_bucket(ipn, t);
t->next = *tp;
write_lock_bh(&ipip_lock);
@@ -204,15 +213,17 @@ static void ipip_tunnel_link(struct ip_tunnel *t)
write_unlock_bh(&ipip_lock);
}
-static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
+ struct ip_tunnel_parm *parms, int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
char name[IFNAMSIZ];
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
- for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -221,46 +232,47 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
if (parms->name[0])
strlcpy(name, parms->name, IFNAMSIZ);
- else {
- int i;
- for (i=1; i<100; i++) {
- sprintf(name, "tunl%d", i);
- if (__dev_get_by_name(&init_net, name) == NULL)
- break;
- }
- if (i==100)
- goto failed;
- }
+ else
+ sprintf(name, "tunl%%d");
dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
if (dev == NULL)
return NULL;
+ dev_net_set(dev, net);
+
+ if (strchr(name, '%')) {
+ if (dev_alloc_name(dev, name) < 0)
+ goto failed_free;
+ }
+
nt = netdev_priv(dev);
dev->init = ipip_tunnel_init;
nt->parms = *parms;
- if (register_netdevice(dev) < 0) {
- free_netdev(dev);
- goto failed;
- }
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
dev_hold(dev);
- ipip_tunnel_link(nt);
+ ipip_tunnel_link(ipn, nt);
return nt;
-failed:
+failed_free:
+ free_netdev(dev);
return NULL;
}
static void ipip_tunnel_uninit(struct net_device *dev)
{
- if (dev == ipip_fb_tunnel_dev) {
+ struct net *net = dev_net(dev);
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
+
+ if (dev == ipn->fb_tunnel_dev) {
write_lock_bh(&ipip_lock);
- tunnels_wc[0] = NULL;
+ ipn->tunnels_wc[0] = NULL;
write_unlock_bh(&ipip_lock);
} else
- ipip_tunnel_unlink(netdev_priv(dev));
+ ipip_tunnel_unlink(ipn, netdev_priv(dev));
dev_put(dev);
}
@@ -309,7 +321,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
read_lock(&ipip_lock);
- t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
+ t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
if (t == NULL || t->parms.iph.daddr == 0)
goto out;
@@ -405,7 +417,7 @@ out:
fl.fl4_daddr = eiph->saddr;
fl.fl4_tos = RT_TOS(eiph->tos);
fl.proto = IPPROTO_IPIP;
- if (ip_route_output_key(&init_net, &rt, &key)) {
+ if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
kfree_skb(skb2);
return 0;
}
@@ -418,7 +430,7 @@ out:
fl.fl4_daddr = eiph->daddr;
fl.fl4_src = eiph->saddr;
fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(&init_net, &rt, &fl) ||
+ if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
rt->u.dst.dev->type != ARPHRD_TUNNEL) {
ip_rt_put(rt);
kfree_skb(skb2);
@@ -469,7 +481,8 @@ static int ipip_rcv(struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
read_lock(&ipip_lock);
- if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+ if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
+ iph->saddr, iph->daddr)) != NULL) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
read_unlock(&ipip_lock);
kfree_skb(skb);
@@ -532,7 +545,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (!dst) {
/* NBMA tunnel */
- if ((rt = (struct rtable*)skb->dst) == NULL) {
+ if ((rt = skb->rtable) == NULL) {
tunnel->stat.tx_fifo_errors++;
goto tx_error;
}
@@ -547,7 +560,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.saddr = tiph->saddr,
.tos = RT_TOS(tos) } },
.proto = IPPROTO_IPIP };
- if (ip_route_output_key(&init_net, &rt, &fl)) {
+ if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
tunnel->stat.tx_carrier_errors++;
goto tx_error_icmp;
}
@@ -668,7 +681,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
.tos = RT_TOS(iph->tos) } },
.proto = IPPROTO_IPIP };
struct rtable *rt;
- if (!ip_route_output_key(&init_net, &rt, &fl)) {
+ if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
@@ -676,7 +689,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
if (tdev) {
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -691,16 +704,18 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct ipip_net *ipn = net_generic(net, ipip_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
t = NULL;
- if (dev == ipip_fb_tunnel_dev) {
+ if (dev == ipn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
- t = ipip_tunnel_locate(&p, 0);
+ t = ipip_tunnel_locate(net, &p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
@@ -726,9 +741,9 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (p.iph.ttl)
p.iph.frag_off |= htons(IP_DF);
- t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+ t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
- if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -741,12 +756,12 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
t = netdev_priv(dev);
- ipip_tunnel_unlink(t);
+ ipip_tunnel_unlink(ipn, t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipip_tunnel_link(t);
+ ipip_tunnel_link(ipn, t);
netdev_state_change(dev);
}
}
@@ -774,15 +789,15 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
goto done;
- if (dev == ipip_fb_tunnel_dev) {
+ if (dev == ipn->fb_tunnel_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
goto done;
err = -ENOENT;
- if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
+ if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
goto done;
err = -EPERM;
- if (t->dev == ipip_fb_tunnel_dev)
+ if (t->dev == ipn->fb_tunnel_dev)
goto done;
dev = t->dev;
}
@@ -826,6 +841,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
static int ipip_tunnel_init(struct net_device *dev)
@@ -845,10 +861,11 @@ static int ipip_tunnel_init(struct net_device *dev)
return 0;
}
-static int __init ipip_fb_tunnel_init(struct net_device *dev)
+static int ipip_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
+ struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@@ -858,7 +875,7 @@ static int __init ipip_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
dev_hold(dev);
- tunnels_wc[0] = tunnel;
+ ipn->tunnels_wc[0] = tunnel;
return 0;
}
@@ -871,50 +888,98 @@ static struct xfrm_tunnel ipip_handler = {
static char banner[] __initdata =
KERN_INFO "IPv4 over IPv4 tunneling driver\n";
-static int __init ipip_init(void)
+static void ipip_destroy_tunnels(struct ipip_net *ipn)
+{
+ int prio;
+
+ for (prio = 1; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ while ((t = ipn->tunnels[prio][h]) != NULL)
+ unregister_netdevice(t->dev);
+ }
+ }
+}
+
+static int ipip_init_net(struct net *net)
{
int err;
+ struct ipip_net *ipn;
- printk(banner);
+ err = -ENOMEM;
+ ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
+ if (ipn == NULL)
+ goto err_alloc;
- if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
- printk(KERN_INFO "ipip init: can't register tunnel\n");
- return -EAGAIN;
- }
+ err = net_assign_generic(net, ipip_net_id, ipn);
+ if (err < 0)
+ goto err_assign;
- ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
+ ipn->tunnels[0] = ipn->tunnels_wc;
+ ipn->tunnels[1] = ipn->tunnels_l;
+ ipn->tunnels[2] = ipn->tunnels_r;
+ ipn->tunnels[3] = ipn->tunnels_r_l;
+
+ ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
"tunl0",
ipip_tunnel_setup);
- if (!ipip_fb_tunnel_dev) {
+ if (!ipn->fb_tunnel_dev) {
err = -ENOMEM;
- goto err1;
+ goto err_alloc_dev;
}
- ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
+ ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
+ dev_net_set(ipn->fb_tunnel_dev, net);
+
+ if ((err = register_netdev(ipn->fb_tunnel_dev)))
+ goto err_reg_dev;
+
+ return 0;
- if ((err = register_netdev(ipip_fb_tunnel_dev)))
- goto err2;
- out:
+err_reg_dev:
+ free_netdev(ipn->fb_tunnel_dev);
+err_alloc_dev:
+ /* nothing */
+err_assign:
+ kfree(ipn);
+err_alloc:
return err;
- err2:
- free_netdev(ipip_fb_tunnel_dev);
- err1:
- xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
- goto out;
}
-static void __exit ipip_destroy_tunnels(void)
+static void ipip_exit_net(struct net *net)
{
- int prio;
+ struct ipip_net *ipn;
- for (prio = 1; prio < 4; prio++) {
- int h;
- for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
- while ((t = tunnels[prio][h]) != NULL)
- unregister_netdevice(t->dev);
- }
+ ipn = net_generic(net, ipip_net_id);
+ rtnl_lock();
+ ipip_destroy_tunnels(ipn);
+ unregister_netdevice(ipn->fb_tunnel_dev);
+ rtnl_unlock();
+ kfree(ipn);
+}
+
+static struct pernet_operations ipip_net_ops = {
+ .init = ipip_init_net,
+ .exit = ipip_exit_net,
+};
+
+static int __init ipip_init(void)
+{
+ int err;
+
+ printk(banner);
+
+ if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
+ printk(KERN_INFO "ipip init: can't register tunnel\n");
+ return -EAGAIN;
}
+
+ err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
+ if (err)
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+
+ return err;
}
static void __exit ipip_fini(void)
@@ -922,10 +987,7 @@ static void __exit ipip_fini(void)
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
printk(KERN_INFO "ipip close: can't deregister tunnel\n");
- rtnl_lock();
- ipip_destroy_tunnels();
- unregister_netdevice(ipip_fb_tunnel_dev);
- rtnl_unlock();
+ unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
}
module_init(ipip_init);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a94f52c207a..11700a4dcd9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk)
{
rtnl_lock();
if (sk == mroute_socket) {
- IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
+ IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
write_lock_bh(&mrt_lock);
mroute_socket=NULL;
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
mroute_socket=sk;
write_unlock_bh(&mrt_lock);
- IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
+ IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
@@ -1089,7 +1089,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
struct vif_device *v;
int ct;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
return NOTIFY_DONE;
if (event != NETDEV_UNREGISTER)
@@ -1283,7 +1283,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
if (vif_table[vif].dev != skb->dev) {
int true_vifi;
- if (((struct rtable*)skb->dst)->fl.iif == 0) {
+ if (skb->rtable->fl.iif == 0) {
/* It is our own packet, looped back.
Very complicated situation...
@@ -1357,7 +1357,7 @@ dont_forward:
int ip_mr_input(struct sk_buff *skb)
{
struct mfc_cache *cache;
- int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
+ int local = skb->rtable->rt_flags&RTCF_LOCAL;
/* Packet is looped back after forward, it should not be
forwarded second time, but still can be delivered locally.
@@ -1594,7 +1594,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
int err;
struct mfc_cache *cache;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
read_lock(&mrt_lock);
cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 12dc0d640b6..620e40ff79a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -550,7 +550,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
"%u.%u.%u.%u:%u to app %s on port %u\n",
- __FUNCTION__,
+ __func__,
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
inc->name, ntohs(inc->port));
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 1fa7b330b9a..1caa2908373 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -344,7 +344,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
"%u.%u.%u.%u:%u to app %s on port %u\n",
- __FUNCTION__,
+ __func__,
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
inc->name, ntohs(inc->port));
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 948378d0a75..69c56663cc9 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -916,7 +916,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
if (!tinfo)
return -ENOMEM;
- IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
sizeof(struct ip_vs_sync_conn));
@@ -956,7 +956,7 @@ int stop_sync_thread(int state)
(state == IP_VS_STATE_BACKUP && !sync_backup_pid))
return -ESRCH;
- IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
IP_VS_INFO("stopping sync thread %d ...\n",
(state == IP_VS_STATE_MASTER) ?
sync_master_pid : sync_backup_pid);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 9a904c6c0dc..f8edacdf991 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -182,21 +182,44 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
}
return csum;
}
-
EXPORT_SYMBOL(nf_ip_checksum);
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ __sum16 csum = 0;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (len == skb->len - dataoff)
+ return nf_ip_checksum(skb, hook, dataoff, protocol);
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+ skb->len - dataoff, 0);
+ skb->ip_summed = CHECKSUM_NONE;
+ csum = __skb_checksum_complete_head(skb, dataoff + len);
+ if (!csum)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ return csum;
+}
+
static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
{
return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
}
static const struct nf_afinfo nf_ip_afinfo = {
- .family = AF_INET,
- .checksum = nf_ip_checksum,
- .route = nf_ip_route,
- .saveroute = nf_ip_saveroute,
- .reroute = nf_ip_reroute,
- .route_key_size = sizeof(struct ip_rt_info),
+ .family = AF_INET,
+ .checksum = nf_ip_checksum,
+ .checksum_partial = nf_ip_checksum_partial,
+ .route = nf_ip_route,
+ .saveroute = nf_ip_saveroute,
+ .reroute = nf_ip_reroute,
+ .route_key_size = sizeof(struct ip_rt_info),
};
static int ipv4_netfilter_init(void)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 9a077cb2479..2767841a8ce 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -241,10 +241,26 @@ config NF_NAT_SNMP_BASIC
# <expr> '&&' <expr> (6)
#
# (6) Returns the result of min(/expr/, /expr/).
+config NF_NAT_PROTO_DCCP
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_DCCP
+ default NF_NAT && NF_CT_PROTO_DCCP
+
config NF_NAT_PROTO_GRE
tristate
depends on NF_NAT && NF_CT_PROTO_GRE
+config NF_NAT_PROTO_UDPLITE
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_UDPLITE
+ default NF_NAT && NF_CT_PROTO_UDPLITE
+
+config NF_NAT_PROTO_SCTP
+ tristate
+ default NF_NAT && NF_CT_PROTO_SCTP
+ depends on NF_NAT && NF_CT_PROTO_SCTP
+ select LIBCRC32C
+
config NF_NAT_FTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 0c7dc78a62e..d9b92fbf557 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -10,7 +10,7 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
endif
endif
-nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
+nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
# connection tracking
@@ -29,7 +29,10 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
+obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a7591ce344d..03e83a65aec 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -52,14 +52,14 @@ MODULE_DESCRIPTION("arptables core");
do { \
if (!(x)) \
printk("ARP_NF_ASSERT: %s:%s:%u\n", \
- __FUNCTION__, __FILE__, __LINE__); \
+ __func__, __FILE__, __LINE__); \
} while(0)
#else
#define ARP_NF_ASSERT(x)
#endif
static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
- char *hdr_addr, int len)
+ const char *hdr_addr, int len)
{
int i, ret;
@@ -80,8 +80,8 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
const char *outdev,
const struct arpt_arp *arpinfo)
{
- char *arpptr = (char *)(arphdr + 1);
- char *src_devaddr, *tgt_devaddr;
+ const char *arpptr = (char *)(arphdr + 1);
+ const char *src_devaddr, *tgt_devaddr;
__be32 src_ipaddr, tgt_ipaddr;
int i, ret;
@@ -222,21 +222,18 @@ unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct arpt_table *table)
+ struct xt_table *table)
{
static const char nulldevname[IFNAMSIZ];
unsigned int verdict = NF_DROP;
- struct arphdr *arp;
+ const struct arphdr *arp;
bool hotdrop = false;
struct arpt_entry *e, *back;
const char *indev, *outdev;
void *table_base;
- struct xt_table_info *private;
+ const struct xt_table_info *private;
- /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
- (2 * skb->dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
return NF_DROP;
indev = in ? in->name : nulldevname;
@@ -355,7 +352,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- struct arpt_standard_target *t
+ const struct arpt_standard_target *t
= (void *)arpt_get_target(e);
int visited = e->comefrom & (1 << hook);
@@ -440,7 +437,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
static inline int check_entry(struct arpt_entry *e, const char *name)
{
- struct arpt_entry_target *t;
+ const struct arpt_entry_target *t;
if (!arp_checkentry(&e->arp)) {
duprintf("arp_tables: arp check failed %p %s.\n", e, name);
@@ -460,7 +457,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name)
static inline int check_target(struct arpt_entry *e, const char *name)
{
struct arpt_entry_target *t;
- struct arpt_target *target;
+ struct xt_target *target;
int ret;
t = arpt_get_target(e);
@@ -483,7 +480,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
unsigned int *i)
{
struct arpt_entry_target *t;
- struct arpt_target *target;
+ struct xt_target *target;
int ret;
ret = check_entry(e, name);
@@ -709,11 +706,11 @@ static void get_counters(const struct xt_table_info *t,
}
}
-static inline struct xt_counters *alloc_counters(struct arpt_table *table)
+static inline struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
@@ -734,7 +731,7 @@ static inline struct xt_counters *alloc_counters(struct arpt_table *table)
}
static int copy_entries_to_user(unsigned int total_size,
- struct arpt_table *table,
+ struct xt_table *table,
void __user *userptr)
{
unsigned int off, num;
@@ -854,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info,
static int get_info(struct net *net, void __user *user, int *len, int compat)
{
char name[ARPT_TABLE_MAXNAMELEN];
- struct arpt_table *t;
+ struct xt_table *t;
int ret;
if (*len != sizeof(struct arpt_getinfo)) {
@@ -875,7 +872,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
"arptable_%s", name);
if (t && !IS_ERR(t)) {
struct arpt_getinfo info;
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
if (compat) {
@@ -914,7 +911,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
{
int ret;
struct arpt_get_entries get;
- struct arpt_table *t;
+ struct xt_table *t;
if (*len < sizeof(get)) {
duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
@@ -930,7 +927,8 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
t = xt_find_table_lock(net, NF_ARP, get.name);
if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
+
duprintf("t->private->number = %u\n",
private->number);
if (get.size == private->size)
@@ -939,7 +937,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
else {
duprintf("get_entries: I've got %u not %u!\n",
private->size, get.size);
- ret = -EINVAL;
+ ret = -EAGAIN;
}
module_put(t->me);
xt_table_unlock(t);
@@ -956,7 +954,7 @@ static int __do_replace(struct net *net, const char *name,
void __user *counters_ptr)
{
int ret;
- struct arpt_table *t;
+ struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
void *loc_cpu_old_entry;
@@ -1090,11 +1088,11 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
- char *name;
+ const char *name;
int size;
void *ptmp;
- struct arpt_table *t;
- struct xt_table_info *private;
+ struct xt_table *t;
+ const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
#ifdef CONFIG_COMPAT
@@ -1499,11 +1497,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case ARPT_SO_SET_REPLACE:
- ret = compat_do_replace(sk->sk_net, user, len);
+ ret = compat_do_replace(sock_net(sk), user, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sk->sk_net, user, len, 1);
+ ret = do_add_counters(sock_net(sk), user, len, 1);
break;
default:
@@ -1557,11 +1555,11 @@ out:
}
static int compat_copy_entries_to_user(unsigned int total_size,
- struct arpt_table *table,
+ struct xt_table *table,
void __user *userptr)
{
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
@@ -1595,7 +1593,7 @@ static int compat_get_entries(struct net *net,
{
int ret;
struct compat_arpt_get_entries get;
- struct arpt_table *t;
+ struct xt_table *t;
if (*len < sizeof(get)) {
duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
@@ -1612,7 +1610,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NF_ARP);
t = xt_find_table_lock(net, NF_ARP, get.name);
if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
@@ -1623,7 +1621,7 @@ static int compat_get_entries(struct net *net,
} else if (!ret) {
duprintf("compat_get_entries: I've got %u not %u!\n",
private->size, get.size);
- ret = -EINVAL;
+ ret = -EAGAIN;
}
xt_compat_flush_offsets(NF_ARP);
module_put(t->me);
@@ -1647,10 +1645,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case ARPT_SO_GET_INFO:
- ret = get_info(sk->sk_net, user, len, 1);
+ ret = get_info(sock_net(sk), user, len, 1);
break;
case ARPT_SO_GET_ENTRIES:
- ret = compat_get_entries(sk->sk_net, user, len);
+ ret = compat_get_entries(sock_net(sk), user, len);
break;
default:
ret = do_arpt_get_ctl(sk, cmd, user, len);
@@ -1668,11 +1666,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
switch (cmd) {
case ARPT_SO_SET_REPLACE:
- ret = do_replace(sk->sk_net, user, len);
+ ret = do_replace(sock_net(sk), user, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sk->sk_net, user, len, 0);
+ ret = do_add_counters(sock_net(sk), user, len, 0);
break;
default:
@@ -1692,11 +1690,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
switch (cmd) {
case ARPT_SO_GET_INFO:
- ret = get_info(sk->sk_net, user, len, 0);
+ ret = get_info(sock_net(sk), user, len, 0);
break;
case ARPT_SO_GET_ENTRIES:
- ret = get_entries(sk->sk_net, user, len);
+ ret = get_entries(sock_net(sk), user, len);
break;
case ARPT_SO_GET_REVISION_TARGET: {
@@ -1725,9 +1723,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-struct arpt_table *arpt_register_table(struct net *net,
- struct arpt_table *table,
- const struct arpt_replace *repl)
+struct xt_table *arpt_register_table(struct net *net, struct xt_table *table,
+ const struct arpt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
@@ -1769,7 +1766,7 @@ out:
return ERR_PTR(ret);
}
-void arpt_unregister_table(struct arpt_table *table)
+void arpt_unregister_table(struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
@@ -1787,7 +1784,7 @@ void arpt_unregister_table(struct arpt_table *table)
}
/* The built-in targets: standard (NULL) and error. */
-static struct arpt_target arpt_standard_target __read_mostly = {
+static struct xt_target arpt_standard_target __read_mostly = {
.name = ARPT_STANDARD_TARGET,
.targetsize = sizeof(int),
.family = NF_ARP,
@@ -1798,7 +1795,7 @@ static struct arpt_target arpt_standard_target __read_mostly = {
#endif
};
-static struct arpt_target arpt_error_target __read_mostly = {
+static struct xt_target arpt_error_target __read_mostly = {
.name = ARPT_ERROR_TARGET,
.target = arpt_error,
.targetsize = ARPT_FUNCTION_MAXNAMELEN,
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 45fa4e20094..a385959d265 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -15,11 +15,11 @@ target(struct sk_buff *skb,
const void *targinfo)
{
const struct arpt_mangle *mangle = targinfo;
- struct arphdr *arp;
+ const struct arphdr *arp;
unsigned char *arpptr;
int pln, hln;
- if (skb_make_writable(skb, skb->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
arp = arp_hdr(skb);
@@ -73,8 +73,9 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target,
return true;
}
-static struct arpt_target arpt_mangle_reg __read_mostly = {
+static struct xt_target arpt_mangle_reg __read_mostly = {
.name = "mangle",
+ .family = NF_ARP,
.target = target,
.targetsize = sizeof(struct arpt_mangle),
.checkentry = checkentry,
@@ -83,15 +84,12 @@ static struct arpt_target arpt_mangle_reg __read_mostly = {
static int __init arpt_mangle_init(void)
{
- if (arpt_register_target(&arpt_mangle_reg))
- return -EINVAL;
-
- return 0;
+ return xt_register_target(&arpt_mangle_reg);
}
static void __exit arpt_mangle_fini(void)
{
- arpt_unregister_target(&arpt_mangle_reg);
+ xt_unregister_target(&arpt_mangle_reg);
}
module_init(arpt_mangle_init);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 4e9c496a30c..3be4d07e7ed 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -45,10 +45,10 @@ static struct
.term = ARPT_ERROR_INIT,
};
-static struct arpt_table packet_filter = {
+static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.private = NULL,
.me = THIS_MODULE,
.af = NF_ARP,
@@ -70,18 +70,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = {
.owner = THIS_MODULE,
.pf = NF_ARP,
.hooknum = NF_ARP_IN,
+ .priority = NF_IP_PRI_FILTER,
},
{
.hook = arpt_hook,
.owner = THIS_MODULE,
.pf = NF_ARP,
.hooknum = NF_ARP_OUT,
+ .priority = NF_IP_PRI_FILTER,
},
{
.hook = arpt_hook,
.owner = THIS_MODULE,
.pf = NF_ARP,
.hooknum = NF_ARP_FORWARD,
+ .priority = NF_IP_PRI_FILTER,
},
};
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 6bda1102851..719be29f750 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -283,8 +283,8 @@ static int
ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
{
int diff;
- int err;
struct iphdr *user_iph = (struct iphdr *)v->payload;
+ struct sk_buff *nskb;
if (v->data_len < sizeof(*user_iph))
return 0;
@@ -296,14 +296,16 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- err = pskb_expand_head(e->skb, 0,
+ nskb = skb_copy_expand(e->skb, 0,
diff - skb_tailroom(e->skb),
GFP_ATOMIC);
- if (err) {
+ if (!nskb) {
printk(KERN_WARNING "ip_queue: error "
- "in mangle, dropping packet: %d\n", -err);
- return err;
+ "in mangle, dropping packet\n");
+ return -ENOMEM;
}
+ kfree_skb(e->skb);
+ e->skb = nskb;
}
skb_put(e->skb, diff);
}
@@ -479,7 +481,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
{
struct net_device *dev = ptr;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
return NOTIFY_DONE;
/* Drop any packets associated with the downed device */
@@ -586,11 +588,9 @@ static int __init ip_queue_init(void)
}
#ifdef CONFIG_PROC_FS
- proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
- if (proc) {
- proc->owner = THIS_MODULE;
- proc->proc_fops = &ip_queue_proc_fops;
- } else {
+ proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
+ &ip_queue_proc_fops);
+ if (!proc) {
printk(KERN_ERR "ip_queue: failed to create proc entry\n");
goto cleanup_ipqnl;
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 600737f122d..4e7c719445c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -53,7 +53,7 @@ MODULE_DESCRIPTION("IPv4 packet filter");
do { \
if (!(x)) \
printk("IP_NF_ASSERT: %s:%s:%u\n", \
- __FUNCTION__, __FILE__, __LINE__); \
+ __func__, __FILE__, __LINE__); \
} while(0)
#else
#define IP_NF_ASSERT(x)
@@ -296,7 +296,7 @@ static void trace_packet(struct sk_buff *skb,
struct ipt_entry *e)
{
void *table_base;
- struct ipt_entry *root;
+ const struct ipt_entry *root;
char *hookname, *chainname, *comment;
unsigned int rulenum = 0;
@@ -327,7 +327,7 @@ ipt_do_table(struct sk_buff *skb,
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
u_int16_t offset;
- struct iphdr *ip;
+ const struct iphdr *ip;
u_int16_t datalen;
bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -926,7 +926,7 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -953,9 +953,9 @@ copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
struct ipt_entry *e;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
int ret = 0;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
counters = alloc_counters(table);
if (IS_ERR(counters))
@@ -975,8 +975,8 @@ copy_entries_to_user(unsigned int total_size,
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
unsigned int i;
- struct ipt_entry_match *m;
- struct ipt_entry_target *t;
+ const struct ipt_entry_match *m;
+ const struct ipt_entry_target *t;
e = (struct ipt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -1116,7 +1116,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
"iptable_%s", name);
if (t && !IS_ERR(t)) {
struct ipt_getinfo info;
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
if (compat) {
@@ -1172,7 +1172,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
t = xt_find_table_lock(net, AF_INET, get.name);
if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1180,7 +1180,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
else {
duprintf("get_entries: I've got %u not %u!\n",
private->size, get.size);
- ret = -EINVAL;
+ ret = -EAGAIN;
}
module_put(t->me);
xt_table_unlock(t);
@@ -1337,11 +1337,11 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
- char *name;
+ const char *name;
int size;
void *ptmp;
struct xt_table *t;
- struct xt_table_info *private;
+ const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
#ifdef CONFIG_COMPAT
@@ -1852,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
switch (cmd) {
case IPT_SO_SET_REPLACE:
- ret = compat_do_replace(sk->sk_net, user, len);
+ ret = compat_do_replace(sock_net(sk), user, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sk->sk_net, user, len, 1);
+ ret = do_add_counters(sock_net(sk), user, len, 1);
break;
default:
@@ -1878,11 +1878,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *userptr)
{
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
unsigned int i = 0;
counters = alloc_counters(table);
@@ -1929,7 +1929,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
@@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
} else if (!ret) {
duprintf("compat_get_entries: I've got %u not %u!\n",
private->size, get.size);
- ret = -EINVAL;
+ ret = -EAGAIN;
}
xt_compat_flush_offsets(AF_INET);
module_put(t->me);
@@ -1963,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IPT_SO_GET_INFO:
- ret = get_info(sk->sk_net, user, len, 1);
+ ret = get_info(sock_net(sk), user, len, 1);
break;
case IPT_SO_GET_ENTRIES:
- ret = compat_get_entries(sk->sk_net, user, len);
+ ret = compat_get_entries(sock_net(sk), user, len);
break;
default:
ret = do_ipt_get_ctl(sk, cmd, user, len);
@@ -1985,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
switch (cmd) {
case IPT_SO_SET_REPLACE:
- ret = do_replace(sk->sk_net, user, len);
+ ret = do_replace(sock_net(sk), user, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sk->sk_net, user, len, 0);
+ ret = do_add_counters(sock_net(sk), user, len, 0);
break;
default:
@@ -2010,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IPT_SO_GET_INFO:
- ret = get_info(sk->sk_net, user, len, 0);
+ ret = get_info(sock_net(sk), user, len, 0);
break;
case IPT_SO_GET_ENTRIES:
- ret = get_entries(sk->sk_net, user, len);
+ ret = get_entries(sock_net(sk), user, len);
break;
case IPT_SO_GET_REVISION_MATCH:
@@ -2130,7 +2130,8 @@ icmp_match(const struct sk_buff *skb,
unsigned int protoff,
bool *hotdrop)
{
- struct icmphdr _icmph, *ic;
+ const struct icmphdr *ic;
+ struct icmphdr _icmph;
const struct ipt_icmp *icmpinfo = matchinfo;
/* Must not be a fragment. */
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index c6cf84c7761..22d8e7cd919 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -82,8 +82,8 @@ clusterip_config_put(struct clusterip_config *c)
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
+ write_lock_bh(&clusterip_lock);
if (atomic_dec_and_test(&c->entries)) {
- write_lock_bh(&clusterip_lock);
list_del(&c->list);
write_unlock_bh(&clusterip_lock);
@@ -96,7 +96,9 @@ clusterip_config_entry_put(struct clusterip_config *c)
#ifdef CONFIG_PROC_FS
remove_proc_entry(c->pde->name, c->pde->parent);
#endif
+ return;
}
+ write_unlock_bh(&clusterip_lock);
}
static struct clusterip_config *
@@ -142,7 +144,7 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
}
static struct clusterip_config *
-clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
+clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
@@ -167,14 +169,13 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
/* create proc dir entry */
sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
- c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR,
- clusterip_procdir);
+ c->pde = proc_create(buffer, S_IWUSR|S_IRUSR,
+ clusterip_procdir, &clusterip_proc_fops);
if (!c->pde) {
kfree(c);
return NULL;
}
}
- c->pde->proc_fops = &clusterip_proc_fops;
c->pde->data = c;
#endif
@@ -332,7 +333,7 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in,
}
#ifdef DEBUG
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
if (!clusterip_responsible(cipinfo->config, hash)) {
@@ -417,7 +418,7 @@ clusterip_tg_check(const char *tablename, const void *e_void,
/* drop reference count of cluster config when rule is deleted */
static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
{
- struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+ const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
@@ -566,7 +567,7 @@ struct clusterip_seq_position {
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
{
- struct proc_dir_entry *pde = s->private;
+ const struct proc_dir_entry *pde = s->private;
struct clusterip_config *c = pde->data;
unsigned int weight;
u_int32_t local_nodes;
@@ -593,7 +594,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
+ struct clusterip_seq_position *idx = v;
*pos = ++idx->pos;
if (*pos >= idx->weight) {
@@ -612,7 +613,7 @@ static void clusterip_seq_stop(struct seq_file *s, void *v)
static int clusterip_seq_show(struct seq_file *s, void *v)
{
- struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
+ struct clusterip_seq_position *idx = v;
if (idx->pos != 0)
seq_putc(s, ',');
@@ -668,7 +669,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
{
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
- struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
struct clusterip_config *c = pde->data;
unsigned long nodenum;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 21395bc2b27..d60139c134c 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -100,7 +100,7 @@ ecn_tg_check(const char *tablename, const void *e_void,
const struct xt_target *target, void *targinfo,
unsigned int hook_mask)
{
- const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
+ const struct ipt_ECN_info *einfo = targinfo;
const struct ipt_entry *e = e_void;
if (einfo->operation & IPT_ECN_OP_MASK) {
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b38d7850f50..0af14137137 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -76,7 +76,8 @@ static void dump_packet(const struct nf_loginfo *info,
if ((logflags & IPT_LOG_IPOPT)
&& ih->ihl * 4 > sizeof(struct iphdr)) {
- unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
+ const unsigned char *op;
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
unsigned int i, optsize;
optsize = ih->ihl * 4 - sizeof(struct iphdr);
@@ -338,12 +339,16 @@ static void dump_packet(const struct nf_loginfo *info,
if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
read_lock_bh(&skb->sk->sk_callback_lock);
if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- printk("UID=%u GID=%u",
+ printk("UID=%u GID=%u ",
skb->sk->sk_socket->file->f_uid,
skb->sk->sk_socket->file->f_gid);
read_unlock_bh(&skb->sk->sk_callback_lock);
}
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (!iphoff && skb->mark)
+ printk("MARK=0x%x ", skb->mark);
+
/* Proto Max log string length */
/* IP: 40+46+6+11+127 = 230 */
/* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d80fee8327e..84c26dd27d8 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -77,7 +77,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in,
return NF_ACCEPT;
mr = targinfo;
- rt = (struct rtable *)skb->dst;
+ rt = skb->rtable;
newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
if (!newsrc) {
printk("MASQUERADE: %s ate my IP address\n", out->name);
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this,
{
const struct net_device *dev = ptr;
- if (dev->nd_net != &init_net)
+ if (dev_net(dev) != &init_net)
return NOTIFY_DONE;
if (event == NETDEV_DOWN) {
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
-
- if (event == NETDEV_DOWN) {
- /* IP address was deleted. Search entire table for
- conntracks which were associated with that device,
- and forget them. */
- NF_CT_ASSERT(dev->ifindex != 0);
-
- nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
- }
-
- return NOTIFY_DONE;
+ struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ return masq_device_event(this, event, dev);
}
static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 22606e2baa1..2639872849d 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -35,8 +35,10 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- struct iphdr *oiph, *niph;
- struct tcphdr _otcph, *oth, *tcph;
+ const struct iphdr *oiph;
+ struct iphdr *niph;
+ const struct tcphdr *oth;
+ struct tcphdr _otcph, *tcph;
unsigned int addr_type;
/* IP header checks: fragment. */
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 68cbe3ca01c..21cb053f5d7 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -252,6 +252,8 @@ recent_mt_check(const char *tablename, const void *ip,
if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) &&
(info->seconds || info->hit_count))
return false;
+ if (info->hit_count > ip_pkt_list_tot)
+ return false;
if (info->name[0] == '\0' ||
strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN)
return false;
@@ -274,12 +276,11 @@ recent_mt_check(const char *tablename, const void *ip,
for (i = 0; i < ip_list_hash_size; i++)
INIT_LIST_HEAD(&t->iphash[i]);
#ifdef CONFIG_PROC_FS
- t->proc = create_proc_entry(t->name, ip_list_perms, proc_dir);
+ t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops);
if (t->proc == NULL) {
kfree(t);
goto out;
}
- t->proc->proc_fops = &recent_fops;
t->proc->uid = ip_list_uid;
t->proc->gid = ip_list_gid;
t->proc->data = t;
@@ -339,7 +340,7 @@ static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct recent_iter_state *st = seq->private;
- struct recent_table *t = st->table;
+ const struct recent_table *t = st->table;
struct recent_entry *e = v;
struct list_head *head = e->list.next;
@@ -360,7 +361,7 @@ static void recent_seq_stop(struct seq_file *s, void *v)
static int recent_seq_show(struct seq_file *seq, void *v)
{
- struct recent_entry *e = v;
+ const struct recent_entry *e = v;
unsigned int i;
i = (e->index - 1) % ip_pkt_list_tot;
@@ -395,7 +396,7 @@ static int recent_seq_open(struct inode *inode, struct file *file)
static ssize_t recent_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
- struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
struct recent_table *t = pde->data;
struct recent_entry *e;
char buf[sizeof("+255.255.255.255")], *c = buf;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 69f3d7e6e96..1ea677dcf84 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -56,20 +56,32 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
/* The work comes in here from netfilter.c. */
static unsigned int
+ipt_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_in_net(in, out)->ipv4.iptable_filter);
+}
+
+static unsigned int
ipt_hook(unsigned int hook,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
+ return ipt_do_table(skb, hook, in, out,
+ nf_forward_net(in, out)->ipv4.iptable_filter);
}
static unsigned int
@@ -88,12 +100,13 @@ ipt_local_out_hook(unsigned int hook,
return NF_ACCEPT;
}
- return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv4.iptable_filter);
}
static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
- .hook = ipt_hook,
+ .hook = ipt_local_in_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_IN,
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index c55a210853a..da59182f222 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -67,20 +67,54 @@ static struct
static struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
/* The work comes in here from netfilter.c. */
static unsigned int
-ipt_route_hook(unsigned int hook,
+ipt_pre_routing_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_pre_routing_net(in, out)->ipv4.iptable_mangle);
+}
+
+static unsigned int
+ipt_post_routing_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_post_routing_net(in, out)->ipv4.iptable_mangle);
+}
+
+static unsigned int
+ipt_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_in_net(in, out)->ipv4.iptable_mangle);
+}
+
+static unsigned int
+ipt_forward_hook(unsigned int hook,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
+ return ipt_do_table(skb, hook, in, out,
+ nf_forward_net(in, out)->ipv4.iptable_mangle);
}
static unsigned int
@@ -112,7 +146,8 @@ ipt_local_hook(unsigned int hook,
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
+ ret = ipt_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv4.iptable_mangle);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
iph = ip_hdr(skb);
@@ -130,21 +165,21 @@ ipt_local_hook(unsigned int hook,
static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
- .hook = ipt_route_hook,
+ .hook = ipt_pre_routing_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_MANGLE,
},
{
- .hook = ipt_route_hook,
+ .hook = ipt_local_in_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_MANGLE,
},
{
- .hook = ipt_route_hook,
+ .hook = ipt_forward_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_FORWARD,
@@ -158,7 +193,7 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
.priority = NF_IP_PRI_MANGLE,
},
{
- .hook = ipt_route_hook,
+ .hook = ipt_post_routing_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_POST_ROUTING,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index e41fe8ca4e1..fddce7754b7 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -39,7 +39,7 @@ static struct
static struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
@@ -52,7 +52,8 @@ ipt_hook(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
+ return ipt_do_table(skb, hook, in, out,
+ nf_pre_routing_net(in, out)->ipv4.iptable_raw);
}
static unsigned int
@@ -70,7 +71,8 @@ ipt_local_hook(unsigned int hook,
"packet.\n");
return NF_ACCEPT;
}
- return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv4.iptable_raw);
}
/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index a65b845c5f1..cacb9cb27da 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -23,30 +23,36 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_nat_helper.h>
-static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
- struct nf_conntrack_tuple *tuple)
+int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
+
+static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+ struct nf_conntrack_tuple *tuple)
{
const __be32 *ap;
__be32 _addrs[2];
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * 2, _addrs);
if (ap == NULL)
- return 0;
+ return false;
tuple->src.u3.ip = ap[0];
tuple->dst.u3.ip = ap[1];
- return 1;
+ return true;
}
-static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
tuple->src.u3.ip = orig->dst.u3.ip;
tuple->dst.u3.ip = orig->src.u3.ip;
- return 1;
+ return true;
}
static int ipv4_print_tuple(struct seq_file *s,
@@ -101,35 +107,41 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- /* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(skb);
-}
-
-static unsigned int ipv4_conntrack_help(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
+ unsigned int ret;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
- return NF_ACCEPT;
+ goto out;
help = nfct_help(ct);
if (!help)
- return NF_ACCEPT;
+ goto out;
+
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
if (!helper)
- return NF_ACCEPT;
- return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
- ct, ctinfo);
+ goto out;
+
+ ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+ typeof(nf_nat_seq_adjust_hook) seq_adjust;
+
+ seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
+ if (!seq_adjust || !seq_adjust(skb, ct, ctinfo))
+ return NF_DROP;
+ }
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -211,20 +223,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
.priority = NF_IP_PRI_CONNTRACK,
},
{
- .hook = ipv4_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
- .hook = ipv4_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
.hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = PF_INET,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 089252e82c0..40a46d48249 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -106,21 +106,16 @@ static int ct_seq_show(struct seq_file *s, void *v)
/* we only want to print DIR_ORIGINAL */
if (NF_CT_DIRECTION(hash))
return 0;
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET)
+ if (nf_ct_l3num(ct) != AF_INET)
return 0;
- l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.src.l3num);
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
NF_CT_ASSERT(l3proto);
- l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.src.l3num,
- ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
NF_CT_ASSERT(l4proto);
if (seq_printf(s, "%-8s %u %ld ",
- l4proto->name,
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
+ l4proto->name, nf_ct_protonum(ct),
timer_pending(&ct->timeout)
? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
return -ENOSPC;
@@ -379,7 +374,7 @@ static const struct file_operations ct_cpu_seq_fops = {
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release,
};
int __init nf_conntrack_ipv4_compat_init(void)
@@ -395,13 +390,10 @@ int __init nf_conntrack_ipv4_compat_init(void)
if (!proc_exp)
goto err2;
- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat);
+ proc_stat = proc_create("ip_conntrack", S_IRUGO,
+ init_net.proc_net_stat, &ct_cpu_seq_fops);
if (!proc_stat)
goto err3;
-
- proc_stat->proc_fops = &ct_cpu_seq_fops;
- proc_stat->owner = THIS_MODULE;
-
return 0;
err3:
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 6873fddb352..78ab19accac 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,22 +22,21 @@
static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
{
const struct icmphdr *hp;
struct icmphdr _hdr;
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return 0;
+ return false;
tuple->dst.u.icmp.type = hp->type;
tuple->src.u.icmp.id = hp->un.echo.id;
tuple->dst.u.icmp.code = hp->code;
- return 1;
+ return true;
}
/* Add 1; spaces filled with 0. */
@@ -52,17 +51,17 @@ static const u_int8_t invmap[] = {
[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
};
-static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
if (orig->dst.u.icmp.type >= sizeof(invmap)
|| !invmap[orig->dst.u.icmp.type])
- return 0;
+ return false;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
- return 1;
+ return true;
}
/* Print out the per-protocol part of the tuple. */
@@ -101,8 +100,8 @@ static int icmp_packet(struct nf_conn *ct,
}
/* Called when a new connection for this protocol found. */
-static int icmp_new(struct nf_conn *ct,
- const struct sk_buff *skb, unsigned int dataoff)
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff)
{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
@@ -116,11 +115,11 @@ static int icmp_new(struct nf_conn *ct,
/* Can't create a new ICMP `conn' with this. */
pr_debug("icmp: can't create new conn with type %u\n",
ct->tuplehash[0].tuple.dst.u.icmp.type);
- NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple);
- return 0;
+ nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+ return false;
}
atomic_set(&ct->proto.icmp.count, 0);
- return 1;
+ return true;
}
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 0d5fa3a54d0..04578593e10 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -150,9 +150,9 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range)
{
unsigned int h = hash_by_src(tuple);
- struct nf_conn_nat *nat;
- struct nf_conn *ct;
- struct hlist_node *n;
+ const struct nf_conn_nat *nat;
+ const struct nf_conn *ct;
+ const struct hlist_node *n;
rcu_read_lock();
hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
@@ -349,7 +349,7 @@ nf_nat_setup_info(struct nf_conn *ct,
EXPORT_SYMBOL(nf_nat_setup_info);
/* Returns true if succeeded. */
-static int
+static bool
manip_pkt(u_int16_t proto,
struct sk_buff *skb,
unsigned int iphdroff,
@@ -360,7 +360,7 @@ manip_pkt(u_int16_t proto,
const struct nf_nat_protocol *p;
if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
- return 0;
+ return false;
iph = (void *)skb->data + iphdroff;
@@ -369,7 +369,7 @@ manip_pkt(u_int16_t proto,
/* rcu_read_lock()ed by nf_hook_slow */
p = __nf_nat_proto_find(proto);
if (!p->manip_pkt(skb, iphdroff, target, maniptype))
- return 0;
+ return false;
iph = (void *)skb->data + iphdroff;
@@ -380,7 +380,7 @@ manip_pkt(u_int16_t proto,
csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
iph->daddr = target->dst.u3.ip;
}
- return 1;
+ return true;
}
/* Do packet manipulations according to nf_nat_setup_info. */
@@ -426,7 +426,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
struct icmphdr icmp;
struct iphdr ip;
} *inside;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
int hdrlen = ip_hdrlen(skb);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -544,46 +544,6 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-int
-nf_nat_port_range_to_nlattr(struct sk_buff *skb,
- const struct nf_nat_range *range)
-{
- NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.tcp.port);
- NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.tcp.port);
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_port_nlattr_to_range);
-
-int
-nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
-{
- int ret = 0;
-
- /* we have to return whether we actually parsed something or not */
-
- if (tb[CTA_PROTONAT_PORT_MIN]) {
- ret = 1;
- range->min.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
- }
-
- if (!tb[CTA_PROTONAT_PORT_MAX]) {
- if (ret)
- range->max.tcp.port = range->min.tcp.port;
- } else {
- ret = 1;
- range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nlattr);
-#endif
-
/* Noone using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
@@ -629,6 +589,8 @@ static int __init nf_nat_init(void)
size_t i;
int ret;
+ need_ipv4_conntrack();
+
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
@@ -658,6 +620,9 @@ static int __init nf_nat_init(void)
nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
+
+ BUG_ON(nf_nat_seq_adjust_hook != NULL);
+ rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
return 0;
cleanup_extend:
@@ -684,6 +649,8 @@ static void __exit nf_nat_cleanup(void)
nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
+ rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
+ synchronize_net();
}
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ca57f47bbd2..11976ea2988 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -139,7 +139,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
struct iphdr *iph;
struct tcphdr *tcph;
int oldlen, datalen;
@@ -217,7 +217,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = (struct rtable *)skb->dst;
+ struct rtable *rt = skb->rtable;
struct iphdr *iph;
struct udphdr *udph;
int datalen, oldlen;
@@ -416,7 +416,6 @@ nf_nat_seq_adjust(struct sk_buff *skb,
return 1;
}
-EXPORT_SYMBOL(nf_nat_seq_adjust);
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3a1e6d6afc0..da3d91a5ef5 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -72,7 +72,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
}
pr_debug("trying to unexpect other dir: ");
- NF_CT_DUMP_TUPLE(&t);
+ nf_ct_dump_tuple_ip(&t);
other_exp = nf_ct_expect_find_get(&t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
new file mode 100644
index 00000000000..91537f11273
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -0,0 +1,120 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ __be16 port;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ port = tuple->src.u.all;
+ else
+ port = tuple->dst.u.all;
+
+ return ntohs(port) >= ntohs(min->all) &&
+ ntohs(port) <= ntohs(max->all);
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
+
+bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct,
+ u_int16_t *rover)
+{
+ unsigned int range_size, min, i;
+ __be16 *portptr;
+ u_int16_t off;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ portptr = &tuple->src.u.all;
+ else
+ portptr = &tuple->dst.u.all;
+
+ /* If no range specified... */
+ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == IP_NAT_MANIP_DST)
+ return false;
+
+ if (ntohs(*portptr) < 1024) {
+ /* Loose convention: >> 512 is credential passing */
+ if (ntohs(*portptr) < 512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min.all);
+ range_size = ntohs(range->max.all) - min + 1;
+ }
+
+ off = *rover;
+ if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+ off = net_random();
+
+ for (i = 0; i < range_size; i++, off++) {
+ *portptr = htons(min + off % range_size);
+ if (nf_nat_used_tuple(tuple, ct))
+ continue;
+ if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+ *rover = off;
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
+ const struct nf_nat_range *range)
+{
+ NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
+ NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
+
+int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_PROTONAT_PORT_MIN]) {
+ range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+ range->max.all = range->min.tcp.port;
+ range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ }
+ if (tb[CTA_PROTONAT_PORT_MAX]) {
+ range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+ range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);
+#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
new file mode 100644
index 00000000000..22485ce306d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -0,0 +1,108 @@
+/*
+ * DCCP NAT protocol helper
+ *
+ * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/dccp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+static u_int16_t dccp_port_rover;
+
+static bool
+dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+ &dccp_port_rover);
+}
+
+static bool
+dccp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct iphdr *iph = (const void *)(skb->data + iphdroff);
+ struct dccp_hdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl * 4;
+ __be32 oldip, newip;
+ __be16 *portptr, oldport, newport;
+ int hdrsize = 8; /* DCCP connection tracking guarantees this much */
+
+ if (skb->len >= hdroff + sizeof(struct dccp_hdr))
+ hdrsize = sizeof(struct dccp_hdr);
+
+ if (!skb_make_writable(skb, hdroff + hdrsize))
+ return false;
+
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct dccp_hdr *)(skb->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = tuple->src.u3.ip;
+ newport = tuple->src.u.dccp.port;
+ portptr = &hdr->dccph_sport;
+ } else {
+ oldip = iph->daddr;
+ newip = tuple->dst.u3.ip;
+ newport = tuple->dst.u.dccp.port;
+ portptr = &hdr->dccph_dport;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
+ inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
+ 0);
+ return true;
+}
+
+static const struct nf_nat_protocol nf_nat_protocol_dccp = {
+ .protonum = IPPROTO_DCCP,
+ .me = THIS_MODULE,
+ .manip_pkt = dccp_manip_pkt,
+ .in_range = nf_nat_proto_in_range,
+ .unique_tuple = dccp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_dccp_init(void)
+{
+ return nf_nat_protocol_register(&nf_nat_protocol_dccp);
+}
+
+static void __exit nf_nat_proto_dccp_fini(void)
+{
+ nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
+}
+
+module_init(nf_nat_proto_dccp_init);
+module_exit(nf_nat_proto_dccp_fini);
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("DCCP NAT protocol helper");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index a1e4da16da2..d7e89201351 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -36,26 +36,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- __be16 key;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- key = tuple->src.u.gre.key;
- else
- key = tuple->dst.u.gre.key;
-
- return ntohs(key) >= ntohs(min->gre.key) &&
- ntohs(key) <= ntohs(max->gre.key);
-}
-
/* generate unique tuple ... */
-static int
+static bool
gre_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
@@ -68,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
/* If there is no master conntrack we are not PPTP,
do not change tuples */
if (!ct->master)
- return 0;
+ return false;
if (maniptype == IP_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
@@ -89,20 +71,20 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
for (i = 0; i < range_size; i++, key++) {
*keyptr = htons(min + key % range_size);
if (!nf_nat_used_tuple(tuple, ct))
- return 1;
+ return true;
}
pr_debug("%p: no NAT mapping\n", ct);
- return 0;
+ return false;
}
/* manipulate a GRE packet according to maniptype */
-static int
+static bool
gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct gre_hdr *greh;
+ const struct gre_hdr *greh;
struct gre_hdr_pptp *pgreh;
const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
unsigned int hdroff = iphdroff + iph->ihl * 4;
@@ -110,7 +92,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
- return 0;
+ return false;
greh = (void *)skb->data + hdroff;
pgreh = (struct gre_hdr_pptp *)greh;
@@ -118,7 +100,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype != IP_NAT_MANIP_DST)
- return 1;
+ return true;
switch (greh->version) {
case GRE_VERSION_1701:
/* We do not currently NAT any GREv0 packets.
@@ -130,21 +112,20 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
break;
default:
pr_debug("can't nat unknown GRE version\n");
- return 0;
+ return false;
}
- return 1;
+ return true;
}
static const struct nf_nat_protocol gre = {
- .name = "GRE",
.protonum = IPPROTO_GRE,
.me = THIS_MODULE,
.manip_pkt = gre_manip_pkt,
- .in_range = gre_in_range,
+ .in_range = nf_nat_proto_in_range,
.unique_tuple = gre_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_port_range_to_nlattr,
- .nlattr_to_range = nf_nat_port_nlattr_to_range,
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 03a02969aa5..19a8b0b07d8 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -17,7 +17,7 @@
#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_nat_protocol.h>
-static int
+static bool
icmp_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
}
-static int
+static bool
icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
@@ -46,12 +46,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
(id % range_size));
if (!nf_nat_used_tuple(tuple, ct))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
-static int
+static bool
icmp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
@@ -62,24 +62,23 @@ icmp_manip_pkt(struct sk_buff *skb,
unsigned int hdroff = iphdroff + iph->ihl*4;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return 0;
+ return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
inet_proto_csum_replace2(&hdr->checksum, skb,
hdr->un.echo.id, tuple->src.u.icmp.id, 0);
hdr->un.echo.id = tuple->src.u.icmp.id;
- return 1;
+ return true;
}
const struct nf_nat_protocol nf_nat_protocol_icmp = {
- .name = "ICMP",
.protonum = IPPROTO_ICMP,
.me = THIS_MODULE,
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_port_range_to_nlattr,
- .nlattr_to_range = nf_nat_port_nlattr_to_range,
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
new file mode 100644
index 00000000000..82e4c0e286b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <net/sctp/checksum.h>
+
+#include <net/netfilter/nf_nat_protocol.h>
+
+static u_int16_t nf_sctp_port_rover;
+
+static bool
+sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+ &nf_sctp_port_rover);
+}
+
+static bool
+sctp_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ sctp_sctphdr_t *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+ __be32 oldip, newip;
+ u32 crc32;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct sctphdr *)(skb->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ /* Get rid of src ip and src pt */
+ oldip = iph->saddr;
+ newip = tuple->src.u3.ip;
+ hdr->source = tuple->src.u.sctp.port;
+ } else {
+ /* Get rid of dst ip and dst pt */
+ oldip = iph->daddr;
+ newip = tuple->dst.u3.ip;
+ hdr->dest = tuple->dst.u.sctp.port;
+ }
+
+ crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
+ for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
+ crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
+ crc32);
+ crc32 = sctp_end_cksum(crc32);
+ hdr->checksum = htonl(crc32);
+
+ return true;
+}
+
+static const struct nf_nat_protocol nf_nat_protocol_sctp = {
+ .protonum = IPPROTO_SCTP,
+ .me = THIS_MODULE,
+ .manip_pkt = sctp_manip_pkt,
+ .in_range = nf_nat_proto_in_range,
+ .unique_tuple = sctp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_sctp_init(void)
+{
+ return nf_nat_protocol_register(&nf_nat_protocol_sctp);
+}
+
+static void __exit nf_nat_proto_sctp_exit(void)
+{
+ nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
+}
+
+module_init(nf_nat_proto_sctp_init);
+module_exit(nf_nat_proto_sctp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCTP NAT protocol helper");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index ffd5d1589ec..399e2cfa263 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -8,7 +8,6 @@
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/random.h>
#include <linux/ip.h>
#include <linux/tcp.h>
@@ -19,75 +18,19 @@
#include <net/netfilter/nf_nat_protocol.h>
#include <net/netfilter/nf_nat_core.h>
-static int
-tcp_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.tcp.port;
- else
- port = tuple->dst.u.tcp.port;
-
- return ntohs(port) >= ntohs(min->tcp.port) &&
- ntohs(port) <= ntohs(max->tcp.port);
-}
+static u_int16_t tcp_port_rover;
-static int
+static bool
tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- static u_int16_t port;
- __be16 *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.tcp.port;
- else
- portptr = &tuple->dst.u.tcp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- /* Map privileged onto privileged. */
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.tcp.port);
- range_size = ntohs(range->max.tcp.port) - min + 1;
- }
-
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- port = net_random();
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!nf_nat_used_tuple(tuple, ct))
- return 1;
- }
- return 0;
+ return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+ &tcp_port_rover);
}
-static int
+static bool
tcp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
@@ -107,7 +50,7 @@ tcp_manip_pkt(struct sk_buff *skb,
hdrsize = sizeof(struct tcphdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
- return 0;
+ return false;
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct tcphdr *)(skb->data + hdroff);
@@ -130,22 +73,21 @@ tcp_manip_pkt(struct sk_buff *skb,
*portptr = newport;
if (hdrsize < sizeof(*hdr))
- return 1;
+ return true;
inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
- return 1;
+ return true;
}
const struct nf_nat_protocol nf_nat_protocol_tcp = {
- .name = "TCP",
.protonum = IPPROTO_TCP,
.me = THIS_MODULE,
.manip_pkt = tcp_manip_pkt,
- .in_range = tcp_in_range,
+ .in_range = nf_nat_proto_in_range,
.unique_tuple = tcp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_port_range_to_nlattr,
- .nlattr_to_range = nf_nat_port_nlattr_to_range,
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 4b8f49910ff..9e61c79492e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -8,7 +8,6 @@
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/random.h>
#include <linux/ip.h>
#include <linux/udp.h>
@@ -18,74 +17,19 @@
#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_nat_protocol.h>
-static int
-udp_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.udp.port;
- else
- port = tuple->dst.u.udp.port;
-
- return ntohs(port) >= ntohs(min->udp.port) &&
- ntohs(port) <= ntohs(max->udp.port);
-}
+static u_int16_t udp_port_rover;
-static int
+static bool
udp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- static u_int16_t port;
- __be16 *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.udp.port;
- else
- portptr = &tuple->dst.u.udp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.udp.port);
- range_size = ntohs(range->max.udp.port) - min + 1;
- }
-
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- port = net_random();
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!nf_nat_used_tuple(tuple, ct))
- return 1;
- }
- return 0;
+ return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+ &udp_port_rover);
}
-static int
+static bool
udp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
@@ -98,7 +42,7 @@ udp_manip_pkt(struct sk_buff *skb,
__be16 *portptr, newport;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return 0;
+ return false;
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct udphdr *)(skb->data + hdroff);
@@ -124,18 +68,17 @@ udp_manip_pkt(struct sk_buff *skb,
hdr->check = CSUM_MANGLED_0;
}
*portptr = newport;
- return 1;
+ return true;
}
const struct nf_nat_protocol nf_nat_protocol_udp = {
- .name = "UDP",
.protonum = IPPROTO_UDP,
.me = THIS_MODULE,
.manip_pkt = udp_manip_pkt,
- .in_range = udp_in_range,
+ .in_range = nf_nat_proto_in_range,
.unique_tuple = udp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_port_range_to_nlattr,
- .nlattr_to_range = nf_nat_port_nlattr_to_range,
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
new file mode 100644
index 00000000000..440a229bbd8
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -0,0 +1,99 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_protocol.h>
+
+static u_int16_t udplite_port_rover;
+
+static bool
+udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+ &udplite_port_rover);
+}
+
+static bool
+udplite_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ struct udphdr *hdr;
+ unsigned int hdroff = iphdroff + iph->ihl*4;
+ __be32 oldip, newip;
+ __be16 *portptr, newport;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct udphdr *)(skb->data + hdroff);
+
+ if (maniptype == IP_NAT_MANIP_SRC) {
+ /* Get rid of src ip and src pt */
+ oldip = iph->saddr;
+ newip = tuple->src.u3.ip;
+ newport = tuple->src.u.udp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst ip and dst pt */
+ oldip = iph->daddr;
+ newip = tuple->dst.u3.ip;
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+
+ inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
+ if (!hdr->check)
+ hdr->check = CSUM_MANGLED_0;
+
+ *portptr = newport;
+ return true;
+}
+
+static const struct nf_nat_protocol nf_nat_protocol_udplite = {
+ .protonum = IPPROTO_UDPLITE,
+ .me = THIS_MODULE,
+ .manip_pkt = udplite_manip_pkt,
+ .in_range = nf_nat_proto_in_range,
+ .unique_tuple = udplite_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .range_to_nlattr = nf_nat_proto_range_to_nlattr,
+ .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_udplite_init(void)
+{
+ return nf_nat_protocol_register(&nf_nat_protocol_udplite);
+}
+
+static void __exit nf_nat_proto_udplite_fini(void)
+{
+ nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
+}
+
+module_init(nf_nat_proto_udplite_init);
+module_exit(nf_nat_proto_udplite_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a26efeb073c..14381c62ace 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -18,35 +18,34 @@
#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_nat_protocol.h>
-static int unknown_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type manip_type,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
+static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type manip_type,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
{
- return 1;
+ return true;
}
-static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
+static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
{
/* Sorry: we can't help you; if it's not unique, we can't frob
anything. */
- return 0;
+ return false;
}
-static int
+static bool
unknown_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- return 1;
+ return true;
}
const struct nf_nat_protocol nf_nat_unknown_protocol = {
- .name = "unknown",
/* .me isn't set: getting a ref to this cannot fail. */
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index f8fda57ba20..e8b4d0d4439 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -61,7 +61,7 @@ static struct
static struct xt_table __nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+ .lock = __RW_LOCK_UNLOCKED(__nat_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
@@ -143,7 +143,7 @@ static bool ipt_snat_checkentry(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- struct nf_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
/* Must be a valid range */
if (mr->rangesize != 1) {
@@ -159,7 +159,7 @@ static bool ipt_dnat_checkentry(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- struct nf_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
/* Must be a valid range */
if (mr->rangesize != 1) {
@@ -188,25 +188,6 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
}
-unsigned int
-alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
-{
- __be32 ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
- : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
- __be16 all
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
- : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
- struct nf_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
-
- pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
- ct, NIPQUAD(ip));
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
int nf_nat_rule_find(struct sk_buff *skb,
unsigned int hooknum,
const struct net_device *in,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b4c8d4968bb..4334d5cabc5 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -2,6 +2,8 @@
*
* (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
* based on RR's ip_nat_ftp.c and other modules.
+ * (C) 2007 United Security Providers
+ * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -26,275 +28,461 @@ MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
MODULE_DESCRIPTION("SIP NAT helper");
MODULE_ALIAS("ip_nat_sip");
-struct addr_map {
- struct {
- char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned int srclen, srciplen;
- unsigned int dstlen, dstiplen;
- } addr[IP_CT_DIR_MAX];
-};
-static void addr_map_init(const struct nf_conn *ct, struct addr_map *map)
+static unsigned int mangle_packet(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ unsigned int matchoff, unsigned int matchlen,
+ const char *buffer, unsigned int buflen)
{
- const struct nf_conntrack_tuple *t;
- enum ip_conntrack_dir dir;
- unsigned int n;
-
- for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
- t = &ct->tuplehash[dir].tuple;
-
- n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
- NIPQUAD(t->src.u3.ip));
- map->addr[dir].srciplen = n;
- n += sprintf(map->addr[dir].src + n, ":%u",
- ntohs(t->src.u.udp.port));
- map->addr[dir].srclen = n;
-
- n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
- NIPQUAD(t->dst.u3.ip));
- map->addr[dir].dstiplen = n;
- n += sprintf(map->addr[dir].dst + n, ":%u",
- ntohs(t->dst.u.udp.port));
- map->addr[dir].dstlen = n;
- }
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen,
+ buffer, buflen))
+ return 0;
+
+ /* Reload data pointer and adjust datalen value */
+ *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+ *datalen += buflen - matchlen;
+ return 1;
}
-static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo,
- struct nf_conn *ct, const char **dptr, size_t dlen,
- enum sip_header_pos pos, struct addr_map *map)
+static int map_addr(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ unsigned int matchoff, unsigned int matchlen,
+ union nf_inet_addr *addr, __be16 port)
{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned int matchlen, matchoff, addrlen;
- char *addr;
-
- if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
+ char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ unsigned int buflen;
+ __be32 newaddr;
+ __be16 newport;
+
+ if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
+ ct->tuplehash[dir].tuple.src.u.udp.port == port) {
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
+ } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
+ ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
+ newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
+ newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
+ } else
return 1;
- if ((matchlen == map->addr[dir].srciplen ||
- matchlen == map->addr[dir].srclen) &&
- memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
- addr = map->addr[!dir].dst;
- addrlen = map->addr[!dir].dstlen;
- } else if ((matchlen == map->addr[dir].dstiplen ||
- matchlen == map->addr[dir].dstlen) &&
- memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
- addr = map->addr[!dir].src;
- addrlen = map->addr[!dir].srclen;
- } else
+ if (newaddr == addr->ip && newport == port)
return 1;
- if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
- matchoff, matchlen, addr, addrlen))
- return 0;
- *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
- return 1;
+ buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
+ NIPQUAD(newaddr), ntohs(newport));
+ return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+ buffer, buflen);
}
-static unsigned int ip_nat_sip(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conn *ct,
- const char **dptr)
+static int map_sip_addr(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ enum sip_header_types type)
{
- enum sip_header_pos pos;
- struct addr_map map;
- int dataoff, datalen;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchlen, matchoff;
+ union nf_inet_addr addr;
+ __be16 port;
- dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
- datalen = skb->len - dataoff;
- if (datalen < sizeof("SIP/2.0") - 1)
- return NF_ACCEPT;
+ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
+ &matchoff, &matchlen, &addr, &port) <= 0)
+ return 1;
+ return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port);
+}
- addr_map_init(ct, &map);
+static unsigned int ip_nat_sip(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ unsigned int dataoff, matchoff, matchlen;
+ union nf_inet_addr addr;
+ __be16 port;
+ int request, in_header;
/* Basic rules: requests and responses. */
- if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
- /* 10.2: Constructing the REGISTER Request:
- *
- * The "userinfo" and "@" components of the SIP URI MUST NOT
- * be present.
- */
- if (datalen >= sizeof("REGISTER") - 1 &&
- strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
- pos = POS_REG_REQ_URI;
- else
- pos = POS_REQ_URI;
-
- if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map))
+ if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
+ if (ct_sip_parse_request(ct, *dptr, *datalen,
+ &matchoff, &matchlen,
+ &addr, &port) > 0 &&
+ !map_addr(skb, dptr, datalen, matchoff, matchlen,
+ &addr, port))
+ return NF_DROP;
+ request = 1;
+ } else
+ request = 0;
+
+ /* Translate topmost Via header and parameters */
+ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+ SIP_HDR_VIA, NULL, &matchoff, &matchlen,
+ &addr, &port) > 0) {
+ unsigned int matchend, poff, plen, buflen, n;
+ char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+
+ /* We're only interested in headers related to this
+ * connection */
+ if (request) {
+ if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
+ port != ct->tuplehash[dir].tuple.src.u.udp.port)
+ goto next;
+ } else {
+ if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
+ port != ct->tuplehash[dir].tuple.dst.u.udp.port)
+ goto next;
+ }
+
+ if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
+ &addr, port))
return NF_DROP;
+
+ matchend = matchoff + matchlen;
+
+ /* The maddr= parameter (RFC 2361) specifies where to send
+ * the reply. */
+ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+ "maddr=", &poff, &plen,
+ &addr) > 0 &&
+ addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+ addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
+ __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+ if (!mangle_packet(skb, dptr, datalen, poff, plen,
+ buffer, buflen))
+ return NF_DROP;
+ }
+
+ /* The received= parameter (RFC 2361) contains the address
+ * from which the server received the request. */
+ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+ "received=", &poff, &plen,
+ &addr) > 0 &&
+ addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
+ addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
+ __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip;
+ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+ if (!mangle_packet(skb, dptr, datalen, poff, plen,
+ buffer, buflen))
+ return NF_DROP;
+ }
+
+ /* The rport= parameter (RFC 3581) contains the port number
+ * from which the server received the request. */
+ if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
+ "rport=", &poff, &plen,
+ &n) > 0 &&
+ htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
+ htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
+ __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
+ buflen = sprintf(buffer, "%u", ntohs(p));
+ if (!mangle_packet(skb, dptr, datalen, poff, plen,
+ buffer, buflen))
+ return NF_DROP;
+ }
}
- if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
- !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
- !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
- !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
+next:
+ /* Translate Contact headers */
+ dataoff = 0;
+ in_header = 0;
+ while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+ SIP_HDR_CONTACT, &in_header,
+ &matchoff, &matchlen,
+ &addr, &port) > 0) {
+ if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
+ &addr, port))
+ return NF_DROP;
+ }
+
+ if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) ||
+ !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO))
return NF_DROP;
return NF_ACCEPT;
}
-static unsigned int mangle_sip_packet(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conn *ct,
- const char **dptr, size_t dlen,
- char *buffer, int bufflen,
- enum sip_header_pos pos)
+/* Handles expected signalling connections and media streams */
+static void ip_nat_sip_expected(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
{
- unsigned int matchlen, matchoff;
+ struct nf_nat_range range;
- if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
- return 0;
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
- if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
- matchoff, matchlen, buffer, bufflen))
- return 0;
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip = exp->saved_ip;
+ nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
- /* We need to reload this. Thanks Patrick. */
- *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
- return 1;
+ /* Change src to where master sends to, but only if the connection
+ * actually came from the same source. */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
+ ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ }
}
-static int mangle_content_len(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conn *ct,
- const char *dptr)
+static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen,
+ struct nf_conntrack_expect *exp,
+ unsigned int matchoff,
+ unsigned int matchlen)
{
- unsigned int dataoff, matchoff, matchlen;
- char buffer[sizeof("65536")];
- int bufflen;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ __be32 newip;
+ u_int16_t port;
+ char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ unsigned buflen;
- dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+ /* Connection will come from reply */
+ if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
+ newip = exp->tuple.dst.u3.ip;
+ else
+ newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
- /* Get actual SDP length */
- if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
- &matchlen, POS_SDP_HEADER) > 0) {
+ /* If the signalling port matches the connection's source port in the
+ * original direction, try to use the destination port in the opposite
+ * direction. */
+ if (exp->tuple.dst.u.udp.port ==
+ ct->tuplehash[dir].tuple.src.u.udp.port)
+ port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
+ else
+ port = ntohs(exp->tuple.dst.u.udp.port);
+
+ exp->saved_ip = exp->tuple.dst.u3.ip;
+ exp->tuple.dst.u3.ip = newip;
+ exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
+ exp->dir = !dir;
+ exp->expectfn = ip_nat_sip_expected;
- /* since ct_sip_get_info() give us a pointer passing 'v='
- we need to add 2 bytes in this count. */
- int c_len = skb->len - dataoff - matchoff + 2;
+ for (; port != 0; port++) {
+ exp->tuple.dst.u.udp.port = htons(port);
+ if (nf_ct_expect_related(exp) == 0)
+ break;
+ }
- /* Now, update SDP length */
- if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
- &matchlen, POS_CONTENT) > 0) {
+ if (port == 0)
+ return NF_DROP;
- bufflen = sprintf(buffer, "%u", c_len);
- return nf_nat_mangle_udp_packet(skb, ct, ctinfo,
- matchoff, matchlen,
- buffer, bufflen);
- }
+ if (exp->tuple.dst.u3.ip != exp->saved_ip ||
+ exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
+ buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
+ NIPQUAD(newip), port);
+ if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+ buffer, buflen))
+ goto err;
}
- return 0;
+ return NF_ACCEPT;
+
+err:
+ nf_ct_unexpect_related(exp);
+ return NF_DROP;
}
-static unsigned int mangle_sdp(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conn *ct,
- __be32 newip, u_int16_t port,
- const char *dptr)
+static int mangle_content_len(struct sk_buff *skb,
+ const char **dptr, unsigned int *datalen)
{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
- unsigned int dataoff, bufflen;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchoff, matchlen;
+ char buffer[sizeof("65536")];
+ int buflen, c_len;
- dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+ /* Get actual SDP length */
+ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
+ SDP_HDR_VERSION, SDP_HDR_UNSPEC,
+ &matchoff, &matchlen) <= 0)
+ return 0;
+ c_len = *datalen - matchoff + strlen("v=");
- /* Mangle owner and contact info. */
- bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
- if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
- buffer, bufflen, POS_OWNER_IP4))
+ /* Now, update SDP length */
+ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
+ &matchoff, &matchlen) <= 0)
return 0;
- if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
- buffer, bufflen, POS_CONNECTION_IP4))
+ buflen = sprintf(buffer, "%u", c_len);
+ return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+ buffer, buflen);
+}
+
+static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
+ unsigned int dataoff, unsigned int *datalen,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ char *buffer, int buflen)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ unsigned int matchlen, matchoff;
+
+ if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
+ &matchoff, &matchlen) <= 0)
return 0;
+ return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+ buffer, buflen);
+}
- /* Mangle media port. */
- bufflen = sprintf(buffer, "%u", port);
- if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
- buffer, bufflen, POS_MEDIA))
+static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
+ unsigned int dataoff,
+ unsigned int *datalen,
+ enum sdp_header_types type,
+ enum sdp_header_types term,
+ const union nf_inet_addr *addr)
+{
+ char buffer[sizeof("nnn.nnn.nnn.nnn")];
+ unsigned int buflen;
+
+ buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip));
+ if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
+ buffer, buflen))
return 0;
- return mangle_content_len(skb, ctinfo, ct, dptr);
+ return mangle_content_len(skb, dptr, datalen);
}
-static void ip_nat_sdp_expect(struct nf_conn *ct,
- struct nf_conntrack_expect *exp)
+static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int *datalen,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ u_int16_t port)
{
- struct nf_nat_range range;
+ char buffer[sizeof("nnnnn")];
+ unsigned int buflen;
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+ buflen = sprintf(buffer, "%u", port);
+ if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+ buffer, buflen))
+ return 0;
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ return mangle_content_len(skb, dptr, datalen);
+}
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip = exp->saved_ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
+ unsigned int dataoff,
+ unsigned int *datalen,
+ const union nf_inet_addr *addr)
+{
+ char buffer[sizeof("nnn.nnn.nnn.nnn")];
+ unsigned int buflen;
+
+ /* Mangle session description owner and contact addresses */
+ buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip));
+ if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
+ SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
+ buffer, buflen))
+ return 0;
+
+ if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
+ SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
+ buffer, buflen))
+ return 0;
+
+ return mangle_content_len(skb, dptr, datalen);
}
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_expect *exp,
- const char *dptr)
+static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
+ const char **dptr,
+ unsigned int *datalen,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp,
+ unsigned int mediaoff,
+ unsigned int medialen,
+ union nf_inet_addr *rtp_addr)
{
- struct nf_conn *ct = exp->master;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- __be32 newip;
u_int16_t port;
/* Connection will come from reply */
if (ct->tuplehash[dir].tuple.src.u3.ip ==
ct->tuplehash[!dir].tuple.dst.u3.ip)
- newip = exp->tuple.dst.u3.ip;
+ rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
else
- newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
-
- exp->saved_ip = exp->tuple.dst.u3.ip;
- exp->tuple.dst.u3.ip = newip;
- exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
- exp->dir = !dir;
-
- /* When you see the packet, we need to NAT it the same as the
- this one. */
- exp->expectfn = ip_nat_sdp_expect;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
- exp->tuple.dst.u.udp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
+ rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+
+ rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
+ rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
+ rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+ rtp_exp->dir = !dir;
+ rtp_exp->expectfn = ip_nat_sip_expected;
+
+ rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
+ rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
+ rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+ rtcp_exp->dir = !dir;
+ rtcp_exp->expectfn = ip_nat_sip_expected;
+
+ /* Try to get same pair of ports: if not, try to change them. */
+ for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+ port != 0; port += 2) {
+ rtp_exp->tuple.dst.u.udp.port = htons(port);
+ if (nf_ct_expect_related(rtp_exp) != 0)
+ continue;
+ rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
+ if (nf_ct_expect_related(rtcp_exp) == 0)
break;
+ nf_ct_unexpect_related(rtp_exp);
}
if (port == 0)
- return NF_DROP;
+ goto err1;
+
+ /* Update media port. */
+ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
+ !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port))
+ goto err2;
- if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
- nf_ct_unexpect_related(exp);
- return NF_DROP;
- }
return NF_ACCEPT;
+
+err2:
+ nf_ct_unexpect_related(rtp_exp);
+ nf_ct_unexpect_related(rtcp_exp);
+err1:
+ return NF_DROP;
}
static void __exit nf_nat_sip_fini(void)
{
rcu_assign_pointer(nf_nat_sip_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_hook, NULL);
+ rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
+ rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_sip_init(void)
{
BUG_ON(nf_nat_sip_hook != NULL);
- BUG_ON(nf_nat_sdp_hook != NULL);
+ BUG_ON(nf_nat_sip_expect_hook != NULL);
+ BUG_ON(nf_nat_sdp_addr_hook != NULL);
+ BUG_ON(nf_nat_sdp_port_hook != NULL);
+ BUG_ON(nf_nat_sdp_session_hook != NULL);
+ BUG_ON(nf_nat_sdp_media_hook != NULL);
rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
- rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp);
+ rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
+ rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
+ rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
+ rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
+ rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 540ce6ae887..5daefad3d19 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -50,6 +50,7 @@
#include <net/udp.h>
#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_nat_helper.h>
@@ -219,7 +220,7 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
if (ch < 0x80)
*len = ch;
else {
- cnt = (unsigned char) (ch & 0x7F);
+ cnt = ch & 0x7F;
*len = 0;
while (cnt > 0) {
@@ -617,8 +618,7 @@ struct snmp_cnv
int syntax;
};
-static struct snmp_cnv snmp_conv [] =
-{
+static const struct snmp_cnv snmp_conv[] = {
{ASN1_UNI, ASN1_NUL, SNMP_NULL},
{ASN1_UNI, ASN1_INT, SNMP_INTEGER},
{ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
@@ -643,7 +643,7 @@ static unsigned char snmp_tag_cls2syntax(unsigned int tag,
unsigned int cls,
unsigned short *syntax)
{
- struct snmp_cnv *cnv;
+ const struct snmp_cnv *cnv;
cnv = snmp_conv;
@@ -903,7 +903,7 @@ static inline void mangle_address(unsigned char *begin,
u_int32_t old;
if (debug)
- memcpy(&old, (unsigned char *)addr, sizeof(old));
+ memcpy(&old, addr, sizeof(old));
*addr = map->to;
@@ -998,7 +998,7 @@ err_id_free:
*
*****************************************************************************/
-static void hex_dump(unsigned char *buf, size_t len)
+static void hex_dump(const unsigned char *buf, size_t len)
{
size_t i;
@@ -1079,7 +1079,7 @@ static int snmp_parse_mangle(unsigned char *msg,
if (cls != ASN1_CTX || con != ASN1_CON)
return 0;
if (debug > 1) {
- unsigned char *pdus[] = {
+ static const unsigned char *const pdus[] = {
[SNMP_PDU_GET] = "get",
[SNMP_PDU_NEXT] = "get-next",
[SNMP_PDU_RESPONSE] = "response",
@@ -1231,8 +1231,8 @@ static int help(struct sk_buff *skb, unsigned int protoff,
{
int dir = CTINFO2DIR(ctinfo);
unsigned int ret;
- struct iphdr *iph = ip_hdr(skb);
- struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
@@ -1267,11 +1267,15 @@ static int help(struct sk_buff *skb, unsigned int protoff,
return ret;
}
+static const struct nf_conntrack_expect_policy snmp_exp_policy = {
+ .max_expected = 0,
+ .timeout = 180,
+};
+
static struct nf_conntrack_helper snmp_helper __read_mostly = {
- .max_expected = 0,
- .timeout = 180,
.me = THIS_MODULE,
.help = help,
+ .expect_policy = &snmp_exp_policy,
.name = "snmp",
.tuple.src.l3num = AF_INET,
.tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
@@ -1279,10 +1283,9 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
};
static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
- .max_expected = 0,
- .timeout = 180,
.me = THIS_MODULE,
.help = help,
+ .expect_policy = &snmp_exp_policy,
.name = "snmp_trap",
.tuple.src.l3num = AF_INET,
.tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 99b2c788d5a..b7dd695691a 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -30,8 +30,8 @@
#ifdef CONFIG_XFRM
static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
- struct nf_conn *ct;
- struct nf_conntrack_tuple *t;
+ const struct nf_conn *ct;
+ const struct nf_conntrack_tuple *t;
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
unsigned long statusbit;
@@ -50,7 +50,10 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
if (ct->status & statusbit) {
fl->fl4_dst = t->dst.u3.ip;
if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
fl->fl_ip_dport = t->dst.u.tcp.port;
}
@@ -59,7 +62,10 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
if (ct->status & statusbit) {
fl->fl4_src = t->src.u3.ip;
if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
fl->fl_ip_sport = t->src.u.tcp.port;
}
}
@@ -87,21 +93,8 @@ nf_nat_fn(unsigned int hooknum,
have dropped it. Hence it's the user's responsibilty to
packet filter it out, or implement conntrack/NAT for that
protocol. 8) --RR */
- if (!ct) {
- /* Exception: ICMP redirect to new connection (not in
- hash table yet). We must not let this through, in
- case we're doing NAT to the same network. */
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_hdr), &_hdr);
- if (hp != NULL &&
- hp->type == ICMP_REDIRECT)
- return NF_DROP;
- }
+ if (!ct)
return NF_ACCEPT;
- }
/* Don't try to NAT if this packet is not conntracked */
if (ct == &nf_conntrack_untracked)
@@ -109,6 +102,9 @@ nf_nat_fn(unsigned int hooknum,
nat = nfct_nat(ct);
if (!nat) {
+ /* NAT module was loaded late. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
if (nat == NULL) {
pr_debug("failed to add NAT extension\n");
@@ -134,10 +130,7 @@ nf_nat_fn(unsigned int hooknum,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- if (unlikely(nf_ct_is_confirmed(ct)))
- /* NAT module was loaded late */
- ret = alloc_null_binding_confirmed(ct, hooknum);
- else if (hooknum == NF_INET_LOCAL_IN)
+ if (hooknum == NF_INET_LOCAL_IN)
/* LOCAL_IN hook doesn't have a chain! */
ret = alloc_null_binding(ct, hooknum);
else
@@ -189,7 +182,7 @@ nf_nat_out(unsigned int hooknum,
int (*okfn)(struct sk_buff *))
{
#ifdef CONFIG_XFRM
- struct nf_conn *ct;
+ const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
#endif
unsigned int ret;
@@ -223,7 +216,7 @@ nf_nat_local_fn(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct nf_conn *ct;
+ const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
@@ -252,25 +245,6 @@ nf_nat_local_fn(unsigned int hooknum,
return ret;
}
-static unsigned int
-nf_nat_adjust(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
- pr_debug("nf_nat_standalone: adjusting sequence number\n");
- if (!nf_nat_seq_adjust(skb, ct, ctinfo))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
/* We must be after connection tracking and before packet filtering. */
static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
@@ -290,14 +264,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
- /* After conntrack, adjust sequence number */
- {
- .hook = nf_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
- },
/* Before packet filtering, change destination */
{
.hook = nf_nat_local_fn,
@@ -314,14 +280,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
- /* After conntrack, adjust sequence number */
- {
- .hook = nf_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
- },
};
static int __init nf_nat_standalone_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d63474c6b40..552169b41b1 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -51,24 +51,54 @@
*/
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
+ struct net *net = seq->private;
+
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
- sock_prot_inuse_get(&tcp_prot),
+ sock_prot_inuse_get(net, &tcp_prot),
atomic_read(&tcp_orphan_count),
tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
+ seq_printf(seq, "UDP: inuse %d mem %d\n",
+ sock_prot_inuse_get(net, &udp_prot),
atomic_read(&udp_memory_allocated));
- seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
- seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
+ seq_printf(seq, "UDPLITE: inuse %d\n",
+ sock_prot_inuse_get(net, &udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n",
+ sock_prot_inuse_get(net, &raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
- ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
+ ip_frag_nqueues(net), ip_frag_mem(net));
return 0;
}
static int sockstat_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, sockstat_seq_show, NULL);
+ int err;
+ struct net *net;
+
+ err = -ENXIO;
+ net = get_proc_net(inode);
+ if (net == NULL)
+ goto err_net;
+
+ err = single_open(file, sockstat_seq_show, net);
+ if (err < 0)
+ goto err_open;
+
+ return 0;
+
+err_open:
+ put_net(net);
+err_net:
+ return err;
+}
+
+static int sockstat_seq_release(struct inode *inode, struct file *file)
+{
+ struct net *net = ((struct seq_file *)file->private_data)->private;
+
+ put_net(net);
+ return single_release(inode, file);
}
static const struct file_operations sockstat_seq_fops = {
@@ -76,7 +106,7 @@ static const struct file_operations sockstat_seq_fops = {
.open = sockstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = sockstat_seq_release,
};
/* snmp items */
@@ -423,25 +453,42 @@ static const struct file_operations netstat_seq_fops = {
.release = single_release,
};
+static __net_init int ip_proc_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static __net_exit void ip_proc_exit_net(struct net *net)
+{
+ proc_net_remove(net, "sockstat");
+}
+
+static __net_initdata struct pernet_operations ip_proc_ops = {
+ .init = ip_proc_init_net,
+ .exit = ip_proc_exit_net,
+};
+
int __init ip_misc_proc_init(void)
{
int rc = 0;
+ if (register_pernet_subsys(&ip_proc_ops))
+ goto out_pernet;
+
if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
goto out_netstat;
if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
goto out_snmp;
-
- if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops))
- goto out_sockstat;
out:
return rc;
-out_sockstat:
- proc_net_remove(&init_net, "snmp");
out_snmp:
proc_net_remove(&init_net, "netstat");
out_netstat:
+ unregister_pernet_subsys(&ip_proc_ops);
+out_pernet:
rc = -ENOMEM;
goto out;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a3002fe65b7..11d7f753a82 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -81,41 +81,34 @@
#include <linux/netfilter_ipv4.h>
static struct raw_hashinfo raw_v4_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(),
+ .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
-void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h)
+void raw_hash_sk(struct sock *sk)
{
+ struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
write_lock_bh(&h->lock);
sk_add_node(sk, head);
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_hash_sk);
-void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h)
+void raw_unhash_sk(struct sock *sk)
{
+ struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
+
write_lock_bh(&h->lock);
if (sk_del_node_init(sk))
- sock_prot_inuse_add(sk->sk_prot, -1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);
-static void raw_v4_hash(struct sock *sk)
-{
- raw_hash_sk(sk, &raw_v4_hashinfo);
-}
-
-static void raw_v4_unhash(struct sock *sk)
-{
- raw_unhash_sk(sk, &raw_v4_hashinfo);
-}
-
static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
unsigned short num, __be32 raddr, __be32 laddr, int dif)
{
@@ -124,7 +117,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
sk_for_each_from(sk, node) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_net == net && inet->num == num &&
+ if (net_eq(sock_net(sk), net) && inet->num == num &&
!(inet->daddr && inet->daddr != raddr) &&
!(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
!(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
@@ -175,7 +168,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
if (hlist_empty(head))
goto out;
- net = skb->dev->nd_net;
+ net = dev_net(skb->dev);
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
iph->saddr, iph->daddr,
skb->dev->ifindex);
@@ -283,7 +276,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
if (raw_sk != NULL) {
iph = (struct iphdr *)skb->data;
- net = skb->dev->nd_net;
+ net = dev_net(skb->dev);
while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
iph->daddr, iph->saddr,
@@ -506,7 +499,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
- err = ip_cmsg_send(msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc);
if (err)
goto out;
if (ipc.opt)
@@ -560,7 +553,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
security_sk_classify_flow(sk, &fl);
- err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
+ err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
}
if (err)
goto done;
@@ -627,7 +620,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
- chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr);
+ chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
ret = -EADDRNOTAVAIL;
if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -825,8 +818,6 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
}
-DEFINE_PROTO_INUSE(raw)
-
struct proto raw_prot = {
.name = "RAW",
.owner = THIS_MODULE,
@@ -841,14 +832,14 @@ struct proto raw_prot = {
.recvmsg = raw_recvmsg,
.bind = raw_bind,
.backlog_rcv = raw_rcv_skb,
- .hash = raw_v4_hash,
- .unhash = raw_v4_unhash,
+ .hash = raw_hash_sk,
+ .unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw_sock),
+ .h.raw_hash = &raw_v4_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_raw_setsockopt,
.compat_getsockopt = compat_raw_getsockopt,
#endif
- REF_PROTO_INUSE(raw)
};
#ifdef CONFIG_PROC_FS
@@ -862,7 +853,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
struct hlist_node *node;
sk_for_each(sk, node, &state->h->ht[state->bucket])
- if (sk->sk_net == state->p.net)
+ if (sock_net(sk) == seq_file_net(seq))
goto found;
}
sk = NULL;
@@ -878,7 +869,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
sk = sk_next(sk);
try_again:
;
- } while (sk && sk->sk_net != state->p.net);
+ } while (sk && sock_net(sk) != seq_file_net(seq));
if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
sk = sk_head(&state->h->ht[state->bucket]);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 525787b52b7..780e9484c82 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -118,21 +118,19 @@
#define RT_GC_TIMEOUT (300*HZ)
static int ip_rt_max_size;
-static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
-static int ip_rt_gc_interval = 60 * HZ;
-static int ip_rt_gc_min_interval = HZ / 2;
-static int ip_rt_redirect_number = 9;
-static int ip_rt_redirect_load = HZ / 50;
-static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1));
-static int ip_rt_error_cost = HZ;
-static int ip_rt_error_burst = 5 * HZ;
-static int ip_rt_gc_elasticity = 8;
-static int ip_rt_mtu_expires = 10 * 60 * HZ;
-static int ip_rt_min_pmtu = 512 + 20 + 20;
-static int ip_rt_min_advmss = 256;
-static int ip_rt_secret_interval = 10 * 60 * HZ;
-
-#define RTprint(a...) printk(KERN_DEBUG a)
+static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+static int ip_rt_gc_interval __read_mostly = 60 * HZ;
+static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
+static int ip_rt_redirect_number __read_mostly = 9;
+static int ip_rt_redirect_load __read_mostly = HZ / 50;
+static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
+static int ip_rt_error_cost __read_mostly = HZ;
+static int ip_rt_error_burst __read_mostly = 5 * HZ;
+static int ip_rt_gc_elasticity __read_mostly = 8;
+static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
+static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
+static int ip_rt_min_advmss __read_mostly = 256;
+static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static void rt_worker_func(struct work_struct *work);
static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
@@ -252,40 +250,41 @@ static inline void rt_hash_lock_init(void)
}
#endif
-static struct rt_hash_bucket *rt_hash_table;
-static unsigned rt_hash_mask;
-static unsigned int rt_hash_log;
-static atomic_t rt_genid;
+static struct rt_hash_bucket *rt_hash_table __read_mostly;
+static unsigned rt_hash_mask __read_mostly;
+static unsigned int rt_hash_log __read_mostly;
+static atomic_t rt_genid __read_mostly;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) \
(__raw_get_cpu_var(rt_cache_stat).field++)
-static unsigned int rt_hash_code(u32 daddr, u32 saddr)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
{
- return jhash_2words(daddr, saddr, atomic_read(&rt_genid))
+ return jhash_3words((__force u32)(__be32)(daddr),
+ (__force u32)(__be32)(saddr),
+ idx, atomic_read(&rt_genid))
& rt_hash_mask;
}
-#define rt_hash(daddr, saddr, idx) \
- rt_hash_code((__force u32)(__be32)(daddr),\
- (__force u32)(__be32)(saddr) ^ ((idx) << 5))
-
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
+ struct seq_net_private p;
int bucket;
int genid;
};
-static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
+static struct rtable *rt_cache_get_first(struct seq_file *seq)
{
+ struct rt_cache_iter_state *st = seq->private;
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
rcu_read_lock_bh();
r = rcu_dereference(rt_hash_table[st->bucket].chain);
while (r) {
- if (r->rt_genid == st->genid)
+ if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
+ r->rt_genid == st->genid)
return r;
r = rcu_dereference(r->u.dst.rt_next);
}
@@ -294,8 +293,10 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
return r;
}
-static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r)
+static struct rtable *__rt_cache_get_next(struct seq_file *seq,
+ struct rtable *r)
{
+ struct rt_cache_iter_state *st = seq->private;
r = r->u.dst.rt_next;
while (!r) {
rcu_read_unlock_bh();
@@ -307,25 +308,34 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r
return rcu_dereference(r);
}
-static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
+static struct rtable *rt_cache_get_next(struct seq_file *seq,
+ struct rtable *r)
+{
+ struct rt_cache_iter_state *st = seq->private;
+ while ((r = __rt_cache_get_next(seq, r)) != NULL) {
+ if (dev_net(r->u.dst.dev) != seq_file_net(seq))
+ continue;
+ if (r->rt_genid == st->genid)
+ break;
+ }
+ return r;
+}
+
+static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
{
- struct rtable *r = rt_cache_get_first(st);
+ struct rtable *r = rt_cache_get_first(seq);
if (r)
- while (pos && (r = rt_cache_get_next(st, r))) {
- if (r->rt_genid != st->genid)
- continue;
+ while (pos && (r = rt_cache_get_next(seq, r)))
--pos;
- }
return pos ? NULL : r;
}
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
struct rt_cache_iter_state *st = seq->private;
-
if (*pos)
- return rt_cache_get_idx(st, *pos - 1);
+ return rt_cache_get_idx(seq, *pos - 1);
st->genid = atomic_read(&rt_genid);
return SEQ_START_TOKEN;
}
@@ -333,12 +343,11 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct rtable *r;
- struct rt_cache_iter_state *st = seq->private;
if (v == SEQ_START_TOKEN)
- r = rt_cache_get_first(st);
+ r = rt_cache_get_first(seq);
else
- r = rt_cache_get_next(st, v);
+ r = rt_cache_get_next(seq, v);
++*pos;
return r;
}
@@ -390,7 +399,7 @@ static const struct seq_operations rt_cache_seq_ops = {
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &rt_cache_seq_ops,
+ return seq_open_net(inode, file, &rt_cache_seq_ops,
sizeof(struct rt_cache_iter_state));
}
@@ -399,7 +408,7 @@ static const struct file_operations rt_cache_seq_fops = {
.open = rt_cache_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
@@ -533,7 +542,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
}
#endif
-static __init int ip_rt_proc_init(struct net *net)
+static int __net_init ip_rt_do_proc_init(struct net *net)
{
struct proc_dir_entry *pde;
@@ -542,12 +551,11 @@ static __init int ip_rt_proc_init(struct net *net)
if (!pde)
goto err1;
- pde = create_proc_entry("rt_cache", S_IRUGO, net->proc_net_stat);
+ pde = proc_create("rt_cache", S_IRUGO,
+ net->proc_net_stat, &rt_cpu_seq_fops);
if (!pde)
goto err2;
- pde->proc_fops = &rt_cpu_seq_fops;
-
#ifdef CONFIG_NET_CLS_ROUTE
pde = create_proc_read_entry("rt_acct", 0, net->proc_net,
ip_rt_acct_read, NULL);
@@ -565,25 +573,43 @@ err2:
err1:
return -ENOMEM;
}
+
+static void __net_exit ip_rt_do_proc_exit(struct net *net)
+{
+ remove_proc_entry("rt_cache", net->proc_net_stat);
+ remove_proc_entry("rt_cache", net->proc_net);
+ remove_proc_entry("rt_acct", net->proc_net);
+}
+
+static struct pernet_operations ip_rt_proc_ops __net_initdata = {
+ .init = ip_rt_do_proc_init,
+ .exit = ip_rt_do_proc_exit,
+};
+
+static int __init ip_rt_proc_init(void)
+{
+ return register_pernet_subsys(&ip_rt_proc_ops);
+}
+
#else
-static inline int ip_rt_proc_init(struct net *net)
+static inline int ip_rt_proc_init(void)
{
return 0;
}
#endif /* CONFIG_PROC_FS */
-static __inline__ void rt_free(struct rtable *rt)
+static inline void rt_free(struct rtable *rt)
{
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
-static __inline__ void rt_drop(struct rtable *rt)
+static inline void rt_drop(struct rtable *rt)
{
ip_rt_put(rt);
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
-static __inline__ int rt_fast_clean(struct rtable *rth)
+static inline int rt_fast_clean(struct rtable *rth)
{
/* Kill broadcast/multicast entries very aggresively, if they
collide in hash table with more useful entries */
@@ -591,7 +617,7 @@ static __inline__ int rt_fast_clean(struct rtable *rth)
rth->fl.iif && rth->u.dst.rt_next;
}
-static __inline__ int rt_valuable(struct rtable *rth)
+static inline int rt_valuable(struct rtable *rth)
{
return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
rth->u.dst.expires;
@@ -653,7 +679,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
{
- return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net;
+ return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
}
/*
@@ -1033,10 +1059,10 @@ restart:
#if RT_CACHE_DEBUG >= 2
if (rt->u.dst.rt_next) {
struct rtable *trt;
- printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash,
+ printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash,
NIPQUAD(rt->rt_dst));
for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
- printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
+ printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst));
printk("\n");
}
#endif
@@ -1132,10 +1158,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
struct netevent_redirect netevent;
+ struct net *net;
if (!in_dev)
return;
+ net = dev_net(dev);
if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
|| ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
|| ipv4_is_zeronet(new_gw))
@@ -1147,7 +1175,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
goto reject_redirect;
} else {
- if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST)
+ if (inet_addr_type(net, new_gw) != RTN_UNICAST)
goto reject_redirect;
}
@@ -1165,7 +1193,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rth->fl.fl4_src != skeys[i] ||
rth->fl.oif != ikeys[k] ||
rth->fl.iif != 0 ||
- rth->rt_genid != atomic_read(&rt_genid)) {
+ rth->rt_genid != atomic_read(&rt_genid) ||
+ !net_eq(dev_net(rth->u.dst.dev), net)) {
rthp = &rth->u.dst.rt_next;
continue;
}
@@ -1246,9 +1275,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
- printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
- "%u.%u.%u.%u ignored.\n"
- " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n",
+ printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about "
+ NIPQUAD_FMT " ignored.\n"
+ " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n",
NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
NIPQUAD(saddr), NIPQUAD(daddr));
#endif
@@ -1257,7 +1286,7 @@ reject_redirect:
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable*)dst;
+ struct rtable *rt = (struct rtable *)dst;
struct dst_entry *ret = dst;
if (rt) {
@@ -1270,7 +1299,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
rt->fl.oif);
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
- "%u.%u.%u.%u/%02x dropped\n",
+ NIPQUAD_FMT "/%02x dropped\n",
NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
#endif
rt_del(hash, rt);
@@ -1298,7 +1327,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
void ip_rt_send_redirect(struct sk_buff *skb)
{
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
if (!in_dev)
@@ -1335,8 +1364,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
if (IN_DEV_LOG_MARTIANS(in_dev) &&
rt->u.dst.rate_tokens == ip_rt_redirect_number &&
net_ratelimit())
- printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
- "redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
+ printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores "
+ "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n",
NIPQUAD(rt->rt_src), rt->rt_iif,
NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
#endif
@@ -1347,7 +1376,7 @@ out:
static int ip_error(struct sk_buff *skb)
{
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
unsigned long now;
int code;
@@ -1389,7 +1418,7 @@ out: kfree_skb(skb);
static const unsigned short mtu_plateau[] =
{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
-static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
+static inline unsigned short guess_mtu(unsigned short old_mtu)
{
int i;
@@ -1424,7 +1453,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
rth->rt_src == iph->saddr &&
rth->fl.iif == 0 &&
!(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
- rth->u.dst.dev->nd_net == net &&
+ net_eq(dev_net(rth->u.dst.dev), net) &&
rth->rt_genid == atomic_read(&rt_genid)) {
unsigned short mtu = new_mtu;
@@ -1500,9 +1529,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
{
struct rtable *rt = (struct rtable *) dst;
struct in_device *idev = rt->idev;
- if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) {
+ if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
struct in_device *loopback_idev =
- in_dev_get(dev->nd_net->loopback_dev);
+ in_dev_get(dev_net(dev)->loopback_dev);
if (loopback_idev) {
rt->idev = loopback_idev;
in_dev_put(idev);
@@ -1516,14 +1545,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
- rt = (struct rtable *) skb->dst;
+ rt = skb->rtable;
if (rt)
dst_set_expires(&rt->u.dst, 0);
}
static int ip_rt_bug(struct sk_buff *skb)
{
- printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
+ printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n",
NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
skb->dev ? skb->dev->name : "?");
kfree_skb(skb);
@@ -1546,7 +1575,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
if (rt->fl.iif == 0)
src = rt->rt_src;
- else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) {
+ else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
src = FIB_RES_PREFSRC(res);
fib_res_put(&res);
} else
@@ -1676,7 +1705,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
in_dev_put(in_dev);
hash = rt_hash(daddr, saddr, dev->ifindex);
- return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
+ return rt_intern_hash(hash, rth, &skb->rtable);
e_nobufs:
in_dev_put(in_dev);
@@ -1701,8 +1730,8 @@ static void ip_handle_martian_source(struct net_device *dev,
* RFC1812 recommendation, if source is martian,
* the only hint is MAC header.
*/
- printk(KERN_WARNING "martian source %u.%u.%u.%u from "
- "%u.%u.%u.%u, on dev %s\n",
+ printk(KERN_WARNING "martian source " NIPQUAD_FMT " from "
+ NIPQUAD_FMT", on dev %s\n",
NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
int i;
@@ -1719,11 +1748,11 @@ static void ip_handle_martian_source(struct net_device *dev,
#endif
}
-static inline int __mkroute_input(struct sk_buff *skb,
- struct fib_result* res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct rtable **result)
+static int __mkroute_input(struct sk_buff *skb,
+ struct fib_result *res,
+ struct in_device *in_dev,
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct rtable **result)
{
struct rtable *rth;
@@ -1815,11 +1844,11 @@ static inline int __mkroute_input(struct sk_buff *skb,
return err;
}
-static inline int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result* res,
- const struct flowi *fl,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+static int ip_mkroute_input(struct sk_buff *skb,
+ struct fib_result *res,
+ const struct flowi *fl,
+ struct in_device *in_dev,
+ __be32 daddr, __be32 saddr, u32 tos)
{
struct rtable* rth = NULL;
int err;
@@ -1837,7 +1866,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
/* put it into the cache */
hash = rt_hash(daddr, saddr, fl->iif);
- return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+ return rt_intern_hash(hash, rth, &skb->rtable);
}
/*
@@ -1870,7 +1899,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
__be32 spec_dst;
int err = -EINVAL;
int free_res = 0;
- struct net * net = dev->nd_net;
+ struct net * net = dev_net(dev);
/* IP on this device is disabled. */
@@ -1993,7 +2022,7 @@ local_input:
}
rth->rt_type = res.type;
hash = rt_hash(daddr, saddr, fl.iif);
- err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+ err = rt_intern_hash(hash, rth, &skb->rtable);
goto done;
no_route:
@@ -2011,8 +2040,8 @@ martian_destination:
RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
- printk(KERN_WARNING "martian destination %u.%u.%u.%u from "
- "%u.%u.%u.%u, dev %s\n",
+ printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from "
+ NIPQUAD_FMT ", dev %s\n",
NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
#endif
@@ -2041,25 +2070,25 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
int iif = dev->ifindex;
struct net *net;
- net = dev->nd_net;
+ net = dev_net(dev);
tos &= IPTOS_RT_MASK;
hash = rt_hash(daddr, saddr, iif);
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
rth = rcu_dereference(rth->u.dst.rt_next)) {
- if (rth->fl.fl4_dst == daddr &&
- rth->fl.fl4_src == saddr &&
- rth->fl.iif == iif &&
- rth->fl.oif == 0 &&
+ if (((rth->fl.fl4_dst ^ daddr) |
+ (rth->fl.fl4_src ^ saddr) |
+ (rth->fl.iif ^ iif) |
+ rth->fl.oif |
+ (rth->fl.fl4_tos ^ tos)) == 0 &&
rth->fl.mark == skb->mark &&
- rth->fl.fl4_tos == tos &&
- rth->u.dst.dev->nd_net == net &&
+ net_eq(dev_net(rth->u.dst.dev), net) &&
rth->rt_genid == atomic_read(&rt_genid)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
- skb->dst = (struct dst_entry*)rth;
+ skb->rtable = rth;
return 0;
}
RT_CACHE_STAT_INC(in_hlist_search);
@@ -2101,12 +2130,12 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return ip_route_input_slow(skb, daddr, saddr, tos, dev);
}
-static inline int __mkroute_output(struct rtable **result,
- struct fib_result* res,
- const struct flowi *fl,
- const struct flowi *oldflp,
- struct net_device *dev_out,
- unsigned flags)
+static int __mkroute_output(struct rtable **result,
+ struct fib_result *res,
+ const struct flowi *fl,
+ const struct flowi *oldflp,
+ struct net_device *dev_out,
+ unsigned flags)
{
struct rtable *rth;
struct in_device *in_dev;
@@ -2221,12 +2250,12 @@ static inline int __mkroute_output(struct rtable **result,
return err;
}
-static inline int ip_mkroute_output(struct rtable **rp,
- struct fib_result* res,
- const struct flowi *fl,
- const struct flowi *oldflp,
- struct net_device *dev_out,
- unsigned flags)
+static int ip_mkroute_output(struct rtable **rp,
+ struct fib_result *res,
+ const struct flowi *fl,
+ const struct flowi *oldflp,
+ struct net_device *dev_out,
+ unsigned flags)
{
struct rtable *rth = NULL;
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
@@ -2456,7 +2485,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
- rth->u.dst.dev->nd_net == net &&
+ net_eq(dev_net(rth->u.dst.dev), net) &&
rth->rt_genid == atomic_read(&rt_genid)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
@@ -2488,7 +2517,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
};
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
+static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
{
struct rtable *ort = *rp;
struct rtable *rt = (struct rtable *)
@@ -2548,7 +2577,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
flags ? XFRM_LOOKUP_WAIT : 0);
if (err == -EREMOTE)
- err = ipv4_dst_blackhole(rp, flp, sk);
+ err = ipv4_dst_blackhole(rp, flp);
return err;
}
@@ -2566,7 +2595,7 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
int nowait, unsigned int flags)
{
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb->rtable;
struct rtmsg *r;
struct nlmsghdr *nlh;
long expires;
@@ -2659,7 +2688,7 @@ nla_put_failure:
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct net *net = in_skb->sk->sk_net;
+ struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
struct rtable *rt = NULL;
@@ -2669,9 +2698,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
int err;
struct sk_buff *skb;
- if (net != &init_net)
- return -EINVAL;
-
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err < 0)
goto errout;
@@ -2701,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
if (iif) {
struct net_device *dev;
- dev = __dev_get_by_index(&init_net, iif);
+ dev = __dev_get_by_index(net, iif);
if (dev == NULL) {
err = -ENODEV;
goto errout_free;
@@ -2713,7 +2739,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
local_bh_enable();
- rt = (struct rtable*) skb->dst;
+ rt = skb->rtable;
if (err == 0 && rt->u.dst.error)
err = -rt->u.dst.error;
} else {
@@ -2727,22 +2753,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
},
.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
};
- err = ip_route_output_key(&init_net, &rt, &fl);
+ err = ip_route_output_key(net, &rt, &fl);
}
if (err)
goto errout_free;
- skb->dst = &rt->u.dst;
+ skb->rtable = rt;
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
- RTM_NEWROUTE, 0, 0);
+ RTM_NEWROUTE, 0, 0);
if (err <= 0)
goto errout_free;
- err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
return err;
@@ -2756,6 +2782,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct rtable *rt;
int h, s_h;
int idx, s_idx;
+ struct net *net;
+
+ net = sock_net(skb->sk);
s_h = cb->args[0];
if (s_h < 0)
@@ -2765,7 +2794,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock_bh();
for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
- if (idx < s_idx)
+ if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
continue;
if (rt->rt_genid != atomic_read(&rt_genid))
continue;
@@ -3029,7 +3058,9 @@ int __init ip_rt_init(void)
devinet_init();
ip_fib_init();
- setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
+ rt_secret_timer.function = rt_secret_rebuild;
+ rt_secret_timer.data = 0;
+ init_timer_deferrable(&rt_secret_timer);
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
@@ -3041,7 +3072,7 @@ int __init ip_rt_init(void)
ip_rt_secret_interval;
add_timer(&rt_secret_timer);
- if (ip_rt_proc_init(&init_net))
+ if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
xfrm_init();
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f470fe4511d..73ba98921d6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -10,8 +10,6 @@
* 2 of the License, or (at your option) any later version.
*
* $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
- *
- * Missing: IPv6 support.
*/
#include <linux/tcp.h>
@@ -21,26 +19,33 @@
#include <linux/kernel.h>
#include <net/tcp.h>
+/* Timestamps: lowest 9 bits store TCP options */
+#define TSBITS 9
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+
extern int sysctl_tcp_syncookies;
-static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
+EXPORT_SYMBOL(syncookie_secret);
static __init int init_syncookies(void)
{
get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
return 0;
}
-module_init(init_syncookies);
+__initcall(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
+
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
{
- __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS];
+ __u32 *tmp = __get_cpu_var(cookie_scratch);
- memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c]));
+ memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
tmp[0] = (__force u32)saddr;
tmp[1] = (__force u32)daddr;
tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
@@ -50,6 +55,39 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
return tmp[17];
}
+
+/*
+ * when syncookies are in effect and tcp timestamps are enabled we encode
+ * tcp options in the lowest 9 bits of the timestamp value that will be
+ * sent in the syn-ack.
+ * Since subsequent timestamps use the normal tcp_time_stamp value, we
+ * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
+ */
+__u32 cookie_init_timestamp(struct request_sock *req)
+{
+ struct inet_request_sock *ireq;
+ u32 ts, ts_now = tcp_time_stamp;
+ u32 options = 0;
+
+ ireq = inet_rsk(req);
+ if (ireq->wscale_ok) {
+ options = ireq->snd_wscale;
+ options |= ireq->rcv_wscale << 4;
+ }
+ options |= ireq->sack_ok << 8;
+
+ ts = ts_now & ~TSMASK;
+ ts |= options;
+ if (ts > ts_now) {
+ ts >>= TSBITS;
+ ts--;
+ ts <<= TSBITS;
+ ts |= options;
+ }
+ return ts;
+}
+
+
static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
__be16 dport, __u32 sseq, __u32 count,
__u32 data)
@@ -184,6 +222,35 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
return child;
}
+
+/*
+ * when syncookies are in effect and tcp timestamps are enabled we stored
+ * additional tcp options in the timestamp.
+ * This extracts these options from the timestamp echo.
+ *
+ * The lowest 4 bits are for snd_wscale
+ * The next 4 lsb are for rcv_wscale
+ * The next lsb is for sack_ok
+ */
+void cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+{
+ /* echoed timestamp, 9 lowest bits contain options */
+ u32 options = tcp_opt->rcv_tsecr & TSMASK;
+
+ tcp_opt->snd_wscale = options & 0xf;
+ options >>= 4;
+ tcp_opt->rcv_wscale = options & 0xf;
+
+ tcp_opt->sack_ok = (options >> 4) & 0x1;
+
+ if (tcp_opt->sack_ok)
+ tcp_sack_reset(tcp_opt);
+
+ if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
+ tcp_opt->wscale_ok = 1;
+}
+EXPORT_SYMBOL(cookie_check_timestamp);
+
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt)
{
@@ -197,6 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
int mss;
struct rtable *rt;
__u8 rcv_wscale;
+ struct tcp_options_received tcp_opt;
if (!sysctl_tcp_syncookies || !th->ack)
goto out;
@@ -209,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+ /* check for timestamp cookie support */
+ memset(&tcp_opt, 0, sizeof(tcp_opt));
+ tcp_parse_options(skb, &tcp_opt, 0);
+
+ if (tcp_opt.saw_tstamp)
+ cookie_check_timestamp(&tcp_opt);
+
ret = NULL;
req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
if (!req)
@@ -227,6 +302,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->loc_addr = ip_hdr(skb)->daddr;
ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->opt = NULL;
+ ireq->snd_wscale = tcp_opt.snd_wscale;
+ ireq->rcv_wscale = tcp_opt.rcv_wscale;
+ ireq->sack_ok = tcp_opt.sack_ok;
+ ireq->wscale_ok = tcp_opt.wscale_ok;
+ ireq->tstamp_ok = tcp_opt.saw_tstamp;
+ req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -241,8 +322,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
}
- ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
- ireq->wscale_ok = ireq->sack_ok = 0;
req->expires = 0UL;
req->retrans = 0;
@@ -271,11 +350,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
/* Try to redo what tcp_v4_send_synack did. */
- req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW);
+ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
+
tcp_select_initial_window(tcp_full_space(sk), req->mss,
&req->rcv_wnd, &req->window_clamp,
- 0, &rcv_wscale);
- /* BTW win scale with syncookies is 0 by definition */
+ ireq->wscale_ok, &rcv_wscale);
+
ireq->rcv_wscale = rcv_wscale;
ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 88286f35d1e..c437f804ee3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -404,38 +404,6 @@ static struct ctl_table ipv4_table[] = {
.strategy = &ipv4_sysctl_local_port_range,
},
{
- .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
- .procname = "icmp_echo_ignore_all",
- .data = &sysctl_icmp_echo_ignore_all,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
- .procname = "icmp_echo_ignore_broadcasts",
- .data = &sysctl_icmp_echo_ignore_broadcasts,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
- .procname = "icmp_ignore_bogus_error_responses",
- .data = &sysctl_icmp_ignore_bogus_error_responses,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
- .procname = "icmp_errors_use_inbound_ifaddr",
- .data = &sysctl_icmp_errors_use_inbound_ifaddr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_IPV4_ROUTE,
.procname = "route",
.maxlen = 0,
@@ -586,22 +554,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_IPV4_ICMP_RATELIMIT,
- .procname = "icmp_ratelimit",
- .data = &sysctl_icmp_ratelimit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_IPV4_ICMP_RATEMASK,
- .procname = "icmp_ratemask",
- .data = &sysctl_icmp_ratemask,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_TCP_TW_REUSE,
.procname = "tcp_tw_reuse",
.data = &sysctl_tcp_tw_reuse,
@@ -804,6 +756,58 @@ static struct ctl_table ipv4_table[] = {
{ .ctl_name = 0 }
};
+static struct ctl_table ipv4_net_table[] = {
+ {
+ .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
+ .procname = "icmp_echo_ignore_all",
+ .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
+ .procname = "icmp_echo_ignore_broadcasts",
+ .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
+ .procname = "icmp_ignore_bogus_error_responses",
+ .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
+ .procname = "icmp_errors_use_inbound_ifaddr",
+ .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_ICMP_RATELIMIT,
+ .procname = "icmp_ratelimit",
+ .data = &init_net.ipv4.sysctl_icmp_ratelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = NET_IPV4_ICMP_RATEMASK,
+ .procname = "icmp_ratemask",
+ .data = &init_net.ipv4.sysctl_icmp_ratemask,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { }
+};
+
struct ctl_path net_ipv4_ctl_path[] = {
{ .procname = "net", .ctl_name = CTL_NET, },
{ .procname = "ipv4", .ctl_name = NET_IPV4, },
@@ -811,12 +815,72 @@ struct ctl_path net_ipv4_ctl_path[] = {
};
EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
+static __net_init int ipv4_sysctl_init_net(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = ipv4_net_table;
+ if (net != &init_net) {
+ table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].data =
+ &net->ipv4.sysctl_icmp_echo_ignore_all;
+ table[1].data =
+ &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
+ table[2].data =
+ &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
+ table[3].data =
+ &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
+ table[4].data =
+ &net->ipv4.sysctl_icmp_ratelimit;
+ table[5].data =
+ &net->ipv4.sysctl_icmp_ratemask;
+ }
+
+ net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
+ net_ipv4_ctl_path, table);
+ if (net->ipv4.ipv4_hdr == NULL)
+ goto err_reg;
+
+ return 0;
+
+err_reg:
+ if (net != &init_net)
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static __net_exit void ipv4_sysctl_exit_net(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ipv4.ipv4_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
+ kfree(table);
+}
+
+static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
+ .init = ipv4_sysctl_init_net,
+ .exit = ipv4_sysctl_exit_net,
+};
+
static __init int sysctl_ipv4_init(void)
{
struct ctl_table_header *hdr;
hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
- return hdr == NULL ? -ENOMEM : 0;
+ if (hdr == NULL)
+ return -ENOMEM;
+
+ if (register_pernet_subsys(&ipv4_sysctl_ops)) {
+ unregister_sysctl_table(hdr);
+ return -ENOMEM;
+ }
+
+ return 0;
}
__initcall(sysctl_ipv4_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 071e83a894a..58ac838bf46 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -735,7 +735,7 @@ new_segment:
if (!(psize -= copy))
goto out;
- if (skb->len < mss_now || (flags & MSG_OOB))
+ if (skb->len < size_goal || (flags & MSG_OOB))
continue;
if (forced_push(tp)) {
@@ -981,7 +981,7 @@ new_segment:
if ((seglen -= copy) == 0 && iovlen == 0)
goto out;
- if (skb->len < mss_now || (flags & MSG_OOB))
+ if (skb->len < size_goal || (flags & MSG_OOB))
continue;
if (forced_push(tp)) {
@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_DEFER_ACCEPT:
- icsk->icsk_accept_queue.rskq_defer_accept = 0;
- if (val > 0) {
- /* Translate value in seconds to number of
- * retransmits */
- while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
- val > ((TCP_TIMEOUT_INIT / HZ) <<
- icsk->icsk_accept_queue.rskq_defer_accept))
- icsk->icsk_accept_queue.rskq_defer_accept++;
- icsk->icsk_accept_queue.rskq_defer_accept++;
+ if (val < 0) {
+ err = -EINVAL;
+ } else {
+ if (val > MAX_TCP_ACCEPT_DEFERRED)
+ val = MAX_TCP_ACCEPT_DEFERRED;
+ icsk->icsk_accept_queue.rskq_defer_accept = val;
}
break;
@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = (val ? : sysctl_tcp_fin_timeout) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
- ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+ val = icsk->icsk_accept_queue.rskq_defer_accept;
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5212ed9b0c9..7eb7636db0d 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -1,12 +1,13 @@
/*
* Binary Increase Congestion control for TCP
- *
+ * Home page:
+ * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
* This is from the implementation of BICTCP in
* Lison-Xu, Kahaled Harfoush, and Injong Rhee.
* "Binary Increase Congestion Control for Fast, Long Distance
* Networks" in InfoComm 2004
* Available from:
- * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp.pdf
+ * http://netsrv.csc.ncsu.edu/export/bitcp.pdf
*
* Unless BIC is enabled and congestion window is large
* this behaves the same as the original Reno.
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 3aa0b23c1ea..eb5b9854c8c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,12 +1,13 @@
/*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
- *
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.2
+ * Home page:
+ * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
* This is from the implementation of CUBIC TCP in
* Injong Rhee, Lisong Xu.
* "CUBIC: A New TCP-Friendly High-Speed TCP Variant
* in PFLDnet 2005
* Available from:
- * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+ * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf
*
* Unless CUBIC is enabled and congestion window is large
* this behaves the same as the original Reno.
@@ -20,15 +21,10 @@
#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* max_cwnd = snd_cwnd * beta
*/
-#define BICTCP_B 4 /*
- * In binary search,
- * go to point (max+min)/N
- */
#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
static int fast_convergence __read_mostly = 1;
-static int max_increment __read_mostly = 16;
-static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
+static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh __read_mostly;
static int bic_scale __read_mostly = 41;
static int tcp_friendliness __read_mostly = 1;
@@ -40,9 +36,7 @@ static u64 cube_factor __read_mostly;
/* Note parameters that are used for precomputing scale factors are read-only */
module_param(fast_convergence, int, 0644);
MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
-module_param(max_increment, int, 0644);
-MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
-module_param(beta, int, 0444);
+module_param(beta, int, 0644);
MODULE_PARM_DESC(beta, "beta for multiplicative increase");
module_param(initial_ssthresh, int, 0644);
MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
@@ -145,7 +139,7 @@ static u32 cubic_root(u64 a)
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
u64 offs;
- u32 delta, t, bic_target, min_cnt, max_cnt;
+ u32 delta, t, bic_target, max_cnt;
ca->ack_cnt++; /* count the number of ACKs */
@@ -211,19 +205,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->cnt = 100 * cwnd; /* very small increment*/
}
- if (ca->delay_min > 0) {
- /* max increment = Smax * rtt / 0.1 */
- min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
-
- /* use concave growth when the target is above the origin */
- if (ca->cnt < min_cnt && t >= ca->bic_K)
- ca->cnt = min_cnt;
- }
-
- /* slow start and low utilization */
- if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
- ca->cnt = 50;
-
/* TCP Friendly */
if (tcp_friendliness) {
u32 scale = beta_scale;
@@ -391,4 +372,4 @@ module_exit(cubictcp_unregister);
MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.1");
+MODULE_VERSION("2.2");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19c449f6267..cdc051bfdb4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1367,7 +1367,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
* a normal way
*/
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
- u32 skip_to_seq)
+ u32 skip_to_seq, int *fack_count)
{
tcp_for_write_queue_from(skb, sk) {
if (skb == tcp_send_head(sk))
@@ -1375,6 +1375,8 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
break;
+
+ *fack_count += tcp_skb_pcount(skb);
}
return skb;
}
@@ -1390,7 +1392,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
return skb;
if (before(next_dup->start_seq, skip_to_seq)) {
- skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+ skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
tcp_sacktag_walk(skb, sk, NULL,
next_dup->start_seq, next_dup->end_seq,
1, fack_count, reord, flag);
@@ -1537,7 +1539,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, start_seq,
+ &fack_count);
skb = tcp_sacktag_walk(skb, sk, next_dup,
start_seq,
cache->start_seq,
@@ -1565,7 +1568,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+ skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
+ &fack_count);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
@@ -1577,7 +1581,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
break;
fack_count = tp->fackets_out;
}
- skb = tcp_sacktag_skip(skb, sk, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
walk:
skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
@@ -1621,13 +1625,11 @@ out:
return flag;
}
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that sum with lost_out isn't ever larger than
+ * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
*/
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
{
- struct tcp_sock *tp = tcp_sk(sk);
u32 holes;
holes = max(tp->lost_out, 1U);
@@ -1635,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
if ((tp->sacked_out + holes) > tp->packets_out) {
tp->sacked_out = tp->packets_out - holes;
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+ return 1;
}
+ return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only another reason could be bug in receiver TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ if (tcp_limit_reno_sacked(tp))
+ tcp_update_reordering(sk, tp->packets_out + addend, 0);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1677,11 +1691,16 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
int tcp_use_frto(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb;
if (!sysctl_tcp_frto)
return 0;
+ /* MTU probe and F-RTO won't really play nicely along currently */
+ if (icsk->icsk_mtup.probe_size)
+ return 0;
+
if (IsSackFrto())
return 1;
@@ -2130,11 +2149,13 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
* is against sacked "cnt", otherwise it's against facked "cnt"
*/
-static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int cnt;
+ int cnt, oldcnt;
+ int err;
+ unsigned int mss;
BUG_TRAP(packets <= tp->packets_out);
if (tp->lost_skb_hint) {
@@ -2153,13 +2174,25 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+ break;
+
+ oldcnt = cnt;
if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
- if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
- after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
- break;
+ if (cnt > packets) {
+ if (tcp_is_sack(tp) || (oldcnt >= packets))
+ break;
+
+ mss = skb_shinfo(skb)->gso_size;
+ err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+ if (err < 0)
+ break;
+ cnt = packets;
+ }
+
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
@@ -2176,17 +2209,17 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_is_reno(tp)) {
- tcp_mark_head_lost(sk, 1, fast_rexmit);
+ tcp_mark_head_lost(sk, 1);
} else if (tcp_is_fack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, lost, fast_rexmit);
+ tcp_mark_head_lost(sk, lost);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
- if (sacked_upto < 0)
- sacked_upto = 0;
- tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
+ if (sacked_upto < fast_rexmit)
+ sacked_upto = fast_rexmit;
+ tcp_mark_head_lost(sk, sacked_upto);
}
/* New heuristics: it is possible only after we switched
@@ -2276,12 +2309,25 @@ static void DBGUNDO(struct sock *sk, const char *msg)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
- msg,
- NIPQUAD(inet->daddr), ntohs(inet->dport),
- tp->snd_cwnd, tcp_left_out(tp),
- tp->snd_ssthresh, tp->prior_ssthresh,
- tp->packets_out);
+ if (sk->sk_family == AF_INET) {
+ printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n",
+ msg,
+ NIPQUAD(inet->daddr), ntohs(inet->dport),
+ tp->snd_cwnd, tcp_left_out(tp),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tp->packets_out);
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n",
+ msg,
+ NIP6(np->daddr), ntohs(inet->dport),
+ tp->snd_cwnd, tcp_left_out(tp),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tp->packets_out);
+ }
+#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
@@ -2520,7 +2566,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
+ tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
@@ -2582,6 +2628,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
case TCP_CA_Loss:
if (flag & FLAG_DATA_ACKED)
icsk->icsk_retransmits = 0;
+ if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
if (!tcp_try_undo_loss(sk)) {
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
@@ -3557,7 +3605,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
* cases we should never reach this piece of code.
*/
printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
- __FUNCTION__, sk->sk_state);
+ __func__, sk->sk_state);
break;
}
@@ -3806,8 +3854,28 @@ static void tcp_ofo_queue(struct sock *sk)
}
}
+static int tcp_prune_ofo_queue(struct sock *sk);
static int tcp_prune_queue(struct sock *sk);
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+ !sk_rmem_schedule(sk, size)) {
+
+ if (tcp_prune_queue(sk) < 0)
+ return -1;
+
+ if (!sk_rmem_schedule(sk, size)) {
+ if (!tcp_prune_ofo_queue(sk))
+ return -1;
+
+ if (!sk_rmem_schedule(sk, size))
+ return -1;
+ }
+ }
+ return 0;
+}
+
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = tcp_hdr(skb);
@@ -3857,12 +3925,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten <= 0) {
queue_and_out:
if (eaten < 0 &&
- (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- !sk_rmem_schedule(sk, skb->truesize))) {
- if (tcp_prune_queue(sk) < 0 ||
- !sk_rmem_schedule(sk, skb->truesize))
- goto drop;
- }
+ tcp_try_rmem_schedule(sk, skb->truesize))
+ goto drop;
+
skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
@@ -3931,12 +3996,8 @@ drop:
TCP_ECN_check_ce(tp, skb);
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- !sk_rmem_schedule(sk, skb->truesize)) {
- if (tcp_prune_queue(sk) < 0 ||
- !sk_rmem_schedule(sk, skb->truesize))
- goto drop;
- }
+ if (tcp_try_rmem_schedule(sk, skb->truesize))
+ goto drop;
/* Disable header prediction. */
tp->pred_flags = 0;
@@ -3964,7 +4025,7 @@ drop:
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (seq == TCP_SKB_CB(skb1)->end_seq) {
- __skb_append(skb1, skb, &tp->out_of_order_queue);
+ __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
if (!tp->rx_opt.num_sacks ||
tp->selective_acks[0].end_seq != seq)
@@ -4163,6 +4224,32 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
}
}
+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int res = 0;
+
+ if (!skb_queue_empty(&tp->out_of_order_queue)) {
+ NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+ __skb_queue_purge(&tp->out_of_order_queue);
+
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if (tp->rx_opt.sack_ok)
+ tcp_sack_reset(&tp->rx_opt);
+ sk_mem_reclaim(sk);
+ res = 1;
+ }
+ return res;
+}
+
/* Reduce allocated memory if we can, trying to get
* the socket within its memory limits again.
*
@@ -4196,20 +4283,7 @@ static int tcp_prune_queue(struct sock *sk)
/* Collapsing did not help, destructive actions follow.
* This must not ever occur. */
- /* First, purge the out_of_order queue. */
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
- __skb_queue_purge(&tp->out_of_order_queue);
-
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if (tcp_is_sack(tp))
- tcp_sack_reset(&tp->rx_opt);
- sk_mem_reclaim(sk);
- }
+ tcp_prune_ofo_queue(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
@@ -4447,6 +4521,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
}
}
+static int tcp_defer_accept_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->defer_tcp_accept.request) {
+ int queued_data = tp->rcv_nxt - tp->copied_seq;
+ int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
+ tcp_hdr((struct sk_buff *)
+ sk->sk_receive_queue.prev)->fin : 0;
+
+ if (queued_data && hasfin)
+ queued_data--;
+
+ if (queued_data &&
+ tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
+ if (sock_flag(sk, SOCK_KEEPOPEN)) {
+ inet_csk_reset_keepalive_timer(sk,
+ keepalive_time_when(tp));
+ } else {
+ inet_csk_delete_keepalive_timer(sk);
+ }
+
+ inet_csk_reqsk_queue_add(
+ tp->defer_tcp_accept.listen_sk,
+ tp->defer_tcp_accept.request,
+ sk);
+
+ tp->defer_tcp_accept.listen_sk->sk_data_ready(
+ tp->defer_tcp_accept.listen_sk, 0);
+
+ sock_put(tp->defer_tcp_accept.listen_sk);
+ sock_put(sk);
+ tp->defer_tcp_accept.listen_sk = NULL;
+ tp->defer_tcp_accept.request = NULL;
+ } else if (hasfin ||
+ tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
+ tcp_reset(sk);
+ return -1;
+ }
+ }
+ return 0;
+}
+
static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4807,6 +4924,9 @@ step5:
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
+
+ if (tcp_defer_accept_check(sk))
+ return -1;
return 0;
csum_error:
@@ -5326,6 +5446,7 @@ discard:
EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 63414ea427c..776615180b9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,9 +88,6 @@ int sysctl_tcp_low_latency __read_mostly;
/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8
-/* Socket used for sending RSTs */
-static struct socket *tcp_socket __read_mostly;
-
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
#ifdef CONFIG_TCP_MD5SIG
@@ -353,7 +350,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
return;
}
- sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest,
+ sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
iph->saddr, th->source, inet_iif(skb));
if (!sk) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -552,7 +549,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
if (th->rst)
return;
- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
+ if (skb->rtable->rt_type != RTN_LOCAL)
return;
/* Swap the send and the receive. */
@@ -598,7 +595,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+ ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
@@ -693,7 +691,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
if (twsk)
arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+ ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}
@@ -719,12 +718,12 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
}
/*
- * Send a SYN-ACK after having received an ACK.
+ * Send a SYN-ACK after having received a SYN.
* This still operates on a request_sock only, not on a big
* socket.
*/
-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
- struct dst_entry *dst)
+static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
+ struct dst_entry *dst)
{
const struct inet_request_sock *ireq = inet_rsk(req);
int err = -1;
@@ -732,7 +731,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
/* First, grab a route. */
if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
- goto out;
+ return -1;
skb = tcp_make_synack(sk, dst, req);
@@ -751,11 +750,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
err = net_xmit_eval(err);
}
-out:
dst_release(dst);
return err;
}
+static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
+{
+ return __tcp_v4_send_synack(sk, req, NULL);
+}
+
/*
* IPv4 request_sock destructor.
*/
@@ -1258,8 +1261,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
#endif
/* Never answer to SYNs send to broadcast or multicast */
- if (((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST))
+ if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
/* TW buckets are converted to open requests without
@@ -1297,10 +1299,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_parse_options(skb, &tmp_opt, 0);
- if (want_cookie) {
+ if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
- tmp_opt.saw_tstamp = 0;
- }
if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
/* Some OSes (unknown ones, but I see them on web server, which
@@ -1328,6 +1328,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
syn_flood_warning(skb);
+ req->cookie_ts = tmp_opt.tstamp_ok;
#endif
isn = cookie_v4_init_sequence(sk, skb, &req->mss);
} else if (!isn) {
@@ -1351,8 +1352,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
- dst_release(dst);
- goto drop_and_free;
+ goto drop_and_release;
}
}
/* Kill the following clause, if you dislike this way. */
@@ -1369,27 +1369,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* to the moment of synflood.
*/
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
- "request from %u.%u.%u.%u/%u\n",
+ "request from " NIPQUAD_FMT "/%u\n",
NIPQUAD(saddr),
ntohs(tcp_hdr(skb)->source));
- dst_release(dst);
- goto drop_and_free;
+ goto drop_and_release;
}
isn = tcp_v4_init_sequence(skb);
}
tcp_rsk(req)->snt_isn = isn;
- if (tcp_v4_send_synack(sk, req, dst))
+ if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
goto drop_and_free;
- if (want_cookie) {
- reqsk_free(req);
- } else {
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- }
+ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
+drop_and_release:
+ dst_release(dst);
drop_and_free:
reqsk_free(req);
drop:
@@ -1487,7 +1484,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr,
+ nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
th->source, iph->daddr, th->dest, inet_iif(skb));
if (nsk) {
@@ -1645,7 +1642,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr,
+ sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
th->source, iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1719,7 +1716,7 @@ do_time_wait:
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
- struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net,
+ struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo,
iph->daddr, th->dest,
inet_iif(skb));
@@ -1921,6 +1918,14 @@ int tcp_v4_destroy_sock(struct sock *sk)
sk->sk_sndmsg_page = NULL;
}
+ if (tp->defer_tcp_accept.request) {
+ reqsk_free(tp->defer_tcp_accept.request);
+ sock_put(tp->defer_tcp_accept.listen_sk);
+ sock_put(sk);
+ tp->defer_tcp_accept.listen_sk = NULL;
+ tp->defer_tcp_accept.request = NULL;
+ }
+
atomic_dec(&tcp_sockets_allocated);
return 0;
@@ -1949,6 +1954,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
struct hlist_node *node;
struct sock *sk = cur;
struct tcp_iter_state* st = seq->private;
+ struct net *net = seq_file_net(seq);
if (!sk) {
st->bucket = 0;
@@ -1965,7 +1971,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
req = req->dl_next;
while (1) {
while (req) {
- if (req->rsk_ops->family == st->family) {
+ if (req->rsk_ops->family == st->family &&
+ net_eq(sock_net(req->sk), net)) {
cur = req;
goto out;
}
@@ -1989,7 +1996,7 @@ get_req:
}
get_sk:
sk_for_each_from(sk, node) {
- if (sk->sk_family == st->family) {
+ if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
cur = sk;
goto out;
}
@@ -2028,6 +2035,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
static void *established_get_first(struct seq_file *seq)
{
struct tcp_iter_state* st = seq->private;
+ struct net *net = seq_file_net(seq);
void *rc = NULL;
for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
@@ -2038,7 +2046,8 @@ static void *established_get_first(struct seq_file *seq)
read_lock_bh(lock);
sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if (sk->sk_family != st->family) {
+ if (sk->sk_family != st->family ||
+ !net_eq(sock_net(sk), net)) {
continue;
}
rc = sk;
@@ -2047,7 +2056,8 @@ static void *established_get_first(struct seq_file *seq)
st->state = TCP_SEQ_STATE_TIME_WAIT;
inet_twsk_for_each(tw, node,
&tcp_hashinfo.ehash[st->bucket].twchain) {
- if (tw->tw_family != st->family) {
+ if (tw->tw_family != st->family ||
+ !net_eq(twsk_net(tw), net)) {
continue;
}
rc = tw;
@@ -2066,6 +2076,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
struct inet_timewait_sock *tw;
struct hlist_node *node;
struct tcp_iter_state* st = seq->private;
+ struct net *net = seq_file_net(seq);
++st->num;
@@ -2073,7 +2084,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
tw = cur;
tw = tw_next(tw);
get_tw:
- while (tw && tw->tw_family != st->family) {
+ while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
tw = tw_next(tw);
}
if (tw) {
@@ -2094,7 +2105,7 @@ get_tw:
sk = sk_next(sk);
sk_for_each_from(sk, node) {
- if (sk->sk_family == st->family)
+ if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
goto found;
}
@@ -2200,48 +2211,37 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
static int tcp_seq_open(struct inode *inode, struct file *file)
{
struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
- struct seq_file *seq;
struct tcp_iter_state *s;
- int rc;
+ int err;
if (unlikely(afinfo == NULL))
return -EINVAL;
- s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ err = seq_open_net(inode, file, &afinfo->seq_ops,
+ sizeof(struct tcp_iter_state));
+ if (err < 0)
+ return err;
+
+ s = ((struct seq_file *)file->private_data)->private;
s->family = afinfo->family;
- s->seq_ops.start = tcp_seq_start;
- s->seq_ops.next = tcp_seq_next;
- s->seq_ops.show = afinfo->seq_show;
- s->seq_ops.stop = tcp_seq_stop;
-
- rc = seq_open(file, &s->seq_ops);
- if (rc)
- goto out_kfree;
- seq = file->private_data;
- seq->private = s;
-out:
- return rc;
-out_kfree:
- kfree(s);
- goto out;
+ return 0;
}
-int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
+int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
int rc = 0;
struct proc_dir_entry *p;
- if (!afinfo)
- return -EINVAL;
- afinfo->seq_fops->owner = afinfo->owner;
- afinfo->seq_fops->open = tcp_seq_open;
- afinfo->seq_fops->read = seq_read;
- afinfo->seq_fops->llseek = seq_lseek;
- afinfo->seq_fops->release = seq_release_private;
+ afinfo->seq_fops.open = tcp_seq_open;
+ afinfo->seq_fops.read = seq_read;
+ afinfo->seq_fops.llseek = seq_lseek;
+ afinfo->seq_fops.release = seq_release_net;
+
+ afinfo->seq_ops.start = tcp_seq_start;
+ afinfo->seq_ops.next = tcp_seq_next;
+ afinfo->seq_ops.stop = tcp_seq_stop;
- p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
if (p)
p->data = afinfo;
else
@@ -2249,12 +2249,9 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
return rc;
}
-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
+void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
- if (!afinfo)
- return;
- proc_net_remove(&init_net, afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ proc_net_remove(net, afinfo->name);
}
static void get_openreq4(struct sock *sk, struct request_sock *req,
@@ -2383,28 +2380,43 @@ out:
return 0;
}
-static struct file_operations tcp4_seq_fops;
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "tcp",
.family = AF_INET,
- .seq_show = tcp4_seq_show,
- .seq_fops = &tcp4_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = tcp4_seq_show,
+ },
+};
+
+static int tcp4_proc_init_net(struct net *net)
+{
+ return tcp_proc_register(net, &tcp4_seq_afinfo);
+}
+
+static void tcp4_proc_exit_net(struct net *net)
+{
+ tcp_proc_unregister(net, &tcp4_seq_afinfo);
+}
+
+static struct pernet_operations tcp4_net_ops = {
+ .init = tcp4_proc_init_net,
+ .exit = tcp4_proc_exit_net,
};
int __init tcp4_proc_init(void)
{
- return tcp_proc_register(&tcp4_seq_afinfo);
+ return register_pernet_subsys(&tcp4_net_ops);
}
void tcp4_proc_exit(void)
{
- tcp_proc_unregister(&tcp4_seq_afinfo);
+ unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */
-DEFINE_PROTO_INUSE(tcp)
-
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
@@ -2435,18 +2447,33 @@ struct proto tcp_prot = {
.obj_size = sizeof(struct tcp_sock),
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
- .hashinfo = &tcp_hashinfo,
+ .h.hashinfo = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
- REF_PROTO_INUSE(tcp)
};
-void __init tcp_v4_init(struct net_proto_family *ops)
+
+static int __net_init tcp_sk_init(struct net *net)
+{
+ return inet_ctl_sock_create(&net->ipv4.tcp_sock,
+ PF_INET, SOCK_RAW, IPPROTO_TCP, net);
+}
+
+static void __net_exit tcp_sk_exit(struct net *net)
+{
+ inet_ctl_sock_destroy(net->ipv4.tcp_sock);
+}
+
+static struct pernet_operations __net_initdata tcp_sk_ops = {
+ .init = tcp_sk_init,
+ .exit = tcp_sk_exit,
+};
+
+void __init tcp_v4_init(void)
{
- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
- IPPROTO_TCP) < 0)
+ if (register_pernet_device(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b61b76847ad..019c8c16e5c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,6 +35,8 @@
#endif
int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+EXPORT_SYMBOL(sysctl_tcp_syncookies);
+
int sysctl_tcp_abort_on_overflow __read_mostly;
struct inet_timewait_death_row tcp_death_row = {
@@ -536,7 +538,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
* Enforce "SYN-ACK" according to figure 8, figure 6
* of RFC793, fixed by RFC1122.
*/
- req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+ req->rsk_ops->rtx_syn_ack(sk, req);
return NULL;
}
@@ -569,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
does sequence test, SYN is truncated, and thus we consider
it a bare ACK.
- If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
- bare ACK. Otherwise, we create an established connection. Both
- ends (listening sockets) accept the new incoming connection and try
- to talk to each other. 8-)
+ Both ends (listening sockets) accept the new incoming
+ connection and try to talk to each other. 8-)
Note: This case is both harmless, and rare. Possibility is about the
same as us discovering intelligent life on another plant tomorrow.
@@ -640,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
if (!(flg & TCP_FLAG_ACK))
return NULL;
- /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
- if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
- TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
- inet_rsk(req)->acked = 1;
- return NULL;
- }
-
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
* the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -685,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
- inet_csk_reqsk_queue_add(sk, req, child);
+ if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+
+ /* the accept queue handling is done is est recv slow
+ * path so lets make sure to start there
+ */
+ tcp_sk(child)->pred_flags = 0;
+ sock_hold(sk);
+ sock_hold(child);
+ tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
+ tcp_sk(child)->defer_tcp_accept.request = req;
+
+ inet_csk_reset_keepalive_timer(child,
+ inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
+ } else {
+ inet_csk_reqsk_queue_add(sk, req, child);
+ }
+
return child;
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed750f9ceb0..debf2358160 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -255,7 +255,7 @@ static u16 tcp_select_window(struct sock *sk)
*
* Relax Will Robinson.
*/
- new_win = cur_win;
+ new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
}
tp->rcv_wnd = new_win;
tp->rcv_wup = tp->rcv_nxt;
@@ -998,7 +998,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
xmit_size_goal = mss_now;
if (doing_tso) {
- xmit_size_goal = (65535 -
+ xmit_size_goal = ((sk->sk_gso_max_size - 1) -
inet_csk(sk)->icsk_af_ops->net_header_len -
inet_csk(sk)->icsk_ext_hdr_len -
tp->tcp_header_len);
@@ -1035,6 +1035,13 @@ static void tcp_cwnd_validate(struct sock *sk)
* introducing MSS oddities to segment boundaries. In rare cases where
* mss_now != mss_cache, we will request caller to create a small skb
* per input skb which could be mostly avoided here (if desired).
+ *
+ * We explicitly want to create a request for splitting write queue tail
+ * to a small skb for Nagle purposes while avoiding unnecessary modulos,
+ * thus all the complexity (cwnd_len is always MSS multiple which we
+ * return whenever allowed by the other factors). Basically we need the
+ * modulo only when the receiver window alone is the limiting factor or
+ * when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
unsigned int mss_now, unsigned int cwnd)
@@ -1048,10 +1055,11 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
return cwnd_len;
- if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+ needed = min(skb->len, window);
+
+ if (cwnd_len <= needed)
return cwnd_len;
- needed = min(skb->len, window);
return needed - needed % mss_now;
}
@@ -1274,7 +1282,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= 65536)
+ if (limit >= sk->sk_gso_max_size)
goto send_now;
if (sysctl_tcp_tso_win_divisor) {
@@ -1800,6 +1808,9 @@ void tcp_simple_retransmit(struct sock *sk)
if (!lost)
return;
+ if (tcp_is_reno(tp))
+ tcp_limit_reno_sacked(tp);
+
tcp_verify_left_out(tp);
/* Don't muck with the congestion window here.
@@ -2225,7 +2236,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
-
+#ifdef CONFIG_SYN_COOKIES
+ if (unlikely(req->cookie_ts))
+ TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
+ else
+#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
@@ -2560,6 +2575,7 @@ void tcp_send_probe0(struct sock *sk)
}
}
+EXPORT_SYMBOL(tcp_select_initial_window);
EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 87dd5bff315..1c509592574 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -153,7 +153,7 @@ static int tcpprobe_sprint(char *tbuf, int n)
= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
return snprintf(tbuf, n,
- "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u"
+ "%lu.%09lu " NIPQUAD_FMT ":%u " NIPQUAD_FMT ":%u"
" %d %#x %#x %u %u %u %u\n",
(unsigned long) tv.tv_sec,
(unsigned long) tv.tv_nsec,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 803d758a2b1..4de68cf5f2a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -299,12 +299,20 @@ static void tcp_retransmit_timer(struct sock *sk)
* we cannot allow such beasts to hang infinitely.
*/
#ifdef TCP_DEBUG
- if (1) {
- struct inet_sock *inet = inet_sk(sk);
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
+ struct inet_sock *inet = inet_sk(sk);
+ if (sk->sk_family == AF_INET) {
+ LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
NIPQUAD(inet->daddr), ntohs(inet->dport),
inet->num, tp->snd_una, tp->snd_nxt);
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
+ NIP6(np->daddr), ntohs(inet->dport),
+ inet->num, tp->snd_una, tp->snd_nxt);
+ }
+#endif
#endif
if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
tcp_write_err(sk);
@@ -481,6 +489,11 @@ static void tcp_keepalive_timer (unsigned long data)
goto death;
}
+ if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
+ tcp_send_active_reset(sk, GFP_ATOMIC);
+ goto death;
+ }
+
if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
goto out;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 978b3fd61e6..d3b709a6f26 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -136,6 +136,7 @@ static struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
.no_policy = 1,
+ .netns_ok = 1,
};
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -143,6 +144,7 @@ static struct net_protocol tunnel64_protocol = {
.handler = tunnel64_rcv,
.err_handler = tunnel64_err,
.no_policy = 1,
+ .netns_ok = 1,
};
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7ea1b67b6de..b053ac79527 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -137,29 +137,28 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
struct hlist_node *node;
sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
- if (sk->sk_net == net && sk->sk_hash == num)
+ if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
return 1;
return 0;
}
/**
- * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
+ * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
*
* @sk: socket struct in question
* @snum: port number to look up
- * @udptable: hash list table, must be of UDP_HTABLE_SIZE
* @saddr_comp: AF-dependent comparison of bound local IP addresses
*/
-int __udp_lib_get_port(struct sock *sk, unsigned short snum,
- struct hlist_head udptable[],
+int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*saddr_comp)(const struct sock *sk1,
const struct sock *sk2 ) )
{
+ struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
struct hlist_node *node;
struct hlist_head *head;
struct sock *sk2;
int error = 1;
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
write_lock_bh(&udp_hash_lock);
@@ -219,7 +218,7 @@ gotit:
sk_for_each(sk2, node, head)
if (sk2->sk_hash == snum &&
sk2 != sk &&
- sk2->sk_net == net &&
+ net_eq(sock_net(sk2), net) &&
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -232,7 +231,7 @@ gotit:
if (sk_unhashed(sk)) {
head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
sk_add_node(sk, head);
- sock_prot_inuse_add(sk->sk_prot, 1);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
error = 0;
fail:
@@ -240,13 +239,7 @@ fail:
return error;
}
-int udp_get_port(struct sock *sk, unsigned short snum,
- int (*scmp)(const struct sock *, const struct sock *))
-{
- return __udp_lib_get_port(sk, snum, udp_hash, scmp);
-}
-
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -255,9 +248,9 @@ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
inet1->rcv_saddr == inet2->rcv_saddr ));
}
-static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
- return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
+ return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -276,7 +269,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_net == net && sk->sk_hash == hnum &&
+ if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
!ipv6_only_sock(sk)) {
int score = (sk->sk_family == PF_INET ? 1 : 0);
if (inet->rcv_saddr) {
@@ -364,7 +357,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
int harderr;
int err;
- sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
+ sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex, udptable);
if (sk == NULL) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -614,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
- err = ip_cmsg_send(msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc);
if (err)
return err;
if (ipc.opt)
@@ -663,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
{ .sport = inet->sport,
.dport = dport } } };
security_sk_classify_flow(sk, &fl);
- err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
+ err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
if (err) {
if (err == -ENETUNREACH)
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1188,7 +1181,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
- sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
+ sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
@@ -1228,7 +1221,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
return 0;
short_packet:
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
@@ -1243,7 +1236,7 @@ csum_error:
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
@@ -1474,8 +1467,6 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
-DEFINE_PROTO_INUSE(udp)
-
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
@@ -1498,11 +1489,11 @@ struct proto udp_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
+ .h.udp_hash = udp_hash,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
- REF_PROTO_INUSE(udp)
};
/* ------------------------------------------------------------------------ */
@@ -1512,10 +1503,13 @@ static struct sock *udp_get_first(struct seq_file *seq)
{
struct sock *sk;
struct udp_iter_state *state = seq->private;
+ struct net *net = seq_file_net(seq);
for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
struct hlist_node *node;
sk_for_each(sk, node, state->hashtable + state->bucket) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
if (sk->sk_family == state->family)
goto found;
}
@@ -1528,12 +1522,13 @@ found:
static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{
struct udp_iter_state *state = seq->private;
+ struct net *net = seq_file_net(seq);
do {
sk = sk_next(sk);
try_again:
;
- } while (sk && sk->sk_family != state->family);
+ } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
sk = sk_head(state->hashtable + state->bucket);
@@ -1556,14 +1551,14 @@ static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(udp_hash_lock)
{
read_lock(&udp_hash_lock);
- return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
+ return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock *sk;
- if (v == (void *)1)
+ if (v == SEQ_START_TOKEN)
sk = udp_get_idx(seq, 0);
else
sk = udp_get_next(seq, v);
@@ -1581,47 +1576,36 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
static int udp_seq_open(struct inode *inode, struct file *file)
{
struct udp_seq_afinfo *afinfo = PDE(inode)->data;
- struct seq_file *seq;
- int rc = -ENOMEM;
- struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
+ struct udp_iter_state *s;
+ int err;
- if (!s)
- goto out;
+ err = seq_open_net(inode, file, &afinfo->seq_ops,
+ sizeof(struct udp_iter_state));
+ if (err < 0)
+ return err;
+
+ s = ((struct seq_file *)file->private_data)->private;
s->family = afinfo->family;
s->hashtable = afinfo->hashtable;
- s->seq_ops.start = udp_seq_start;
- s->seq_ops.next = udp_seq_next;
- s->seq_ops.show = afinfo->seq_show;
- s->seq_ops.stop = udp_seq_stop;
-
- rc = seq_open(file, &s->seq_ops);
- if (rc)
- goto out_kfree;
-
- seq = file->private_data;
- seq->private = s;
-out:
- return rc;
-out_kfree:
- kfree(s);
- goto out;
+ return err;
}
/* ------------------------------------------------------------------------ */
-int udp_proc_register(struct udp_seq_afinfo *afinfo)
+int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
{
struct proc_dir_entry *p;
int rc = 0;
- if (!afinfo)
- return -EINVAL;
- afinfo->seq_fops->owner = afinfo->owner;
- afinfo->seq_fops->open = udp_seq_open;
- afinfo->seq_fops->read = seq_read;
- afinfo->seq_fops->llseek = seq_lseek;
- afinfo->seq_fops->release = seq_release_private;
+ afinfo->seq_fops.open = udp_seq_open;
+ afinfo->seq_fops.read = seq_read;
+ afinfo->seq_fops.llseek = seq_lseek;
+ afinfo->seq_fops.release = seq_release_net;
- p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ afinfo->seq_ops.start = udp_seq_start;
+ afinfo->seq_ops.next = udp_seq_next;
+ afinfo->seq_ops.stop = udp_seq_stop;
+
+ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
if (p)
p->data = afinfo;
else
@@ -1629,12 +1613,9 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo)
return rc;
}
-void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
+void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
{
- if (!afinfo)
- return;
- proc_net_remove(&init_net, afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ proc_net_remove(net, afinfo->name);
}
/* ------------------------------------------------------------------------ */
@@ -1673,24 +1654,41 @@ int udp4_seq_show(struct seq_file *seq, void *v)
}
/* ------------------------------------------------------------------------ */
-static struct file_operations udp4_seq_fops;
static struct udp_seq_afinfo udp4_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "udp",
.family = AF_INET,
.hashtable = udp_hash,
- .seq_show = udp4_seq_show,
- .seq_fops = &udp4_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = udp4_seq_show,
+ },
+};
+
+static int udp4_proc_init_net(struct net *net)
+{
+ return udp_proc_register(net, &udp4_seq_afinfo);
+}
+
+static void udp4_proc_exit_net(struct net *net)
+{
+ udp_proc_unregister(net, &udp4_seq_afinfo);
+}
+
+static struct pernet_operations udp4_net_ops = {
+ .init = udp4_proc_init_net,
+ .exit = udp4_proc_exit_net,
};
int __init udp4_proc_init(void)
{
- return udp_proc_register(&udp4_seq_afinfo);
+ return register_pernet_subsys(&udp4_net_ops);
}
void udp4_proc_exit(void)
{
- udp_proc_unregister(&udp4_seq_afinfo);
+ unregister_pernet_subsys(&udp4_net_ops);
}
#endif /* CONFIG_PROC_FS */
@@ -1717,12 +1715,12 @@ EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);
EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);
+EXPORT_SYMBOL(udp_lib_get_port);
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(udp_proc_register);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 6c55828e41b..7288bf7977f 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -8,11 +8,7 @@
extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
-extern int __udp_lib_get_port(struct sock *sk, unsigned short snum,
- struct hlist_head udptable[],
- int (*)(const struct sock*,const struct sock*));
-extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
-
+extern int udp_v4_get_port(struct sock *sk, unsigned short snum);
extern int udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 001b881ca36..72ce26b6c4d 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -17,17 +17,6 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
-int udplite_get_port(struct sock *sk, unsigned short p,
- int (*c)(const struct sock *, const struct sock *))
-{
- return __udp_lib_get_port(sk, p, udplite_hash, c);
-}
-
-static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
-{
- return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
-}
-
static int udplite_rcv(struct sk_buff *skb)
{
return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
@@ -42,10 +31,9 @@ static struct net_protocol udplite_protocol = {
.handler = udplite_rcv,
.err_handler = udplite_err,
.no_policy = 1,
+ .netns_ok = 1,
};
-DEFINE_PROTO_INUSE(udplite)
-
struct proto udplite_prot = {
.name = "UDP-Lite",
.owner = THIS_MODULE,
@@ -63,13 +51,13 @@ struct proto udplite_prot = {
.backlog_rcv = udp_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
- .get_port = udplite_v4_get_port,
+ .get_port = udp_v4_get_port,
.obj_size = sizeof(struct udp_sock),
+ .h.udp_hash = udplite_hash,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
- REF_PROTO_INUSE(udplite)
};
static struct inet_protosw udplite4_protosw = {
@@ -83,15 +71,42 @@ static struct inet_protosw udplite4_protosw = {
};
#ifdef CONFIG_PROC_FS
-static struct file_operations udplite4_seq_fops;
static struct udp_seq_afinfo udplite4_seq_afinfo = {
- .owner = THIS_MODULE,
.name = "udplite",
.family = AF_INET,
.hashtable = udplite_hash,
- .seq_show = udp4_seq_show,
- .seq_fops = &udplite4_seq_fops,
+ .seq_fops = {
+ .owner = THIS_MODULE,
+ },
+ .seq_ops = {
+ .show = udp4_seq_show,
+ },
+};
+
+static int udplite4_proc_init_net(struct net *net)
+{
+ return udp_proc_register(net, &udplite4_seq_afinfo);
+}
+
+static void udplite4_proc_exit_net(struct net *net)
+{
+ udp_proc_unregister(net, &udplite4_seq_afinfo);
+}
+
+static struct pernet_operations udplite4_net_ops = {
+ .init = udplite4_proc_init_net,
+ .exit = udplite4_proc_exit_net,
};
+
+static __init int udplite4_proc_init(void)
+{
+ return register_pernet_subsys(&udplite4_net_ops);
+}
+#else
+static inline int udplite4_proc_init(void)
+{
+ return 0;
+}
#endif
void __init udplite4_register(void)
@@ -104,18 +119,15 @@ void __init udplite4_register(void)
inet_register_protosw(&udplite4_protosw);
-#ifdef CONFIG_PROC_FS
- if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */
- printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
-#endif
+ if (udplite4_proc_init())
+ printk(KERN_ERR "%s: Cannot register /proc!\n", __func__);
return;
out_unregister_proto:
proto_unregister(&udplite_prot);
out_register_err:
- printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__);
+ printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
}
EXPORT_SYMBOL(udplite_hash);
EXPORT_SYMBOL(udplite_prot);
-EXPORT_SYMBOL(udplite_get_port);
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index b47030ba162..9c798abce73 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -39,13 +39,11 @@ static void xfrm4_beet_make_header(struct sk_buff *skb)
static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_beet_phdr *ph;
- struct iphdr *iph, *top_iph;
+ struct iphdr *top_iph;
int hdrlen, optlen;
- iph = ip_hdr(skb);
-
hdrlen = 0;
- optlen = iph->ihl * 4 - sizeof(*iph);
+ optlen = XFRM_MODE_SKB_CB(skb)->optlen;
if (unlikely(optlen))
hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
@@ -53,11 +51,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
hdrlen);
skb->mac_header = skb->network_header +
offsetof(struct iphdr, protocol);
- skb->transport_header = skb->network_header + sizeof(*iph);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
xfrm4_beet_make_header(skb);
- ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen);
+ ph = (struct ip_beet_phdr *)
+ __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
top_iph = ip_hdr(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 8dee617ee90..584e6d74e3a 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->ihl = 5;
top_iph->version = 4;
- top_iph->protocol = x->inner_mode->afinfo->proto;
+ top_iph->protocol = xfrm_af2proto(skb->dst->ops->family);
/* DS disclosed */
top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d5a58a81802..8c3180adddb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -56,7 +56,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
- err = x->inner_mode->afinfo->extract_output(x, skb);
+ err = xfrm_inner_extract_output(x, skb);
if (err)
return err;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 10ed7049143..c63de0a72ab 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -221,7 +221,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xdst = (struct xfrm_dst *)dst;
if (xdst->u.rt.idev->dev == dev) {
struct in_device *loopback_idev =
- in_dev_get(dev->nd_net->loopback_dev);
+ in_dev_get(dev_net(dev)->loopback_dev);
BUG_ON(!loopback_idev);
do {
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index fdeebe68a37..07735ed280d 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -52,10 +52,12 @@ int xfrm4_extract_header(struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
+ XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
XFRM_MODE_SKB_CB(skb)->id = iph->id;
XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
+ XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph);
memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));