summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/arp.c21
-rw-r--r--net/ipv4/devinet.c25
-rw-r--r--net/ipv4/esp4.c17
-rw-r--r--net/ipv4/fib_frontend.c6
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c40
-rw-r--r--net/ipv4/icmp.c7
-rw-r--r--net/ipv4/igmp.c7
-rw-r--r--net/ipv4/inet_connection_sock.c16
-rw-r--r--net/ipv4/inet_timewait_sock.c7
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_output.c91
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c2
-rw-r--r--net/ipv4/netfilter/Kconfig21
-rw-r--r--net/ipv4/netfilter/Makefile7
-rw-r--r--net/ipv4/netfilter/arp_tables.c215
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c184
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c21
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c67
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c3
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c30
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c41
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c25
-rw-r--r--net/ipv4/netfilter/ip_queue.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c17
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c1
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c2
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/route.c6
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_bic.c4
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/tcp_ipv4.c13
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c16
47 files changed, 630 insertions, 338 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8bf312bdea1..b425748f02d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh)
neigh->type = inet_addr_type(addr);
rcu_read_lock();
- in_dev = rcu_dereference(__in_dev_get(dev));
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev == NULL) {
rcu_read_unlock();
return -EINVAL;
@@ -697,12 +697,6 @@ void arp_send(int type, int ptype, u32 dest_ip,
arp_xmit(skb);
}
-static void parp_redo(struct sk_buff *skb)
-{
- nf_reset(skb);
- arp_rcv(skb, skb->dev, NULL, skb->dev);
-}
-
/*
* Process an arp request.
*/
@@ -922,6 +916,11 @@ out:
return 0;
}
+static void parp_redo(struct sk_buff *skb)
+{
+ arp_process(skb);
+}
+
/*
* Receive an arp request from the device layer.
@@ -990,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev)
ipv4_devconf.proxy_arp = 1;
return 0;
}
- if (__in_dev_get(dev)) {
- __in_dev_get(dev)->cnf.proxy_arp = 1;
+ if (__in_dev_get_rtnl(dev)) {
+ __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1;
return 0;
}
return -ENXIO;
@@ -1096,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
ipv4_devconf.proxy_arp = 0;
return 0;
}
- if (__in_dev_get(dev)) {
- __in_dev_get(dev)->cnf.proxy_arp = 0;
+ if (__in_dev_get_rtnl(dev)) {
+ __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0;
return 0;
}
return -ENXIO;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ba2895ae815..4ec4b2ca6ab 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL();
@@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
goto out;
rc = -ENOBUFS;
- if ((in_dev = __in_dev_get(dev)) == NULL) {
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
in_dev = inetdev_init(dev);
if (!in_dev)
goto out;
@@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
if (colon)
*colon = ':';
- if ((in_dev = __in_dev_get(dev)) != NULL) {
+ if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
if (tryaddrmatch) {
/* Matthias Andree */
/* compare label and address (4.4BSD style) */
@@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
break;
ret = 0;
if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+ u32 old_mask = ifa->ifa_mask;
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_mask = sin->sin_addr.s_addr;
ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
if ((dev->flags & IFF_BROADCAST) &&
(ifa->ifa_prefixlen < 31) &&
(ifa->ifa_broadcast ==
- (ifa->ifa_local|~ifa->ifa_mask))) {
+ (ifa->ifa_local|~old_mask))) {
ifa->ifa_broadcast = (ifa->ifa_local |
~sin->sin_addr.s_addr);
}
@@ -748,7 +749,7 @@ rarok:
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
@@ -791,7 +792,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
struct in_device *in_dev;
rcu_read_lock();
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto no_in_dev;
@@ -818,7 +819,7 @@ no_in_dev:
read_lock(&dev_base_lock);
rcu_read_lock();
for (dev = dev_base; dev; dev = dev->next) {
- if ((in_dev = __in_dev_get(dev)) == NULL)
+ if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
for_primary_ifa(in_dev) {
@@ -887,7 +888,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
if (dev) {
rcu_read_lock();
- if ((in_dev = __in_dev_get(dev)))
+ if ((in_dev = __in_dev_get_rcu(dev)))
addr = confirm_addr_indev(in_dev, dst, local, scope);
rcu_read_unlock();
@@ -897,7 +898,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
read_lock(&dev_base_lock);
rcu_read_lock();
for (dev = dev_base; dev; dev = dev->next) {
- if ((in_dev = __in_dev_get(dev))) {
+ if ((in_dev = __in_dev_get_rcu(dev))) {
addr = confirm_addr_indev(in_dev, dst, local, scope);
if (addr)
break;
@@ -957,7 +958,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL();
@@ -1078,7 +1079,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
if (idx > s_idx)
s_ip_idx = 0;
rcu_read_lock();
- if ((in_dev = __in_dev_get(dev)) == NULL) {
+ if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
rcu_read_unlock();
continue;
}
@@ -1149,7 +1150,7 @@ void inet_forward_change(void)
for (dev = dev_base; dev; dev = dev->next) {
struct in_device *in_dev;
rcu_read_lock();
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev)
in_dev->cnf.forwarding = on;
rcu_read_unlock();
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b5a09d1b90..1b18ce66e7b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -5,6 +5,7 @@
#include <net/esp.h>
#include <asm/scatterlist.h>
#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
#include <linux/random.h>
#include <net/icmp.h>
@@ -42,10 +43,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esp = x->data;
alen = esp->auth.icv_trunc_len;
tfm = esp->conf.tfm;
- blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
- clen = (clen + 2 + blksize-1)&~(blksize-1);
+ blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+ clen = ALIGN(clen + 2, blksize);
if (esp->conf.padlen)
- clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+ clen = ALIGN(clen, esp->conf.padlen);
if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
goto error;
@@ -143,7 +144,7 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc
struct ip_esp_hdr *esph;
struct esp_data *esp = x->data;
struct sk_buff *trailer;
- int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+ int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
int alen = esp->auth.icv_trunc_len;
int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
int nfrags;
@@ -304,16 +305,16 @@ static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap,
static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+ u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
if (x->props.mode) {
- mtu = (mtu + 2 + blksize-1)&~(blksize-1);
+ mtu = ALIGN(mtu + 2, blksize);
} else {
/* The worst case. */
- mtu += 2 + blksize;
+ mtu = ALIGN(mtu + 2, 4) + blksize - 4;
}
if (esp->conf.padlen)
- mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+ mtu = ALIGN(mtu, esp->conf.padlen);
return mtu + x->props.header_len + esp->auth.icv_trunc_len;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e1379f7126..990633c09df 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
no_addr = rpf = 0;
rcu_read_lock();
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
rpf = IN_DEV_RPFILTER(in_dev);
@@ -591,7 +591,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa);
- if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
+ if (ifa->ifa_dev->ifa_list == NULL) {
/* Last address was deleted from this interface.
Disable IP.
*/
@@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d41219e8037..186f20c4a45 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
rta->rta_oif = &dev->ifindex;
if (colon) {
struct in_ifaddr *ifa;
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (!in_dev)
return -ENODEV;
*colon = ':';
@@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev)
}
if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
continue;
- if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
+ if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
continue;
alive++;
spin_lock_bh(&fib_multipath_lock);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 90ae70870a1..66247f38b37 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -286,6 +286,8 @@ static inline void check_tnode(const struct tnode *tn)
static int halve_threshold = 25;
static int inflate_threshold = 50;
+static int halve_threshold_root = 15;
+static int inflate_threshold_root = 25;
static void __alias_free_mem(struct rcu_head *head)
@@ -449,6 +451,8 @@ static struct node *resize(struct trie *t, struct tnode *tn)
int i;
int err = 0;
struct tnode *old_tn;
+ int inflate_threshold_use;
+ int halve_threshold_use;
if (!tn)
return NULL;
@@ -541,10 +545,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
check_tnode(tn);
+ /* Keep root node larger */
+
+ if(!tn->parent)
+ inflate_threshold_use = inflate_threshold_root;
+ else
+ inflate_threshold_use = inflate_threshold;
+
err = 0;
while ((tn->full_children > 0 &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
- inflate_threshold * tnode_child_length(tn))) {
+ inflate_threshold_use * tnode_child_length(tn))) {
old_tn = tn;
tn = inflate(t, tn);
@@ -564,10 +575,18 @@ static struct node *resize(struct trie *t, struct tnode *tn)
* node is above threshold.
*/
+
+ /* Keep root node larger */
+
+ if(!tn->parent)
+ halve_threshold_use = halve_threshold_root;
+ else
+ halve_threshold_use = halve_threshold;
+
err = 0;
while (tn->bits > 1 &&
100 * (tnode_child_length(tn) - tn->empty_children) <
- halve_threshold * tnode_child_length(tn)) {
+ halve_threshold_use * tnode_child_length(tn)) {
old_tn = tn;
tn = halve(t, tn);
@@ -1086,7 +1105,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
}
if (tp && tp->pos + tp->bits > 32)
- printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
+ printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
tp, tp->pos, tp->bits, key, plen);
/* Rebalance the trie */
@@ -1832,16 +1851,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
i++;
continue;
}
- if (fa->fa_info->fib_nh == NULL) {
- printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
- i++;
- continue;
- }
- if (fa->fa_info == NULL) {
- printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
- i++;
- continue;
- }
+ BUG_ON(!fa->fa_info);
if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
@@ -1964,7 +1974,7 @@ struct fib_table * __init fib_hash_init(int id)
trie_main = t;
if (id == RT_TABLE_LOCAL)
- printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
+ printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
return tb;
}
@@ -2394,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
prefix = htonl(l->key);
list_for_each_entry_rcu(fa, &li->falh, fa_list) {
- const struct fib_info *fi = rcu_dereference(fa->fa_info);
+ const struct fib_info *fi = fa->fa_info;
unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 24eb56ae1b5..175e093ec56 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -188,7 +188,7 @@ struct icmp_err icmp_err_convert[] = {
/* Control parameters for ECHO replies. */
int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts;
+int sysctl_icmp_echo_ignore_broadcasts = 1;
/* Control parameter - ignore bogus broadcast responses? */
int sysctl_icmp_ignore_bogus_error_responses;
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
struct inet_sock *inet;
int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_cpu(i) {
int err;
- if (!cpu_possible(i))
- continue;
-
err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
&per_cpu(__icmp_socket, i));
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 70c44e4c3ce..c6247fc8406 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
}
if (dev) {
imr->imr_ifindex = dev->ifindex;
- idev = __in_dev_get(dev);
+ idev = __in_dev_get_rtnl(dev);
}
return idev;
}
@@ -1908,8 +1908,11 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
goto done;
}
- } else
+ } else {
newpsl = NULL;
+ (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
+ msf->imsf_fmode, 0, NULL, 0);
+ }
psl = pmc->sflist;
if (psl) {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fe3c6d3d0c9..3fe021f1a56 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
- int rover;
+ int rover = net_random() % (high - low) + low;
- spin_lock(&hashinfo->portalloc_lock);
- if (hashinfo->port_rover < low)
- rover = low;
- else
- rover = hashinfo->port_rover;
do {
- rover++;
- if (rover > high)
- rover = low;
head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
break;
next:
spin_unlock(&head->lock);
+ if (++rover > high)
+ rover = low;
} while (--remaining > 0);
- hashinfo->port_rover = rover;
- spin_unlock(&hashinfo->portalloc_lock);
/* Exhausted local port range during search? It is not
* possible for us to be holding one of the bind hash
@@ -494,7 +486,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
- const unsigned int __nocast priority)
+ const gfp_t priority)
{
struct sock *newsk = sk_clone(sk, priority);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 4d1502a4985..a010e9a6881 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi
struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb;
/* Unlink from established hashes. */
- struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent];
+ struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash);
write_lock(&ehead->lock);
if (hlist_unhashed(&tw->tw_node)) {
@@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
{
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent];
+ struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
struct inet_bind_hashbucket *bhead;
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
@@ -106,11 +106,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_dport = inet->dport;
tw->tw_family = sk->sk_family;
tw->tw_reuse = sk->sk_reuse;
- tw->tw_hashent = sk->sk_hashent;
+ tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
tw->tw_prot = sk->sk_prot_creator;
atomic_set(&tw->tw_refcnt, 1);
inet_twsk_dead_node_init(tw);
+ __module_get(tw->tw_prot->owner);
}
return tw;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f0d5740d7e2..896ce3f8f53 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev)
return -EADDRNOTAVAIL;
dev = rt->u.dst.dev;
ip_rt_put(rt);
- if (__in_dev_get(dev) == NULL)
+ if (__in_dev_get_rtnl(dev) == NULL)
return -EADDRNOTAVAIL;
t->mlink = dev->ifindex;
- ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr);
+ ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
}
return 0;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3f1a263e124..17758234a3e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
{
IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
- if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
+ if (skb->len > dst_mtu(skb->dst) &&
+ !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
return ip_fragment(skb, ip_finish_output);
else
return ip_finish_output(skb);
@@ -391,6 +392,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->nfct = from->nfct;
nf_conntrack_get(to->nfct);
to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ to->ipvs_property = from->ipvs_property;
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(to->nf_bridge);
to->nf_bridge = from->nf_bridge;
@@ -685,6 +689,60 @@ csum_page(struct page *page, int offset, int copy)
return csum;
}
+inline int ip_ufo_append_data(struct sock *sk,
+ int getfrag(void *from, char *to, int offset, int len,
+ int odd, struct sk_buff *skb),
+ void *from, int length, int hh_len, int fragheaderlen,
+ int transhdrlen, int mtu,unsigned int flags)
+{
+ struct sk_buff *skb;
+ int err;
+
+ /* There is support for UDP fragmentation offload by network
+ * device, so create one single skb packet containing complete
+ * udp datagram
+ */
+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ skb = sock_alloc_send_skb(sk,
+ hh_len + fragheaderlen + transhdrlen + 20,
+ (flags & MSG_DONTWAIT), &err);
+
+ if (skb == NULL)
+ return err;
+
+ /* reserve space for Hardware header */
+ skb_reserve(skb, hh_len);
+
+ /* create space for UDP/IP header */
+ skb_put(skb,fragheaderlen + transhdrlen);
+
+ /* initialize network header pointer */
+ skb->nh.raw = skb->data;
+
+ /* initialize protocol header pointer */
+ skb->h.raw = skb->data + fragheaderlen;
+
+ skb->ip_summed = CHECKSUM_HW;
+ skb->csum = 0;
+ sk->sk_sndmsg_off = 0;
+ }
+
+ err = skb_append_datato_frags(sk,skb, getfrag, from,
+ (length - transhdrlen));
+ if (!err) {
+ /* specify the length of each IP datagram fragment*/
+ skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+
+ return 0;
+ }
+ /* There is not enough support do UFO ,
+ * so follow normal path
+ */
+ kfree_skb(skb);
+ return err;
+}
+
/*
* ip_append_data() and ip_append_page() can make one large IP datagram
* from many pieces of data. Each pieces will be holded on the socket
@@ -774,6 +832,15 @@ int ip_append_data(struct sock *sk,
csummode = CHECKSUM_HW;
inet->cork.length += length;
+ if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+ if(ip_ufo_append_data(sk, getfrag, from, length, hh_len,
+ fragheaderlen, transhdrlen, mtu, flags))
+ goto error;
+
+ return 0;
+ }
/* So, what's going on in the loop below?
*
@@ -1005,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
return -EINVAL;
inet->cork.length += size;
+ if ((sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO))
+ skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+
while (size > 0) {
int i;
- /* Check if the remaining data fits into current packet. */
- len = mtu - skb->len;
- if (len < size)
- len = maxfraglen - skb->len;
+ if (skb_shinfo(skb)->ufo_size)
+ len = size;
+ else {
+
+ /* Check if the remaining data fits into current packet. */
+ len = mtu - skb->len;
+ if (len < size)
+ len = maxfraglen - skb->len;
+ }
if (len <= 0) {
struct sk_buff *skb_prev;
char *data;
@@ -1020,10 +1096,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
int alloclen;
skb_prev = skb;
- if (skb_prev)
- fraggap = skb_prev->len - maxfraglen;
- else
- fraggap = 0;
+ fraggap = skb_prev->len - maxfraglen;
alloclen = fragheaderlen + hh_len + fraggap + 15;
skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9dbf5909f3a..302b7eb507c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
dev->flags |= IFF_MULTICAST;
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
goto failure;
in_dev->cnf.rp_filter = 0;
@@ -278,7 +278,7 @@ static int vif_delete(int vifi)
dev_set_allmulti(dev, -1);
- if ((in_dev = __in_dev_get(dev)) != NULL) {
+ if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
in_dev->cnf.mc_forwarding--;
ip_rt_multicast_event(in_dev);
}
@@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
return -EINVAL;
}
- if ((in_dev = __in_dev_get(dev)) == NULL)
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
return -EADDRNOTAVAIL;
in_dev->cnf.mc_forwarding++;
dev_set_allmulti(dev, +1);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 6e092dadb38..fc6f95aaa96 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = {
/*
* Replace a segment of data with a new segment
*/
-int ip_vs_skb_replace(struct sk_buff *skb, int pri,
+int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len)
{
struct iphdr *iph;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3cf9b451675..7d917e4ce1d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -139,9 +139,10 @@ config IP_NF_AMANDA
config IP_NF_PPTP
tristate 'PPTP protocol support'
+ depends on IP_NF_CONNTRACK
help
This module adds support for PPTP (Point to Point Tunnelling
- Protocol, RFC2637) conncection tracking and NAT.
+ Protocol, RFC2637) connection tracking and NAT.
If you are running PPTP sessions over a stateful firewall or NAT
box, you may want to enable this feature.
@@ -498,9 +499,14 @@ config IP_NF_TARGET_LOG
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ULOG
- tristate "ULOG target support"
+ tristate "ULOG target support (OBSOLETE)"
depends on IP_NF_IPTABLES
---help---
+
+ This option enables the old IPv4-only "ipt_ULOG" implementation
+ which has been obsoleted by the new "nfnetlink_log" code (see
+ CONFIG_NETFILTER_NETLINK_LOG).
+
This option adds a `ULOG' target, which allows you to create rules in
any iptables table. The packet is passed to a userspace logging
daemon using netlink multicast sockets; unlike the LOG target
@@ -537,6 +543,17 @@ config IP_NF_TARGET_TCPMSS
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_NFQUEUE
+ tristate "NFQUEUE Target Support"
+ depends on IP_NF_IPTABLES
+ help
+ This Target replaced the old obsolete QUEUE target.
+
+ As opposed to QUEUE, it supports 65535 different queues,
+ not just one.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# NAT + specific targets
config IP_NF_NAT
tristate "Full NAT"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3d45d3c0283..dab4b58dd31 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -4,7 +4,8 @@
# objects for the standalone - connection tracking / NAT
ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
-iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
@@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
@@ -92,6 +93,7 @@ obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
+obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -101,4 +103,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
-obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index fa163425668..3c2e9639bba 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -347,58 +347,106 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
return verdict;
}
-static inline void *find_inlist_lock_noload(struct list_head *head,
- const char *name,
- int *error,
- struct semaphore *mutex)
+/*
+ * These are weird, but module loading must not be done with mutex
+ * held (since they will register), and we have to have a single
+ * function to use try_then_request_module().
+ */
+
+/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
+static inline struct arpt_table *find_table_lock(const char *name)
{
- void *ret;
+ struct arpt_table *t;
- *error = down_interruptible(mutex);
- if (*error != 0)
- return NULL;
+ if (down_interruptible(&arpt_mutex) != 0)
+ return ERR_PTR(-EINTR);
- ret = list_named_find(head, name);
- if (!ret) {
- *error = -ENOENT;
- up(mutex);
- }
- return ret;
+ list_for_each_entry(t, &arpt_tables, list)
+ if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ return t;
+ up(&arpt_mutex);
+ return NULL;
}
-#ifndef CONFIG_KMOD
-#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
-#else
-static void *
-find_inlist_lock(struct list_head *head,
- const char *name,
- const char *prefix,
- int *error,
- struct semaphore *mutex)
+
+/* Find target, grabs ref. Returns ERR_PTR() on error. */
+static inline struct arpt_target *find_target(const char *name, u8 revision)
{
- void *ret;
+ struct arpt_target *t;
+ int err = 0;
- ret = find_inlist_lock_noload(head, name, error, mutex);
- if (!ret) {
- duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
- request_module("%s%s", prefix, name);
- ret = find_inlist_lock_noload(head, name, error, mutex);
+ if (down_interruptible(&arpt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+ list_for_each_entry(t, &arpt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision == revision) {
+ if (try_module_get(t->me)) {
+ up(&arpt_mutex);
+ return t;
+ }
+ } else
+ err = -EPROTOTYPE; /* Found something. */
+ }
}
+ up(&arpt_mutex);
+ return ERR_PTR(err);
+}
- return ret;
+struct arpt_target *arpt_find_target(const char *name, u8 revision)
+{
+ struct arpt_target *target;
+
+ target = try_then_request_module(find_target(name, revision),
+ "arpt_%s", name);
+ if (IS_ERR(target) || !target)
+ return NULL;
+ return target;
}
-#endif
-static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+static int target_revfn(const char *name, u8 revision, int *bestp)
{
- return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex);
+ struct arpt_target *t;
+ int have_rev = 0;
+
+ list_for_each_entry(t, &arpt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision > *bestp)
+ *bestp = t->revision;
+ if (t->revision == revision)
+ have_rev =1;
+ }
+ }
+ return have_rev;
}
-static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+/* Returns true or false (if no such extension at all) */
+static inline int find_revision(const char *name, u8 revision,
+ int (*revfn)(const char *, u8, int *),
+ int *err)
{
- return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex);
+ int have_rev, best = -1;
+
+ if (down_interruptible(&arpt_mutex) != 0) {
+ *err = -EINTR;
+ return 1;
+ }
+ have_rev = revfn(name, revision, &best);
+ up(&arpt_mutex);
+
+ /* Nothing at all? Return 0 to try loading module. */
+ if (best == -1) {
+ *err = -ENOENT;
+ return 0;
+ }
+
+ *err = best;
+ if (!have_rev)
+ *err = -EPROTONOSUPPORT;
+ return 1;
}
+
/* All zeroes == unconditional rule. */
static inline int unconditional(const struct arpt_arp *arp)
{
@@ -544,17 +592,15 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
}
t = arpt_get_target(e);
- target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex);
- if (!target) {
+ target = try_then_request_module(find_target(t->u.user.name,
+ t->u.user.revision),
+ "arpt_%s", t->u.user.name);
+ if (IS_ERR(target) || !target) {
duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ ret = target ? PTR_ERR(target) : -ENOENT;
goto out;
}
- if (!try_module_get((target->me))) {
- ret = -ENOENT;
- goto out_unlock;
- }
t->u.kernel.target = target;
- up(&arpt_mutex);
if (t->u.kernel.target == &arpt_standard_target) {
if (!standard_check(t, size)) {
@@ -576,8 +622,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
(*i)++;
return 0;
-out_unlock:
- up(&arpt_mutex);
out:
return ret;
}
@@ -716,8 +760,10 @@ static int translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < num_possible_cpus(); i++) {
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ for_each_cpu(i) {
+ if (i == 0)
+ continue;
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
}
@@ -767,7 +813,7 @@ static void get_counters(const struct arpt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+ for_each_cpu(cpu) {
i = 0;
ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -844,8 +890,8 @@ static int get_entries(const struct arpt_get_entries *entries,
int ret;
struct arpt_table *t;
- t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex);
- if (t) {
+ t = find_table_lock(entries->name);
+ if (t || !IS_ERR(t)) {
duprintf("t->private->number = %u\n",
t->private->number);
if (entries->size == t->private->size)
@@ -857,10 +903,10 @@ static int get_entries(const struct arpt_get_entries *entries,
entries->size);
ret = -EINVAL;
}
+ module_put(t->me);
up(&arpt_mutex);
} else
- duprintf("get_entries: Can't find %s!\n",
- entries->name);
+ ret = t ? PTR_ERR(t) : -ENOENT;
return ret;
}
@@ -885,7 +931,8 @@ static int do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(tmp.size) * num_possible_cpus());
+ + SMP_ALIGN(tmp.size) *
+ (highest_possible_processor_id()+1));
if (!newinfo)
return -ENOMEM;
@@ -910,22 +957,19 @@ static int do_replace(void __user *user, unsigned int len)
duprintf("arp_tables: Translated table\n");
- t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
- if (!t)
+ t = try_then_request_module(find_table_lock(tmp.name),
+ "arptable_%s", tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
+ }
/* You lied! */
if (tmp.valid_hooks != t->valid_hooks) {
duprintf("Valid hook crap: %08X vs %08X\n",
tmp.valid_hooks, t->valid_hooks);
ret = -EINVAL;
- goto free_newinfo_counters_untrans_unlock;
- }
-
- /* Get a reference in advance, we're not allowed fail later */
- if (!try_module_get(t->me)) {
- ret = -EBUSY;
- goto free_newinfo_counters_untrans_unlock;
+ goto put_module;
}
oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
@@ -956,7 +1000,6 @@ static int do_replace(void __user *user, unsigned int len)
put_module:
module_put(t->me);
- free_newinfo_counters_untrans_unlock:
up(&arpt_mutex);
free_newinfo_counters_untrans:
ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
@@ -986,7 +1029,7 @@ static int do_add_counters(void __user *user, unsigned int len)
unsigned int i;
struct arpt_counters_info tmp, *paddc;
struct arpt_table *t;
- int ret;
+ int ret = 0;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1003,9 +1046,11 @@ static int do_add_counters(void __user *user, unsigned int len)
goto free;
}
- t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
- if (!t)
+ t = find_table_lock(tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
+ }
write_lock_bh(&t->lock);
if (t->private->number != paddc->num_counters) {
@@ -1022,6 +1067,7 @@ static int do_add_counters(void __user *user, unsigned int len)
unlock_up_free:
write_unlock_bh(&t->lock);
up(&arpt_mutex);
+ module_put(t->me);
free:
vfree(paddc);
@@ -1076,8 +1122,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
break;
}
name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
- t = arpt_find_table_lock(name, &ret, &arpt_mutex);
- if (t) {
+
+ t = try_then_request_module(find_table_lock(name),
+ "arptable_%s", name);
+ if (t && !IS_ERR(t)) {
struct arpt_getinfo info;
info.valid_hooks = t->valid_hooks;
@@ -1093,9 +1141,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
ret = -EFAULT;
else
ret = 0;
-
up(&arpt_mutex);
- }
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
}
break;
@@ -1116,6 +1165,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
break;
}
+ case ARPT_SO_GET_REVISION_TARGET: {
+ struct arpt_get_revision rev;
+
+ if (*len != sizeof(rev)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+
+ try_then_request_module(find_revision(rev.name, rev.revision,
+ target_revfn, &ret),
+ "arpt_%s", rev.name);
+ break;
+ }
+
default:
duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
@@ -1133,12 +1200,9 @@ int arpt_register_target(struct arpt_target *target)
if (ret != 0)
return ret;
- if (!list_named_insert(&arpt_target, target)) {
- duprintf("arpt_register_target: `%s' already in list!\n",
- target->name);
- ret = -EINVAL;
- }
+ list_add(&target->list, &arpt_target);
up(&arpt_mutex);
+
return ret;
}
@@ -1158,7 +1222,8 @@ int arpt_register_table(struct arpt_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(repl->size) * num_possible_cpus());
+ + SMP_ALIGN(repl->size) *
+ (highest_possible_processor_id()+1));
if (!newinfo) {
ret = -ENOMEM;
return ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index dc20881004b..fa3f914117e 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb,
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
- ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
+ ip_ct_refresh(ct, *pskb, master_timeout * HZ);
/* No data? */
dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index c1f82e0c81c..422ab68ee7f 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
-#define IP_CONNTRACK_VERSION "2.3"
+#define IP_CONNTRACK_VERSION "2.4"
#if 0
#define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
+static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
+ unsigned int size, unsigned int rnd)
{
-#if 0
- dump_tuple(tuple);
-#endif
return (jhash_3words(tuple->src.ip,
(tuple->dst.ip ^ tuple->dst.protonum),
(tuple->src.u.all | (tuple->dst.u.all << 16)),
- ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+ rnd) % size);
+}
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+ return __hash_conntrack(tuple, ip_conntrack_htable_size,
+ ip_conntrack_hash_rnd);
}
int
@@ -1112,45 +1116,49 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
synchronize_net();
}
-static inline void ct_add_counters(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (skb) {
- ct->counters[CTINFO2DIR(ctinfo)].packets++;
- ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- ntohs(skb->nh.iph->tot_len);
- }
-#endif
-}
-
-/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
-void ip_ct_refresh_acct(struct ip_conntrack *ct,
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+void __ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
- unsigned long extra_jiffies)
+ unsigned long extra_jiffies,
+ int do_acct)
{
+ int event = 0;
+
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+ IP_NF_ASSERT(skb);
+
+ write_lock_bh(&ip_conntrack_lock);
/* If not in hash table, timer will not be active yet */
if (!is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- ct_add_counters(ct, ctinfo, skb);
+ event = IPCT_REFRESH;
} else {
- write_lock_bh(&ip_conntrack_lock);
/* Need del_timer for race avoidance (may already be dying). */
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
- /* FIXME: We loose some REFRESH events if this function
- * is called without an skb. I'll fix this later -HW */
- if (skb)
- ip_conntrack_event_cache(IPCT_REFRESH, skb);
+ event = IPCT_REFRESH;
}
- ct_add_counters(ct, ctinfo, skb);
- write_unlock_bh(&ip_conntrack_lock);
}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+ if (do_acct) {
+ ct->counters[CTINFO2DIR(ctinfo)].packets++;
+ ct->counters[CTINFO2DIR(ctinfo)].bytes +=
+ ntohs(skb->nh.iph->tot_len);
+ if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
+ || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
+ event |= IPCT_COUNTER_FILLING;
+ }
+#endif
+
+ write_unlock_bh(&ip_conntrack_lock);
+
+ /* must be unlocked when calling event cache */
+ if (event)
+ ip_conntrack_event_cache(event, skb);
}
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
@@ -1337,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
return 1;
}
-static void free_conntrack_hash(void)
+static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
{
- if (ip_conntrack_vmalloc)
- vfree(ip_conntrack_hash);
+ if (vmalloced)
+ vfree(hash);
else
- free_pages((unsigned long)ip_conntrack_hash,
- get_order(sizeof(struct list_head)
- * ip_conntrack_htable_size));
+ free_pages((unsigned long)hash,
+ get_order(sizeof(struct list_head) * size));
}
void ip_conntrack_flush()
@@ -1374,12 +1381,83 @@ void ip_conntrack_cleanup(void)
ip_conntrack_flush();
kmem_cache_destroy(ip_conntrack_cachep);
kmem_cache_destroy(ip_conntrack_expect_cachep);
- free_conntrack_hash();
+ free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+ ip_conntrack_htable_size);
nf_unregister_sockopt(&so_getorigdst);
}
-static int hashsize;
-module_param(hashsize, int, 0400);
+static struct list_head *alloc_hashtable(int size, int *vmalloced)
+{
+ struct list_head *hash;
+ unsigned int i;
+
+ *vmalloced = 0;
+ hash = (void*)__get_free_pages(GFP_KERNEL,
+ get_order(sizeof(struct list_head)
+ * size));
+ if (!hash) {
+ *vmalloced = 1;
+ printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
+ hash = vmalloc(sizeof(struct list_head) * size);
+ }
+
+ if (hash)
+ for (i = 0; i < size; i++)
+ INIT_LIST_HEAD(&hash[i]);
+
+ return hash;
+}
+
+int set_hashsize(const char *val, struct kernel_param *kp)
+{
+ int i, bucket, hashsize, vmalloced;
+ int old_vmalloced, old_size;
+ int rnd;
+ struct list_head *hash, *old_hash;
+ struct ip_conntrack_tuple_hash *h;
+
+ /* On boot, we can set this without any fancy locking. */
+ if (!ip_conntrack_htable_size)
+ return param_set_int(val, kp);
+
+ hashsize = simple_strtol(val, NULL, 0);
+ if (!hashsize)
+ return -EINVAL;
+
+ hash = alloc_hashtable(hashsize, &vmalloced);
+ if (!hash)
+ return -ENOMEM;
+
+ /* We have to rehash for the new table anyway, so we also can
+ * use a new random seed */
+ get_random_bytes(&rnd, 4);
+
+ write_lock_bh(&ip_conntrack_lock);
+ for (i = 0; i < ip_conntrack_htable_size; i++) {
+ while (!list_empty(&ip_conntrack_hash[i])) {
+ h = list_entry(ip_conntrack_hash[i].next,
+ struct ip_conntrack_tuple_hash, list);
+ list_del(&h->list);
+ bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+ list_add_tail(&h->list, &hash[bucket]);
+ }
+ }
+ old_size = ip_conntrack_htable_size;
+ old_vmalloced = ip_conntrack_vmalloc;
+ old_hash = ip_conntrack_hash;
+
+ ip_conntrack_htable_size = hashsize;
+ ip_conntrack_vmalloc = vmalloced;
+ ip_conntrack_hash = hash;
+ ip_conntrack_hash_rnd = rnd;
+ write_unlock_bh(&ip_conntrack_lock);
+
+ free_conntrack_hash(old_hash, old_vmalloced, old_size);
+ return 0;
+}
+
+module_param_call(hashsize, set_hashsize, param_get_uint,
+ &ip_conntrack_htable_size, 0600);
int __init ip_conntrack_init(void)
{
@@ -1388,9 +1466,7 @@ int __init ip_conntrack_init(void)
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 256 buckets. >= 1GB machines have 8192 buckets. */
- if (hashsize) {
- ip_conntrack_htable_size = hashsize;
- } else {
+ if (!ip_conntrack_htable_size) {
ip_conntrack_htable_size
= (((num_physpages << PAGE_SHIFT) / 16384)
/ sizeof(struct list_head));
@@ -1412,20 +1488,8 @@ int __init ip_conntrack_init(void)
return ret;
}
- /* AK: the hash table is twice as big than needed because it
- uses list_head. it would be much nicer to caches to use a
- single pointer list head here. */
- ip_conntrack_vmalloc = 0;
- ip_conntrack_hash
- =(void*)__get_free_pages(GFP_KERNEL,
- get_order(sizeof(struct list_head)
- *ip_conntrack_htable_size));
- if (!ip_conntrack_hash) {
- ip_conntrack_vmalloc = 1;
- printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
- ip_conntrack_hash = vmalloc(sizeof(struct list_head)
- * ip_conntrack_htable_size);
- }
+ ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+ &ip_conntrack_vmalloc);
if (!ip_conntrack_hash) {
printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
goto err_unreg_sockopt;
@@ -1457,9 +1521,6 @@ int __init ip_conntrack_init(void)
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
write_unlock_bh(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- INIT_LIST_HEAD(&ip_conntrack_hash[i]);
-
/* For use by ipt_REJECT */
ip_ct_attach = ip_conntrack_attach;
@@ -1474,7 +1535,8 @@ int __init ip_conntrack_init(void)
err_free_conntrack_slab:
kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
- free_conntrack_hash();
+ free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+ ip_conntrack_htable_size);
err_unreg_sockopt:
nf_unregister_sockopt(&so_getorigdst);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 79db5b70d5f..4108a5e12b3 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
sibling->proto.gre.timeout = 0;
sibling->proto.gre.stream_timeout = 0;
- /* refresh_acct will not modify counters if skb == NULL */
if (del_timer(&sibling->timeout))
sibling->timeout.function((unsigned long)sibling);
ip_conntrack_put(sibling);
@@ -223,8 +222,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
static inline int
exp_gre(struct ip_conntrack *master,
u_int32_t seq,
- u_int16_t callid,
- u_int16_t peer_callid)
+ __be16 callid,
+ __be16 peer_callid)
{
struct ip_conntrack_tuple inv_tuple;
struct ip_conntrack_tuple exp_tuples[] = {
@@ -263,7 +262,7 @@ exp_gre(struct ip_conntrack *master,
exp_orig->mask.src.ip = 0xffffffff;
exp_orig->mask.src.u.all = 0;
exp_orig->mask.dst.u.all = 0;
- exp_orig->mask.dst.u.gre.key = 0xffff;
+ exp_orig->mask.dst.u.gre.key = htons(0xffff);
exp_orig->mask.dst.ip = 0xffffffff;
exp_orig->mask.dst.protonum = 0xff;
@@ -271,14 +270,10 @@ exp_gre(struct ip_conntrack *master,
exp_orig->expectfn = pptp_expectfn;
exp_orig->flags = 0;
- exp_orig->dir = IP_CT_DIR_ORIGINAL;
-
/* both expectations are identical apart from tuple */
memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
- exp_reply->dir = !exp_orig->dir;
-
if (ip_nat_pptp_hook_exp_gre)
ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
else {
@@ -340,7 +335,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
unsigned int reqlen;
union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg, *cid, *pcid;
+ u_int16_t msg;
+ __be16 *cid, *pcid;
u_int32_t seq;
ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
@@ -485,7 +481,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
if (info->pns_call_id != ntohs(*pcid)) {
DEBUGP("%s for unknown CallID %u\n",
- pptp_msg_name[msg], ntohs(*cid));
+ pptp_msg_name[msg], ntohs(*pcid));
break;
}
@@ -551,7 +547,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
unsigned int reqlen;
union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg, *cid, *pcid;
+ u_int16_t msg;
+ __be16 *cid, *pcid;
ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
if (!ctlh)
@@ -755,7 +752,7 @@ static struct ip_conntrack_helper pptp = {
}
},
.mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = 0xffff } }
+ .u = { .tcp = { .port = __constant_htons(0xffff) } }
},
.dst = { .ip = 0,
.u = { .all = 0 },
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 71ef19d126d..186646eb249 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb,
goto out;
rcu_read_lock();
- in_dev = __in_dev_get(rt->u.dst.dev);
+ in_dev = __in_dev_get_rcu(rt->u.dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
if (ifa->ifa_broadcast == iph->daddr) {
@@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb,
ip_conntrack_expect_related(exp);
ip_conntrack_expect_put(exp);
- ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ);
+ ip_ct_refresh(ct, *pskb, timeout * HZ);
out:
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index b08a432efcf..82a65043a8e 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
struct nfattr *nest_count = NFA_NEST(skb, type);
u_int64_t tmp;
- tmp = cpu_to_be64(ct->counters[dir].packets);
- NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp);
+ tmp = htonl(ct->counters[dir].packets);
+ NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
- tmp = cpu_to_be64(ct->counters[dir].bytes);
- NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp);
+ tmp = htonl(ct->counters[dir].bytes);
+ NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
NFA_NEST_END(skb, nest_count);
@@ -815,7 +815,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
IPCTNL_MSG_CT_NEW, 1, ct);
ip_conntrack_put(ct);
if (err <= 0)
- goto out;
+ goto free;
err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
if (err < 0)
@@ -824,16 +824,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("leaving\n");
return 0;
+free:
+ kfree_skb(skb2);
out:
- if (skb2)
- kfree_skb(skb2);
return -1;
}
static inline int
ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
{
- unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]);
+ unsigned long d;
+ unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
d = ct->status ^ status;
if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -948,6 +949,31 @@ ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
return 0;
}
+static inline int
+ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
+{
+ struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
+ struct ip_conntrack_protocol *proto;
+ u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+ int err = 0;
+
+ if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0)
+ goto nfattr_failure;
+
+ proto = ip_conntrack_proto_find_get(npt);
+ if (!proto)
+ return -EINVAL;
+
+ if (proto->from_nfattr)
+ err = proto->from_nfattr(tb, ct);
+ ip_conntrack_proto_put(proto);
+
+ return err;
+
+nfattr_failure:
+ return -ENOMEM;
+}
+
static int
ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
{
@@ -973,6 +999,12 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
return err;
}
+ if (cda[CTA_PROTOINFO-1]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
DEBUGP("all done\n");
return 0;
}
@@ -1002,6 +1034,12 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
if (err < 0)
goto err;
+ if (cda[CTA_PROTOINFO-1]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
ct->helper = ip_conntrack_helper_find_get(rtuple);
add_timer(&ct->timeout);
@@ -1284,21 +1322,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1, exp);
if (err <= 0)
- goto out;
+ goto free;
ip_conntrack_expect_put(exp);
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
- if (err < 0)
- goto free;
-
- return err;
+ return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+free:
+ kfree_skb(skb2);
out:
ip_conntrack_expect_put(exp);
-free:
- if (skb2)
- kfree_skb(skb2);
return err;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index de3cb9db6f8..744abb9d377 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -247,6 +247,7 @@ static int gre_packet(struct ip_conntrack *ct,
ct->proto.gre.stream_timeout);
/* Also, more likely to be important, and not a probe. */
set_bit(IPS_ASSURED_BIT, &ct->status);
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
} else
ip_ct_refresh_acct(ct, conntrackinfo, skb,
ct->proto.gre.timeout);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 838d1d69b36..98f0015dd25 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -296,8 +296,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
struct ip_conntrack_tuple *tuple)
{
if (!tb[CTA_PROTO_ICMP_TYPE-1]
- || !tb[CTA_PROTO_ICMP_CODE-1]
- || !tb[CTA_PROTO_ICMP_ID-1])
+ || !tb[CTA_PROTO_ICMP_CODE-1])
return -1;
tuple->dst.u.icmp.type =
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index a875f35e576..59a4a0111dd 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -416,6 +416,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
DEBUGP("Setting assured bit\n");
set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
}
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 1985abc59d2..d6701cafbcc 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -341,17 +341,43 @@ static int tcp_print_conntrack(struct seq_file *s,
static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
const struct ip_conntrack *ct)
{
+ struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+
read_lock_bh(&tcp_lock);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
read_unlock_bh(&tcp_lock);
+ NFA_NEST_END(skb, nest_parms);
+
return 0;
nfattr_failure:
read_unlock_bh(&tcp_lock);
return -1;
}
+
+static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
+{
+ struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
+ struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
+
+ if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0)
+ goto nfattr_failure;
+
+ if (!tb[CTA_PROTOINFO_TCP_STATE-1])
+ return -EINVAL;
+
+ write_lock_bh(&tcp_lock);
+ ct->proto.tcp.state =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+ write_unlock_bh(&tcp_lock);
+
+ return 0;
+
+nfattr_failure:
+ return -1;
+}
#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
@@ -1014,7 +1040,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
/* Set ASSURED if we see see valid ack in ESTABLISHED
after SYN_RECV or a valid answer for a picked up
connection. */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
}
ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
@@ -1122,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
.to_nfattr = tcp_to_nfattr,
+ .from_nfattr = nfattr_to_tcp,
.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index d3c7808010e..dd476b191f4 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(__ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_put);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c3ea891d38e..762f4d93936 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -66,20 +66,20 @@ ip_nat_proto_find_get(u_int8_t protonum)
* removed until we've grabbed the reference */
preempt_disable();
p = __ip_nat_proto_find(protonum);
- if (p) {
- if (!try_module_get(p->me))
- p = &ip_nat_unknown_protocol;
- }
+ if (!try_module_get(p->me))
+ p = &ip_nat_unknown_protocol;
preempt_enable();
return p;
}
+EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
void
ip_nat_proto_put(struct ip_nat_protocol *p)
{
module_put(p->me);
}
+EXPORT_SYMBOL_GPL(ip_nat_proto_put);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
@@ -111,6 +111,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
oldcheck^0xFFFF));
}
+EXPORT_SYMBOL(ip_nat_cheat_check);
/* Is this tuple already taken? (not by us) */
int
@@ -127,6 +128,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
invert_tuplepr(&reply, tuple);
return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
+EXPORT_SYMBOL(ip_nat_used_tuple);
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range. */
@@ -347,6 +349,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
return NF_ACCEPT;
}
+EXPORT_SYMBOL(ip_nat_setup_info);
/* Returns true if succeeded. */
static int
@@ -387,10 +390,10 @@ manip_pkt(u_int16_t proto,
}
/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int nat_packet(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
+unsigned int ip_nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
@@ -417,12 +420,13 @@ unsigned int nat_packet(struct ip_conntrack *ct,
}
return NF_ACCEPT;
}
+EXPORT_SYMBOL_GPL(ip_nat_packet);
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_nat_manip_type manip,
- enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir)
{
struct {
struct icmphdr icmp;
@@ -509,6 +513,7 @@ int icmp_reply_translation(struct sk_buff **pskb,
return 1;
}
+EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
/* Protocol registration. */
int ip_nat_protocol_register(struct ip_nat_protocol *proto)
@@ -525,6 +530,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
write_unlock_bh(&ip_nat_lock);
return ret;
}
+EXPORT_SYMBOL(ip_nat_protocol_register);
/* Noone stores the protocol anywhere; simply delete it. */
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
@@ -536,6 +542,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
}
+EXPORT_SYMBOL(ip_nat_protocol_unregister);
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
@@ -582,7 +589,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
#endif
-int __init ip_nat_init(void)
+static int __init ip_nat_init(void)
{
size_t i;
@@ -624,10 +631,14 @@ static int clean_nat(struct ip_conntrack *i, void *data)
return 0;
}
-/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
-void ip_nat_cleanup(void)
+static void __exit ip_nat_cleanup(void)
{
ip_ct_iterate_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL;
vfree(bysource);
}
+
+MODULE_LICENSE("GPL");
+
+module_init(ip_nat_init);
+module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index d2dd5d31355..5d506e0564d 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
}
return 1;
}
+EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
/* Generic function for mangling variable-length address changes inside
* NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
@@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
return 1;
}
+EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
/* Adjust one found SACK option including checksum correction */
static void
@@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
return 1;
}
+EXPORT_SYMBOL(ip_nat_seq_adjust);
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
@@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct,
/* hook doesn't matter, but it has to do destination manip */
ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
+EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd0684d30..ee6ab74ad3a 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -216,6 +216,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+ expect_orig->dir = IP_CT_DIR_ORIGINAL;
inv_t.src.ip = reply_t->src.ip;
inv_t.dst.ip = reply_t->dst.ip;
inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +234,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+ expect_reply->dir = IP_CT_DIR_REPLY;
inv_t.src.ip = orig_t->src.ip;
inv_t.dst.ip = orig_t->dst.ip;
inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c128540167..f7cad7cf1ae 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
break;
case GRE_VERSION_PPTP:
DEBUGP("call_id -> 0x%04x\n",
- ntohl(tuple->dst.u.gre.key));
- pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+ ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
break;
default:
DEBUGP("can't nat unknown GRE version\n");
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef56f84..f0099a646a0 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
struct ip_nat_protocol ip_nat_unknown_protocol = {
.name = "unknown",
- .me = THIS_MODULE,
+ /* .me isn't set: getting a ref to this cannot fail. */
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 0ff368b131f..30cd4e18c12 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!icmp_reply_translation(pskb, ct, maniptype,
- CTINFO2DIR(ctinfo)))
+ if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
+ CTINFO2DIR(ctinfo)))
return NF_DROP;
else
return NF_ACCEPT;
@@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum,
}
IP_NF_ASSERT(info);
- return nat_packet(ct, ctinfo, hooknum, pskb);
+ return ip_nat_packet(ct, ctinfo, hooknum, pskb);
}
static unsigned int
@@ -325,15 +325,10 @@ static int init_or_cleanup(int init)
printk("ip_nat_init: can't setup rules.\n");
goto cleanup_nothing;
}
- ret = ip_nat_init();
- if (ret < 0) {
- printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_rule_init;
- }
ret = nf_register_hook(&ip_nat_in_ops);
if (ret < 0) {
printk("ip_nat_init: can't register in hook.\n");
- goto cleanup_nat;
+ goto cleanup_rule_init;
}
ret = nf_register_hook(&ip_nat_out_ops);
if (ret < 0) {
@@ -374,8 +369,6 @@ static int init_or_cleanup(int init)
nf_unregister_hook(&ip_nat_out_ops);
cleanup_inops:
nf_unregister_hook(&ip_nat_in_ops);
- cleanup_nat:
- ip_nat_cleanup();
cleanup_rule_init:
ip_nat_rule_cleanup();
cleanup_nothing:
@@ -395,14 +388,4 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
-EXPORT_SYMBOL(ip_nat_setup_info);
-EXPORT_SYMBOL(ip_nat_protocol_register);
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-EXPORT_SYMBOL(ip_nat_cheat_check);
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-EXPORT_SYMBOL(ip_nat_used_tuple);
-EXPORT_SYMBOL(ip_nat_follow_master);
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d54f14d926f..36339eb39e1 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
+ pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
+ pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
pmsg->mark = entry->skb->nfmark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index eef99a1b5de..75c27e92f6a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -27,6 +27,7 @@
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
+#include <linux/cpumask.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -921,8 +922,10 @@ translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < num_possible_cpus(); i++) {
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ for_each_cpu(i) {
+ if (i == 0)
+ continue;
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
}
@@ -943,7 +946,7 @@ replace_table(struct ipt_table *table,
struct ipt_entry *table_base;
unsigned int i;
- for (i = 0; i < num_possible_cpus(); i++) {
+ for_each_cpu(i) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
@@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+ for_each_cpu(cpu) {
i = 0;
IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(tmp.size) * num_possible_cpus());
+ + SMP_ALIGN(tmp.size) *
+ (highest_possible_processor_id()+1));
if (!newinfo)
return -ENOMEM;
@@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(repl->size) * num_possible_cpus());
+ + SMP_ALIGN(repl->size) *
+ (highest_possible_processor_id()+1));
if (!newinfo)
return -ENOMEM;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 13463802133..05d66ab5942 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
static int __init init(void)
{
+ need_ip_conntrack();
return ipt_register_target(&ipt_connmark_reg);
}
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 715cb613405..5245bfd33d5 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb,
newdst = 0;
rcu_read_lock();
- indev = __in_dev_get((*pskb)->dev);
+ indev = __in_dev_get_rcu((*pskb)->dev);
if (indev && (ifa = indev->ifa_list))
newdst = ifa->ifa_local;
rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index e2c14f3cb2f..2883ccd8a91 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
/* copy hook, prefix, timestamp, payload, etc. */
pm->data_len = copy_len;
- pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
- pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
+ pm->timestamp_sec = skb->tstamp.off_sec;
+ pm->timestamp_usec = skb->tstamp.off_usec;
pm->mark = skb->nfmark;
pm->hook = hooknum;
if (prefix != NULL)
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index f5909a4c3fc..e19c2a52d00 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -48,7 +48,7 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip,
unsigned int hook_mask)
{
if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
- printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n.",
+ printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n",
matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
return 0;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43..a65e508fbd4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
unsigned long res = 0;
int i;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
+ for_each_cpu(i) {
res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8549f26e249..381dd6a6aeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
struct in_device *in_dev;
rcu_read_lock();
- if ((in_dev = __in_dev_get(dev)) != NULL) {
+ if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
int our = ip_check_mc(in_dev, daddr, saddr,
skb->nh.iph->protocol);
if (our
@@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
err = -ENODEV;
if (dev_out == NULL)
goto out;
- if (__in_dev_get(dev_out) == NULL) {
+
+ /* RACE: Check return value of inet_select_addr instead. */
+ if (__in_dev_get_rtnl(dev_out) == NULL) {
dev_put(dev_out);
goto out; /* Wrong error code */
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a958..72b7c22e1ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,7 +2112,6 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans >>= (3 - order);
sysctl_max_syn_backlog = 128;
}
- tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
sysctl_tcp_mem[0] = 768 << order;
sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b940346de4e..ae35e060904 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -27,7 +27,7 @@
*/
static int fast_convergence = 1;
-static int max_increment = 32;
+static int max_increment = 16;
static int low_window = 14;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int low_utilization_threshold = 153;
@@ -136,7 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1))
/* slow start */
ca->cnt = (cwnd * (BICTCP_B-1))
- / cwnd-ca->last_max_cwnd;
+ / (cwnd - ca->last_max_cwnd);
else
/* linear increase */
ca->cnt = cwnd / max_increment;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a7537c7bbd0..3e98b57578d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -355,8 +355,6 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
app_win -= icsk->icsk_ack.rcv_mss;
app_win = max(app_win, 2U*tp->advmss);
- if (!ofo_win)
- tp->window_clamp = min(tp->window_clamp, app_win);
tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
}
}
@@ -2241,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
/* Note, it is the only place, where
* fast path is recovered for sending TCP.
*/
+ tp->pred_flags = 0;
tcp_fast_path_check(sk, tp);
if (nwin > tp->max_window) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 13dfb391cdf..49d67cd75ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
.lhash_lock = RW_LOCK_UNLOCKED,
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
- .portalloc_lock = SPIN_LOCK_UNLOCKED,
- .port_rover = 1024 - 1,
};
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -130,19 +128,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
- const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
- struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
+ unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
+ prefetch(head->chain.first);
write_lock(&head->lock);
/* Check TIME-WAIT sockets first. */
sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
tw = inet_twsk(sk2);
- if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
struct tcp_sock *tp = tcp_sk(sk);
@@ -179,7 +178,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
/* And established part... */
sk_for_each(sk2, node, &head->chain) {
- if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
goto not_unique;
}
@@ -188,7 +187,7 @@ unique:
* in hash table socket with a funny identity. */
inet->num = lport;
inet->sport = htons(lport);
- sk->sk_hashent = hash;
+ sk->sk_hash = hash;
BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain);
sock_prot_inc_use(sk->sk_prot);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af..b1a63b2c6b4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
- newicsk->icsk_ca_ops = &tcp_reno;
+ newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5dd6dd7d091..b907456a79f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -190,7 +190,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set initial window to value enough for senders,
- * following RFC1414. Senders, not following this RFC,
+ * following RFC2414. Senders, not following this RFC,
* will be satisfied with 2.
*/
if (mss > (1<<*rcv_wscale)) {
@@ -435,8 +435,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
int nsize, old_factor;
u16 flags;
- BUG_ON(len >= skb->len);
-
+ BUG_ON(len > skb->len);
nsize = skb_headlen(skb) - len;
if (nsize < 0)
nsize = 0;
@@ -509,7 +508,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
tp->lost_out -= diff;
tp->left_out -= diff;
}
+
if (diff > 0) {
+ /* Adjust Reno SACK estimate. */
+ if (!tp->rx_opt.sack_ok) {
+ tp->sacked_out -= diff;
+ if ((int)tp->sacked_out < 0)
+ tp->sacked_out = 0;
+ tcp_sync_left_out(tp);
+ }
+
tp->fackets_out -= diff;
if ((int)tp->fackets_out < 0)
tp->fackets_out = 0;
@@ -1601,7 +1609,7 @@ void tcp_send_fin(struct sock *sk)
* was unread data in the receive queue. This behavior is recommended
* by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
*/
-void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;