Diffstat (limited to 'net/ipv4')
47 files changed, 630 insertions, 338 deletions
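Most of the churn in this batch is mechanical: the old __in_dev_get() accessor is split into __in_dev_get_rcu(), for callers running inside an rcu_read_lock() section (arp_constructor(), inet_select_addr(), fib_validate_source(), ...), and __in_dev_get_rtnl(), for configuration paths that already hold the RTNL (devinet_ioctl(), inetdev_event(), vif_add(), ...). A minimal sketch of the two calling conventions, assuming the 2.6.14-era <linux/inetdevice.h> and <linux/rtnetlink.h> declarations; the example_*() callers are hypothetical and only illustrate the locking contract used throughout the hunks below:

/* Packet/softirq path: the in_device is protected by the RCU read lock. */
static u32 example_first_local_addr(struct net_device *dev)
{
	struct in_device *in_dev;
	u32 addr = 0;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev && in_dev->ifa_list)
		addr = in_dev->ifa_list->ifa_local;
	rcu_read_unlock();

	return addr;
}

/* Configuration path: the caller must already hold the RTNL. */
static void example_set_proxy_arp(struct net_device *dev, int on)
{
	struct in_device *in_dev;

	ASSERT_RTNL();
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev)
		in_dev->cnf.proxy_arp = on;
}

The diff below applies exactly this split file by file; hunks that only rename the accessor are otherwise unchanged.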
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8bf312bdea1..b425748f02d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->type = inet_addr_type(addr); rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; @@ -697,12 +697,6 @@ void arp_send(int type, int ptype, u32 dest_ip, arp_xmit(skb); } -static void parp_redo(struct sk_buff *skb) -{ - nf_reset(skb); - arp_rcv(skb, skb->dev, NULL, skb->dev); -} - /* * Process an arp request. */ @@ -922,6 +916,11 @@ out: return 0; } +static void parp_redo(struct sk_buff *skb) +{ + arp_process(skb); +} + /* * Receive an arp request from the device layer. @@ -990,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 1; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 1; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; return 0; } return -ENXIO; @@ -1096,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 0; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 0; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; return 0; } return -ENXIO; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ba2895ae815..4ec4b2ca6ab 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto out; rc = -ENOBUFS; - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { in_dev = inetdev_init(dev); if (!in_dev) goto out; @@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if (colon) *colon = ':'; - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { + u32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); @@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if ((dev->flags & IFF_BROADCAST) && (ifa->ifa_prefixlen < 31) && (ifa->ifa_broadcast == - (ifa->ifa_local|~ifa->ifa_mask))) { + (ifa->ifa_local|~old_mask))) { ifa->ifa_broadcast = (ifa->ifa_local | ~sin->sin_addr.s_addr); } @@ -748,7 +749,7 @@ rarok: static int inet_gifconf(struct net_device *dev, char __user *buf, int len) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -791,7 +792,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; @@ -818,7 +819,7 @@ no_in_dev: read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = 
__in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; for_primary_ifa(in_dev) { @@ -887,7 +888,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop if (dev) { rcu_read_lock(); - if ((in_dev = __in_dev_get(dev))) + if ((in_dev = __in_dev_get_rcu(dev))) addr = confirm_addr_indev(in_dev, dst, local, scope); rcu_read_unlock(); @@ -897,7 +898,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev))) { + if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; @@ -957,7 +958,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1078,7 +1079,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) s_ip_idx = 0; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); continue; } @@ -1149,7 +1150,7 @@ void inet_forward_change(void) for (dev = dev_base; dev; dev = dev->next) { struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) in_dev->cnf.forwarding = on; rcu_read_unlock(); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b5a09d1b90..1b18ce66e7b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -5,6 +5,7 @@ #include <net/esp.h> #include <asm/scatterlist.h> #include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <net/icmp.h> @@ -42,10 +43,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; - clen = (clen + 2 + blksize-1)&~(blksize-1); + blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) - clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); + clen = ALIGN(clen, esp->conf.padlen); if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) goto error; @@ -143,7 +144,7 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc struct ip_esp_hdr *esph; struct esp_data *esp = x->data; struct sk_buff *trailer; - int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -304,16 +305,16 @@ static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); if (x->props.mode) { - mtu = (mtu + 2 + blksize-1)&~(blksize-1); + mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. 
*/ - mtu += 2 + blksize; + mtu = ALIGN(mtu + 2, 4) + blksize - 4; } if (esp->conf.padlen) - mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); + mtu = ALIGN(mtu, esp->conf.padlen); return mtu + x->props.header_len + esp->auth.icv_trunc_len; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e1379f7126..990633c09df 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, no_addr = rpf = 0; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); @@ -591,7 +591,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: fib_del_ifaddr(ifa); - if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { + if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. Disable IP. */ @@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d41219e8037..186f20c4a45 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, rta->rta_oif = &dev->ifindex; if (colon) { struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return -ENODEV; *colon = ':'; @@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev) } if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) continue; - if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) + if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90ae70870a1..66247f38b37 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -286,6 +286,8 @@ static inline void check_tnode(const struct tnode *tn) static int halve_threshold = 25; static int inflate_threshold = 50; +static int halve_threshold_root = 15; +static int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) @@ -449,6 +451,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) int i; int err = 0; struct tnode *old_tn; + int inflate_threshold_use; + int halve_threshold_use; if (!tn) return NULL; @@ -541,10 +545,17 @@ static struct node *resize(struct trie *t, struct tnode *tn) check_tnode(tn); + /* Keep root node larger */ + + if(!tn->parent) + inflate_threshold_use = inflate_threshold_root; + else + inflate_threshold_use = inflate_threshold; + err = 0; while ((tn->full_children > 0 && 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= - inflate_threshold * tnode_child_length(tn))) { + inflate_threshold_use * tnode_child_length(tn))) { old_tn = tn; tn = inflate(t, tn); @@ -564,10 +575,18 @@ static struct node *resize(struct trie *t, struct tnode *tn) * node is above threshold. 
*/ + + /* Keep root node larger */ + + if(!tn->parent) + halve_threshold_use = halve_threshold_root; + else + halve_threshold_use = halve_threshold; + err = 0; while (tn->bits > 1 && 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold * tnode_child_length(tn)) { + halve_threshold_use * tnode_child_length(tn)) { old_tn = tn; tn = halve(t, tn); @@ -1086,7 +1105,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) } if (tp && tp->pos + tp->bits > 32) - printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", + printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ @@ -1832,16 +1851,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi i++; continue; } - if (fa->fa_info->fib_nh == NULL) { - printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } - if (fa->fa_info == NULL) { - printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } + BUG_ON(!fa->fa_info); if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -1964,7 +1974,7 @@ struct fib_table * __init fib_hash_init(int id) trie_main = t; if (id == RT_TABLE_LOCAL) - printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); + printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; } @@ -2394,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) prefix = htonl(l->key); list_for_each_entry_rcu(fa, &li->falh, fa_list) { - const struct fib_info *fi = rcu_dereference(fa->fa_info); + const struct fib_info *fi = fa->fa_info; unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); if (fa->fa_type == RTN_BROADCAST diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 24eb56ae1b5..175e093ec56 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -188,7 +188,7 @@ struct icmp_err icmp_err_convert[] = { /* Control parameters for ECHO replies. */ int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts; +int sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? 
*/ int sysctl_icmp_ignore_bogus_error_responses; @@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops) struct inet_sock *inet; int i; - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { int err; - if (!cpu_possible(i)) - continue; - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &per_cpu(__icmp_socket, i)); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 70c44e4c3ce..c6247fc8406 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) } if (dev) { imr->imr_ifindex = dev->ifindex; - idev = __in_dev_get(dev); + idev = __in_dev_get_rtnl(dev); } return idev; } @@ -1908,8 +1908,11 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max)); goto done; } - } else + } else { newpsl = NULL; + (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, + msf->imsf_fmode, 0, NULL, 0); + } psl = pmc->sflist; if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fe3c6d3d0c9..3fe021f1a56 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, int low = sysctl_local_port_range[0]; int high = sysctl_local_port_range[1]; int remaining = (high - low) + 1; - int rover; + int rover = net_random() % (high - low) + low; - spin_lock(&hashinfo->portalloc_lock); - if (hashinfo->port_rover < low) - rover = low; - else - rover = hashinfo->port_rover; do { - rover++; - if (rover > high) - rover = low; head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) @@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, break; next: spin_unlock(&head->lock); + if (++rover > high) + rover = low; } while (--remaining > 0); - hashinfo->port_rover = rover; - spin_unlock(&hashinfo->portalloc_lock); /* Exhausted local port range during search? It is not * possible for us to be holding one of the bind hash @@ -494,7 +486,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const unsigned int __nocast priority) + const gfp_t priority) { struct sock *newsk = sk_clone(sk, priority); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4d1502a4985..a010e9a6881 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { @@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. 
Note, that any socket with inet->num != 0 MUST be bound in @@ -106,11 +106,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_dport = inet->dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; - tw->tw_hashent = sk->sk_hashent; + tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } return tw; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f0d5740d7e2..896ce3f8f53 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->u.dst.dev; ip_rt_put(rt); - if (__in_dev_get(dev) == NULL) + if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; - ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); } return 0; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3f1a263e124..17758234a3e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb) { IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size) + if (skb->len > dst_mtu(skb->dst) && + !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) return ip_fragment(skb, ip_finish_output); else return ip_finish_output(skb); @@ -391,6 +392,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->nfct = from->nfct; nf_conntrack_get(to->nfct); to->nfctinfo = from->nfctinfo; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + to->ipvs_property = from->ipvs_property; +#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; @@ -685,6 +689,60 @@ csum_page(struct page *page, int offset, int copy) return csum; } +inline int ip_ufo_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int hh_len, int fragheaderlen, + int transhdrlen, int mtu,unsigned int flags) +{ + struct sk_buff *skb; + int err; + + /* There is support for UDP fragmentation offload by network + * device, so create one single skb packet containing complete + * udp datagram + */ + if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = sock_alloc_send_skb(sk, + hh_len + fragheaderlen + transhdrlen + 20, + (flags & MSG_DONTWAIT), &err); + + if (skb == NULL) + return err; + + /* reserve space for Hardware header */ + skb_reserve(skb, hh_len); + + /* create space for UDP/IP header */ + skb_put(skb,fragheaderlen + transhdrlen); + + /* initialize network header pointer */ + skb->nh.raw = skb->data; + + /* initialize protocol header pointer */ + skb->h.raw = skb->data + fragheaderlen; + + skb->ip_summed = CHECKSUM_HW; + skb->csum = 0; + sk->sk_sndmsg_off = 0; + } + + err = skb_append_datato_frags(sk,skb, getfrag, from, + (length - transhdrlen)); + if (!err) { + /* specify the length of each IP datagram fragment*/ + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; + } + /* There is not enough support do UFO , + * so follow normal path + */ + kfree_skb(skb); + return err; +} + /* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. 
Each pieces will be holded on the socket @@ -774,6 +832,15 @@ int ip_append_data(struct sock *sk, csummode = CHECKSUM_HW; inet->cork.length += length; + if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + if(ip_ufo_append_data(sk, getfrag, from, length, hh_len, + fragheaderlen, transhdrlen, mtu, flags)) + goto error; + + return 0; + } /* So, what's going on in the loop below? * @@ -1005,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return -EINVAL; inet->cork.length += size; + if ((sk->sk_protocol == IPPROTO_UDP) && + (rt->u.dst.dev->features & NETIF_F_UFO)) + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + while (size > 0) { int i; - /* Check if the remaining data fits into current packet. */ - len = mtu - skb->len; - if (len < size) - len = maxfraglen - skb->len; + if (skb_shinfo(skb)->ufo_size) + len = size; + else { + + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; + } if (len <= 0) { struct sk_buff *skb_prev; char *data; @@ -1020,10 +1096,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, int alloclen; skb_prev = skb; - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; - else - fraggap = 0; + fraggap = skb_prev->len - maxfraglen; alloclen = fragheaderlen + hh_len + fraggap + 15; skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9dbf5909f3a..302b7eb507c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) goto failure; in_dev->cnf.rp_filter = 0; @@ -278,7 +278,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { in_dev->cnf.mc_forwarding--; ip_rt_multicast_event(in_dev); } @@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return -EINVAL; } - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; in_dev->cnf.mc_forwarding++; dev_set_allmulti(dev, +1); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 6e092dadb38..fc6f95aaa96 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = { /* * Replace a segment of data with a new segment */ -int ip_vs_skb_replace(struct sk_buff *skb, int pri, +int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len) { struct iphdr *iph; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3cf9b451675..7d917e4ce1d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -139,9 +139,10 @@ config IP_NF_AMANDA config IP_NF_PPTP tristate 'PPTP protocol support' + depends on IP_NF_CONNTRACK help This module adds support for PPTP (Point to Point Tunnelling - Protocol, RFC2637) conncection tracking and NAT. + Protocol, RFC2637) connection tracking and NAT. If you are running PPTP sessions over a stateful firewall or NAT box, you may want to enable this feature. @@ -498,9 +499,14 @@ config IP_NF_TARGET_LOG To compile it as a module, choose M here. If unsure, say N. 
config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (OBSOLETE)" depends on IP_NF_IPTABLES ---help--- + + This option enables the old IPv4-only "ipt_ULOG" implementation + which has been obsoleted by the new "nfnetlink_log" code (see + CONFIG_NETFILTER_NETLINK_LOG). + This option adds a `ULOG' target, which allows you to create rules in any iptables table. The packet is passed to a userspace logging daemon using netlink multicast sockets; unlike the LOG target @@ -537,6 +543,17 @@ config IP_NF_TARGET_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_NFQUEUE + tristate "NFQUEUE Target Support" + depends on IP_NF_IPTABLES + help + This Target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + # NAT + specific targets config IP_NF_NAT tristate "Full NAT" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3d45d3c0283..dab4b58dd31 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,7 +4,8 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o -iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o @@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches @@ -92,6 +93,7 @@ obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o @@ -101,4 +103,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o -obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa163425668..3c2e9639bba 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -347,58 +347,106 @@ unsigned int arpt_do_table(struct sk_buff **pskb, return verdict; } -static inline void *find_inlist_lock_noload(struct list_head *head, - const char *name, - int *error, - struct semaphore *mutex) +/* + * These are weird, but module loading must not be done with mutex + * held (since they will register), and we have to have a single + * function to use try_then_request_module(). + */ + +/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. 
*/ +static inline struct arpt_table *find_table_lock(const char *name) { - void *ret; + struct arpt_table *t; - *error = down_interruptible(mutex); - if (*error != 0) - return NULL; + if (down_interruptible(&arpt_mutex) != 0) + return ERR_PTR(-EINTR); - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - up(mutex); - } - return ret; + list_for_each_entry(t, &arpt_tables, list) + if (strcmp(t->name, name) == 0 && try_module_get(t->me)) + return t; + up(&arpt_mutex); + return NULL; } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else -static void * -find_inlist_lock(struct list_head *head, - const char *name, - const char *prefix, - int *error, - struct semaphore *mutex) + +/* Find target, grabs ref. Returns ERR_PTR() on error. */ +static inline struct arpt_target *find_target(const char *name, u8 revision) { - void *ret; + struct arpt_target *t; + int err = 0; - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - duprintf("find_inlist: loading `%s%s'.\n", prefix, name); - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); + if (down_interruptible(&arpt_mutex) != 0) + return ERR_PTR(-EINTR); + + list_for_each_entry(t, &arpt_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision == revision) { + if (try_module_get(t->me)) { + up(&arpt_mutex); + return t; + } + } else + err = -EPROTOTYPE; /* Found something. */ + } } + up(&arpt_mutex); + return ERR_PTR(err); +} - return ret; +struct arpt_target *arpt_find_target(const char *name, u8 revision) +{ + struct arpt_target *target; + + target = try_then_request_module(find_target(name, revision), + "arpt_%s", name); + if (IS_ERR(target) || !target) + return NULL; + return target; } -#endif -static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex) +static int target_revfn(const char *name, u8 revision, int *bestp) { - return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex); + struct arpt_target *t; + int have_rev = 0; + + list_for_each_entry(t, &arpt_target, list) { + if (strcmp(t->name, name) == 0) { + if (t->revision > *bestp) + *bestp = t->revision; + if (t->revision == revision) + have_rev =1; + } + } + return have_rev; } -static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex) +/* Returns true or false (if no such extension at all) */ +static inline int find_revision(const char *name, u8 revision, + int (*revfn)(const char *, u8, int *), + int *err) { - return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex); + int have_rev, best = -1; + + if (down_interruptible(&arpt_mutex) != 0) { + *err = -EINTR; + return 1; + } + have_rev = revfn(name, revision, &best); + up(&arpt_mutex); + + /* Nothing at all? Return 0 to try loading module. */ + if (best == -1) { + *err = -ENOENT; + return 0; + } + + *err = best; + if (!have_rev) + *err = -EPROTONOSUPPORT; + return 1; } + /* All zeroes == unconditional rule. 
*/ static inline int unconditional(const struct arpt_arp *arp) { @@ -544,17 +592,15 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i } t = arpt_get_target(e); - target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex); - if (!target) { + target = try_then_request_module(find_target(t->u.user.name, + t->u.user.revision), + "arpt_%s", t->u.user.name); + if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); + ret = target ? PTR_ERR(target) : -ENOENT; goto out; } - if (!try_module_get((target->me))) { - ret = -ENOENT; - goto out_unlock; - } t->u.kernel.target = target; - up(&arpt_mutex); if (t->u.kernel.target == &arpt_standard_target) { if (!standard_check(t, size)) { @@ -576,8 +622,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i (*i)++; return 0; -out_unlock: - up(&arpt_mutex); out: return ret; } @@ -716,8 +760,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +813,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -844,8 +890,8 @@ static int get_entries(const struct arpt_get_entries *entries, int ret; struct arpt_table *t; - t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex); - if (t) { + t = find_table_lock(entries->name); + if (t || !IS_ERR(t)) { duprintf("t->private->number = %u\n", t->private->number); if (entries->size == t->private->size) @@ -857,10 +903,10 @@ static int get_entries(const struct arpt_get_entries *entries, entries->size); ret = -EINVAL; } + module_put(t->me); up(&arpt_mutex); } else - duprintf("get_entries: Can't find %s!\n", - entries->name); + ret = t ? PTR_ERR(t) : -ENOENT; return ret; } @@ -885,7 +931,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -910,22 +957,19 @@ static int do_replace(void __user *user, unsigned int len) duprintf("arp_tables: Translated table\n"); - t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex); - if (!t) + t = try_then_request_module(find_table_lock(tmp.name), + "arptable_%s", tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; + } /* You lied! 
*/ if (tmp.valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", tmp.valid_hooks, t->valid_hooks); ret = -EINVAL; - goto free_newinfo_counters_untrans_unlock; - } - - /* Get a reference in advance, we're not allowed fail later */ - if (!try_module_get(t->me)) { - ret = -EBUSY; - goto free_newinfo_counters_untrans_unlock; + goto put_module; } oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); @@ -956,7 +1000,6 @@ static int do_replace(void __user *user, unsigned int len) put_module: module_put(t->me); - free_newinfo_counters_untrans_unlock: up(&arpt_mutex); free_newinfo_counters_untrans: ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); @@ -986,7 +1029,7 @@ static int do_add_counters(void __user *user, unsigned int len) unsigned int i; struct arpt_counters_info tmp, *paddc; struct arpt_table *t; - int ret; + int ret = 0; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1003,9 +1046,11 @@ static int do_add_counters(void __user *user, unsigned int len) goto free; } - t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex); - if (!t) + t = find_table_lock(tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; goto free; + } write_lock_bh(&t->lock); if (t->private->number != paddc->num_counters) { @@ -1022,6 +1067,7 @@ static int do_add_counters(void __user *user, unsigned int len) unlock_up_free: write_unlock_bh(&t->lock); up(&arpt_mutex); + module_put(t->me); free: vfree(paddc); @@ -1076,8 +1122,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; - t = arpt_find_table_lock(name, &ret, &arpt_mutex); - if (t) { + + t = try_then_request_module(find_table_lock(name), + "arptable_%s", name); + if (t && !IS_ERR(t)) { struct arpt_getinfo info; info.valid_hooks = t->valid_hooks; @@ -1093,9 +1141,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; else ret = 0; - up(&arpt_mutex); - } + module_put(t->me); + } else + ret = t ? 
PTR_ERR(t) : -ENOENT; } break; @@ -1116,6 +1165,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } + case ARPT_SO_GET_REVISION_TARGET: { + struct arpt_get_revision rev; + + if (*len != sizeof(rev)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&rev, user, sizeof(rev)) != 0) { + ret = -EFAULT; + break; + } + + try_then_request_module(find_revision(rev.name, rev.revision, + target_revfn, &ret), + "arpt_%s", rev.name); + break; + } + default: duprintf("do_arpt_get_ctl: unknown request %i\n", cmd); ret = -EINVAL; @@ -1133,12 +1200,9 @@ int arpt_register_target(struct arpt_target *target) if (ret != 0) return ret; - if (!list_named_insert(&arpt_target, target)) { - duprintf("arpt_register_target: `%s' already in list!\n", - target->name); - ret = -EINVAL; - } + list_add(&target->list, &arpt_target); up(&arpt_mutex); + return ret; } @@ -1158,7 +1222,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index dc20881004b..fa3f914117e 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index c1f82e0c81c..422ab68ee7f 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -50,7 +50,7 @@ #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/listhelp.h> -#define IP_CONNTRACK_VERSION "2.3" +#define IP_CONNTRACK_VERSION "2.4" #if 0 #define DEBUGP printk @@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); static int ip_conntrack_hash_rnd_initted; static unsigned int ip_conntrack_hash_rnd; -static u_int32_t -hash_conntrack(const struct ip_conntrack_tuple *tuple) +static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, + unsigned int size, unsigned int rnd) { -#if 0 - dump_tuple(tuple); -#endif return (jhash_3words(tuple->src.ip, (tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), - ip_conntrack_hash_rnd) % ip_conntrack_htable_size); + rnd) % size); +} + +static u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ + return __hash_conntrack(tuple, ip_conntrack_htable_size, + ip_conntrack_hash_rnd); } int @@ -1112,45 +1116,49 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* 
Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + event = IPCT_REFRESH; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - /* FIXME: We loose some REFRESH events if this function - * is called without an skb. I'll fix this later -HW */ - if (skb) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + event = IPCT_REFRESH; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (event) + ip_conntrack_event_cache(event, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ @@ -1337,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data) return 1; } -static void free_conntrack_hash(void) +static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) { - if (ip_conntrack_vmalloc) - vfree(ip_conntrack_hash); + if (vmalloced) + vfree(hash); else - free_pages((unsigned long)ip_conntrack_hash, - get_order(sizeof(struct list_head) - * ip_conntrack_htable_size)); + free_pages((unsigned long)hash, + get_order(sizeof(struct list_head) * size)); } void ip_conntrack_flush() @@ -1374,12 +1381,83 @@ void ip_conntrack_cleanup(void) ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); - free_conntrack_hash(); + free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, + ip_conntrack_htable_size); nf_unregister_sockopt(&so_getorigdst); } -static int hashsize; -module_param(hashsize, int, 0400); +static struct list_head *alloc_hashtable(int size, int *vmalloced) +{ + struct list_head *hash; + unsigned int i; + + *vmalloced = 0; + hash = (void*)__get_free_pages(GFP_KERNEL, + get_order(sizeof(struct list_head) + * size)); + if (!hash) { + *vmalloced = 1; + printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); + hash = vmalloc(sizeof(struct list_head) * size); + } + + if (hash) + for (i = 0; i < size; i++) + INIT_LIST_HEAD(&hash[i]); + + return hash; +} + +int set_hashsize(const char *val, struct kernel_param *kp) +{ + int i, bucket, hashsize, vmalloced; + int old_vmalloced, old_size; + int rnd; + struct list_head *hash, *old_hash; + struct ip_conntrack_tuple_hash *h; + + /* On boot, we can set this without any fancy locking. 
*/ + if (!ip_conntrack_htable_size) + return param_set_int(val, kp); + + hashsize = simple_strtol(val, NULL, 0); + if (!hashsize) + return -EINVAL; + + hash = alloc_hashtable(hashsize, &vmalloced); + if (!hash) + return -ENOMEM; + + /* We have to rehash for the new table anyway, so we also can + * use a new random seed */ + get_random_bytes(&rnd, 4); + + write_lock_bh(&ip_conntrack_lock); + for (i = 0; i < ip_conntrack_htable_size; i++) { + while (!list_empty(&ip_conntrack_hash[i])) { + h = list_entry(ip_conntrack_hash[i].next, + struct ip_conntrack_tuple_hash, list); + list_del(&h->list); + bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + list_add_tail(&h->list, &hash[bucket]); + } + } + old_size = ip_conntrack_htable_size; + old_vmalloced = ip_conntrack_vmalloc; + old_hash = ip_conntrack_hash; + + ip_conntrack_htable_size = hashsize; + ip_conntrack_vmalloc = vmalloced; + ip_conntrack_hash = hash; + ip_conntrack_hash_rnd = rnd; + write_unlock_bh(&ip_conntrack_lock); + + free_conntrack_hash(old_hash, old_vmalloced, old_size); + return 0; +} + +module_param_call(hashsize, set_hashsize, param_get_uint, + &ip_conntrack_htable_size, 0600); int __init ip_conntrack_init(void) { @@ -1388,9 +1466,7 @@ int __init ip_conntrack_init(void) /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ - if (hashsize) { - ip_conntrack_htable_size = hashsize; - } else { + if (!ip_conntrack_htable_size) { ip_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) / sizeof(struct list_head)); @@ -1412,20 +1488,8 @@ int __init ip_conntrack_init(void) return ret; } - /* AK: the hash table is twice as big than needed because it - uses list_head. it would be much nicer to caches to use a - single pointer list head here. 
*/ - ip_conntrack_vmalloc = 0; - ip_conntrack_hash - =(void*)__get_free_pages(GFP_KERNEL, - get_order(sizeof(struct list_head) - *ip_conntrack_htable_size)); - if (!ip_conntrack_hash) { - ip_conntrack_vmalloc = 1; - printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n"); - ip_conntrack_hash = vmalloc(sizeof(struct list_head) - * ip_conntrack_htable_size); - } + ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, + &ip_conntrack_vmalloc); if (!ip_conntrack_hash) { printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); goto err_unreg_sockopt; @@ -1457,9 +1521,6 @@ int __init ip_conntrack_init(void) ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; write_unlock_bh(&ip_conntrack_lock); - for (i = 0; i < ip_conntrack_htable_size; i++) - INIT_LIST_HEAD(&ip_conntrack_hash[i]); - /* For use by ipt_REJECT */ ip_ct_attach = ip_conntrack_attach; @@ -1474,7 +1535,8 @@ int __init ip_conntrack_init(void) err_free_conntrack_slab: kmem_cache_destroy(ip_conntrack_cachep); err_free_hash: - free_conntrack_hash(); + free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, + ip_conntrack_htable_size); err_unreg_sockopt: nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 79db5b70d5f..4108a5e12b3 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) DEBUGP("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - /* refresh_acct will not modify counters if skb == NULL */ if (del_timer(&sibling->timeout)) sibling->timeout.function((unsigned long)sibling); ip_conntrack_put(sibling); @@ -223,8 +222,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) static inline int exp_gre(struct ip_conntrack *master, u_int32_t seq, - u_int16_t callid, - u_int16_t peer_callid) + __be16 callid, + __be16 peer_callid) { struct ip_conntrack_tuple inv_tuple; struct ip_conntrack_tuple exp_tuples[] = { @@ -263,7 +262,7 @@ exp_gre(struct ip_conntrack *master, exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; exp_orig->mask.dst.u.all = 0; - exp_orig->mask.dst.u.gre.key = 0xffff; + exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; @@ -271,14 +270,10 @@ exp_gre(struct ip_conntrack *master, exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; - exp_orig->dir = IP_CT_DIR_ORIGINAL; - /* both expectations are identical apart from tuple */ memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); - exp_reply->dir = !exp_orig->dir; - if (ip_nat_pptp_hook_exp_gre) ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); else { @@ -340,7 +335,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, unsigned int reqlen; union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); @@ -485,7 +481,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, if (info->pns_call_id != ntohs(*pcid)) { DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(*cid)); + pptp_msg_name[msg], ntohs(*pcid)); break; } @@ -551,7 +547,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, unsigned int reqlen; 
union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; + u_int16_t msg; + __be16 *cid, *pcid; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) @@ -755,7 +752,7 @@ static struct ip_conntrack_helper pptp = { } }, .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = 0xffff } } + .u = { .tcp = { .port = __constant_htons(0xffff) } } }, .dst = { .ip = 0, .u = { .all = 0 }, diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 71ef19d126d..186646eb249 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb, goto out; rcu_read_lock(); - in_dev = __in_dev_get(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, ip_conntrack_expect_related(exp); ip_conntrack_expect_put(exp); - ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); + ip_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index b08a432efcf..82a65043a8e 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, struct nfattr *nest_count = NFA_NEST(skb, type); u_int64_t tmp; - tmp = cpu_to_be64(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - tmp = cpu_to_be64(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); NFA_NEST_END(skb, nest_count); @@ -815,7 +815,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, IPCTNL_MSG_CT_NEW, 1, ct); ip_conntrack_put(ct); if (err <= 0) - goto out; + goto free; err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); if (err < 0) @@ -824,16 +824,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, DEBUGP("leaving\n"); return 0; +free: + kfree_skb(skb2); out: - if (skb2) - kfree_skb(skb2); return -1; } static inline int ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) { - unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); + unsigned long d; + unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -948,6 +949,31 @@ ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) return 0; } +static inline int +ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct ip_conntrack_protocol *proto; + u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + int err = 0; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0) + goto nfattr_failure; + + proto = ip_conntrack_proto_find_get(npt); + if (!proto) + return -EINVAL; + + if (proto->from_nfattr) + err = proto->from_nfattr(tb, ct); + ip_conntrack_proto_put(proto); + + return err; + +nfattr_failure: + return 
-ENOMEM; +} + static int ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) { @@ -973,6 +999,12 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) return err; } + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + DEBUGP("all done\n"); return 0; } @@ -1002,6 +1034,12 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (err < 0) goto err; + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + ct->helper = ip_conntrack_helper_find_get(rtuple); add_timer(&ct->timeout); @@ -1284,21 +1322,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1, exp); if (err <= 0) - goto out; + goto free; ip_conntrack_expect_put(exp); - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); - if (err < 0) - goto free; - - return err; + return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); +free: + kfree_skb(skb2); out: ip_conntrack_expect_put(exp); -free: - if (skb2) - kfree_skb(skb2); return err; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index de3cb9db6f8..744abb9d377 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -247,6 +247,7 @@ static int gre_packet(struct ip_conntrack *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 838d1d69b36..98f0015dd25 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -296,8 +296,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], struct ip_conntrack_tuple *tuple) { if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) + || !tb[CTA_PROTO_ICMP_CODE-1]) return -1; tuple->dst.u.icmp.type = diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index a875f35e576..59a4a0111dd 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -416,6 +416,7 @@ static int sctp_packet(struct ip_conntrack *conntrack, && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { DEBUGP("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 1985abc59d2..d6701cafbcc 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -341,17 +341,43 @@ static int tcp_print_conntrack(struct seq_file *s, static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct) { + struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + read_lock_bh(&tcp_lock); NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); read_unlock_bh(&tcp_lock); + NFA_NEST_END(skb, nest_parms); + return 0; nfattr_failure: read_unlock_bh(&tcp_lock); return -1; } + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = 
cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -1014,7 +1040,8 @@ static int tcp_packet(struct ip_conntrack *conntrack, /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); @@ -1122,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, #endif diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index d3c7808010e..dd476b191f4 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index c3ea891d38e..762f4d93936 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -66,20 +66,20 @@ ip_nat_proto_find_get(u_int8_t protonum) * removed until we've grabbed the reference */ preempt_disable(); p = __ip_nat_proto_find(protonum); - if (p) { - if (!try_module_get(p->me)) - p = &ip_nat_unknown_protocol; - } + if (!try_module_get(p->me)) + p = &ip_nat_unknown_protocol; preempt_enable(); return p; } +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); void ip_nat_proto_put(struct ip_nat_protocol *p) { module_put(p->me); } +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -111,6 +111,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) return csum_fold(csum_partial((char *)diffs, sizeof(diffs), oldcheck^0xFFFF)); } +EXPORT_SYMBOL(ip_nat_cheat_check); /* Is this tuple already taken? (not by us) */ int @@ -127,6 +128,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -347,6 +349,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -387,10 +390,10 @@ manip_pkt(u_int16_t proto, } /* Do packet manipulations according to ip_nat_setup_info. 
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index d3c7808010e..dd476b191f4 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(__ip_ct_refresh_acct);
 EXPORT_SYMBOL(ip_conntrack_expect_alloc);
 EXPORT_SYMBOL(ip_conntrack_expect_put);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c3ea891d38e..762f4d93936 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -66,20 +66,20 @@ ip_nat_proto_find_get(u_int8_t protonum)
 	 * removed until we've grabbed the reference */
 	preempt_disable();
 	p = __ip_nat_proto_find(protonum);
-	if (p) {
-		if (!try_module_get(p->me))
-			p = &ip_nat_unknown_protocol;
-	}
+	if (!try_module_get(p->me))
+		p = &ip_nat_unknown_protocol;
 	preempt_enable();
 
 	return p;
 }
+EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
 
 void
 ip_nat_proto_put(struct ip_nat_protocol *p)
 {
 	module_put(p->me);
 }
+EXPORT_SYMBOL_GPL(ip_nat_proto_put);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
@@ -111,6 +111,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
 	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
 				      oldcheck^0xFFFF));
 }
+EXPORT_SYMBOL(ip_nat_cheat_check);
 
 /* Is this tuple already taken? (not by us) */
 int
@@ -127,6 +128,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 	invert_tuplepr(&reply, tuple);
 	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
 }
+EXPORT_SYMBOL(ip_nat_used_tuple);
 
 /* If we source map this tuple so reply looks like reply_tuple, will
  * that meet the constraints of range. */
@@ -347,6 +349,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
 
 	return NF_ACCEPT;
 }
+EXPORT_SYMBOL(ip_nat_setup_info);
 
 /* Returns true if succeeded. */
 static int
@@ -387,10 +390,10 @@ manip_pkt(u_int16_t proto,
 }
 
 /* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int nat_packet(struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned int hooknum,
-			struct sk_buff **pskb)
+unsigned int ip_nat_packet(struct ip_conntrack *ct,
+			   enum ip_conntrack_info ctinfo,
+			   unsigned int hooknum,
+			   struct sk_buff **pskb)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
@@ -417,12 +420,13 @@ unsigned int nat_packet(struct ip_conntrack *ct,
 	}
 	return NF_ACCEPT;
 }
+EXPORT_SYMBOL_GPL(ip_nat_packet);
 
 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int icmp_reply_translation(struct sk_buff **pskb,
-			   struct ip_conntrack *ct,
-			   enum ip_nat_manip_type manip,
-			   enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
+				  struct ip_conntrack *ct,
+				  enum ip_nat_manip_type manip,
+				  enum ip_conntrack_dir dir)
 {
 	struct {
 		struct icmphdr icmp;
@@ -509,6 +513,7 @@ int icmp_reply_translation(struct sk_buff **pskb,
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
 
 /* Protocol registration. */
 int ip_nat_protocol_register(struct ip_nat_protocol *proto)
@@ -525,6 +530,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
 	write_unlock_bh(&ip_nat_lock);
 	return ret;
 }
+EXPORT_SYMBOL(ip_nat_protocol_register);
 
 /* Noone stores the protocol anywhere; simply delete it. */
 void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
@@ -536,6 +542,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
 	/* Someone could be still looking at the proto in a bh. */
 	synchronize_net();
 }
+EXPORT_SYMBOL(ip_nat_protocol_unregister);
 
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
     defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
@@ -582,7 +589,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
 EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
 #endif
 
-int __init ip_nat_init(void)
+static int __init ip_nat_init(void)
 {
 	size_t i;
 
@@ -624,10 +631,14 @@ static int clean_nat(struct ip_conntrack *i, void *data)
 	return 0;
 }
 
-/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
-void ip_nat_cleanup(void)
+static void __exit ip_nat_cleanup(void)
 {
 	ip_ct_iterate_cleanup(&clean_nat, NULL);
 	ip_conntrack_destroyed = NULL;
 	vfree(bysource);
 }
+
+MODULE_LICENSE("GPL");
+
+module_init(ip_nat_init);
+module_exit(ip_nat_cleanup);
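
With the hunks above, ip_nat_core.c registers itself as an ordinary module (module_init/module_exit) instead of being initialised from ip_nat_standalone's init_or_cleanup(), and each symbol other modules need is exported next to its definition. The skeleton this follows, with illustrative names rather than the real ip_nat entry points:

#include <linux/init.h>
#include <linux/module.h>

static int __init example_init(void)
{
	/* acquire whatever the module needs; 0 means success */
	return 0;
}

static void __exit example_exit(void)
{
	/* release everything example_init() acquired */
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
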
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index d2dd5d31355..5d506e0564d 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 	}
 	return 1;
 }
+EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
 
 /* Generic function for mangling variable-length address changes inside
  * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
@@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 
 	return 1;
 }
+EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
 
 /* Adjust one found SACK option including checksum correction */
 static void
@@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 
 	return 1;
 }
+EXPORT_SYMBOL(ip_nat_seq_adjust);
 
 /* Setup NAT on this expected conntrack so it follows master. */
 /* If we fail to get a free NAT slot, we'll get dropped on confirm */
@@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct,
 	/* hook doesn't matter, but it has to do destination manip */
 	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
 }
+EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd0684d30..ee6ab74ad3a 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -216,6 +216,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
 	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
 	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	expect_orig->dir = IP_CT_DIR_ORIGINAL;
 	inv_t.src.ip = reply_t->src.ip;
 	inv_t.dst.ip = reply_t->dst.ip;
 	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +234,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
 	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
 	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	expect_reply->dir = IP_CT_DIR_REPLY;
 	inv_t.src.ip = orig_t->src.ip;
 	inv_t.dst.ip = orig_t->dst.ip;
 	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c128540167..f7cad7cf1ae 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
 		break;
 	case GRE_VERSION_PPTP:
 		DEBUGP("call_id -> 0x%04x\n",
-			ntohl(tuple->dst.u.gre.key));
-		pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+			ntohs(tuple->dst.u.gre.key));
+		pgreh->call_id = tuple->dst.u.gre.key;
 		break;
 	default:
 		DEBUGP("can't nat unknown GRE version\n");
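
The ip_nat_proto_gre.c change works because the PPTP call ID in the tuple is already a 16-bit value in network byte order, so it can be copied into the header as-is; the old htons(ntohl(...)) pair byte-swapped it as if it were 32-bit, which on little-endian machines truncates the result to zero. A standalone illustration (the concrete numbers in the comments assume a little-endian host):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t call_id_net = htons(0x1234);	/* call ID 0x1234, network order */

	uint16_t wrong = htons(ntohl(call_id_net));	/* old code path */
	uint16_t right = call_id_net;			/* new code path: store as-is */

	printf("wrong: 0x%04x\n", ntohs(wrong));	/* 0x0000 on little-endian */
	printf("right: 0x%04x\n", ntohs(right));	/* 0x1234 everywhere */
	return 0;
}
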
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef56f84..f0099a646a0 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
 
 struct ip_nat_protocol ip_nat_unknown_protocol = {
 	.name			= "unknown",
-	.me			= THIS_MODULE,
+	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
 	.in_range		= unknown_in_range,
 	.unique_tuple		= unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 0ff368b131f..30cd4e18c12 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum,
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!icmp_reply_translation(pskb, ct, maniptype,
-						    CTINFO2DIR(ctinfo)))
+			if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
+							   CTINFO2DIR(ctinfo)))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum,
 	}
 
 	IP_NF_ASSERT(info);
-	return nat_packet(ct, ctinfo, hooknum, pskb);
+	return ip_nat_packet(ct, ctinfo, hooknum, pskb);
 }
 
 static unsigned int
@@ -325,15 +325,10 @@ static int init_or_cleanup(int init)
 		printk("ip_nat_init: can't setup rules.\n");
 		goto cleanup_nothing;
 	}
-	ret = ip_nat_init();
-	if (ret < 0) {
-		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_rule_init;
-	}
 	ret = nf_register_hook(&ip_nat_in_ops);
 	if (ret < 0) {
 		printk("ip_nat_init: can't register in hook.\n");
-		goto cleanup_nat;
+		goto cleanup_rule_init;
 	}
 	ret = nf_register_hook(&ip_nat_out_ops);
 	if (ret < 0) {
@@ -374,8 +369,6 @@ static int init_or_cleanup(int init)
 	nf_unregister_hook(&ip_nat_out_ops);
  cleanup_inops:
 	nf_unregister_hook(&ip_nat_in_ops);
- cleanup_nat:
-	ip_nat_cleanup();
 cleanup_rule_init:
 	ip_nat_rule_cleanup();
 cleanup_nothing:
@@ -395,14 +388,4 @@ static void __exit fini(void)
 
 module_init(init);
 module_exit(fini);
-EXPORT_SYMBOL(ip_nat_setup_info);
-EXPORT_SYMBOL(ip_nat_protocol_register);
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-EXPORT_SYMBOL(ip_nat_cheat_check);
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-EXPORT_SYMBOL(ip_nat_used_tuple);
-EXPORT_SYMBOL(ip_nat_follow_master);
 MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d54f14d926f..36339eb39e1 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	pmsg->timestamp_sec   = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
-	pmsg->timestamp_usec  = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
+	pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
+	pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
 	pmsg->mark            = entry->skb->nfmark;
 	pmsg->hook            = entry->info->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index eef99a1b5de..75c27e92f6a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -27,6 +27,7 @@
 #include <asm/semaphore.h>
 #include <linux/proc_fs.h>
 #include <linux/err.h>
+#include <linux/cpumask.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 
@@ -921,8 +922,10 @@ translate_table(const char *name,
 	}
 
 	/* And one copy for every other CPU */
-	for (i = 1; i < num_possible_cpus(); i++) {
-		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+	for_each_cpu(i) {
+		if (i == 0)
+			continue;
+		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
 		       newinfo->entries,
 		       SMP_ALIGN(newinfo->size));
 	}
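
The ip_tables.c conversion matters on machines where possible CPU numbers are sparse: a loop over 0..num_possible_cpus()-1 can touch ids that do not exist and skip ones that do, and the per-CPU table copies have to be sized by the highest possible id plus one rather than by the count. A userspace sketch with a hypothetical possible-CPU map:

#include <stdio.h>

#define NR_CPUS 8
static const int cpu_possible_map[NR_CPUS] = { 1, 1, 0, 0, 1, 1, 0, 0 };

int main(void)
{
	int cpu, possible = 0, highest = -1;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_possible_map[cpu])
			continue;
		possible++;		/* analogous to num_possible_cpus() */
		highest = cpu;		/* analogous to highest_possible_processor_id() */
	}

	printf("possible CPUs: %d, highest id: %d\n", possible, highest);
	/* A loop over 0..possible-1 would visit ids 2 and 3 (not possible)
	 * and skip ids 4 and 5; per-CPU storage must cover highest+1 slots. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_possible_map[cpu])
			printf("visit cpu %d\n", cpu);	/* the for_each_cpu() pattern */
	return 0;
}
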
@@ -943,7 +946,7 @@ replace_table(struct ipt_table *table,
 	struct ipt_entry *table_base;
 	unsigned int i;
 
-	for (i = 0; i < num_possible_cpus(); i++) {
+	for_each_cpu(i) {
 		table_base =
 			(void *)newinfo->entries
 			+ TABLE_OFFSET(newinfo, i);
@@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t,
 	unsigned int cpu;
 	unsigned int i;
 
-	for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+	for_each_cpu(cpu) {
 		i = 0;
 		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
 				  t->size,
@@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len)
 		return -ENOMEM;
 
 	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(tmp.size) * num_possible_cpus());
+			  + SMP_ALIGN(tmp.size) *
+			  		(highest_possible_processor_id()+1));
 	if (!newinfo)
 		return -ENOMEM;
 
@@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
 		= { 0, 0, 0, { 0 }, { 0 }, { } };
 
 	newinfo = vmalloc(sizeof(struct ipt_table_info)
-			  + SMP_ALIGN(repl->size) * num_possible_cpus());
+			  + SMP_ALIGN(repl->size) *
+			  		(highest_possible_processor_id()+1));
 	if (!newinfo)
 		return -ENOMEM;
 
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 13463802133..05d66ab5942 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
 
 static int __init init(void)
 {
+	need_ip_conntrack();
 	return ipt_register_target(&ipt_connmark_reg);
 }
 
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 715cb613405..5245bfd33d5 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb,
 		newdst = 0;
 
 		rcu_read_lock();
-		indev = __in_dev_get((*pskb)->dev);
+		indev = __in_dev_get_rcu((*pskb)->dev);
 		if (indev && (ifa = indev->ifa_list))
 			newdst = ifa->ifa_local;
 		rcu_read_unlock();
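
The __in_dev_get() split into __in_dev_get_rcu()/__in_dev_get_rtnl() that recurs throughout this patch encodes the caller's locking in the name: the _rcu variant is only valid inside rcu_read_lock(), while the _rtnl variant relies on the RTNL being held. A schematic kernel-context sketch of the read side used by redirect_target() above (not a standalone program; the function name is illustrative):

static u32 first_local_addr_rcu(struct net_device *dev)
{
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	u32 addr = 0;

	rcu_read_lock();			/* pin the RCU read-side section */
	in_dev = __in_dev_get_rcu(dev);		/* only valid under rcu_read_lock() */
	if (in_dev && (ifa = in_dev->ifa_list) != NULL)
		addr = ifa->ifa_local;
	rcu_read_unlock();

	return addr;
}
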
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index e2c14f3cb2f..2883ccd8a91 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	/* copy hook, prefix, timestamp, payload, etc. */
 	pm->data_len = copy_len;
-	pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
-	pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
+	pm->timestamp_sec = skb->tstamp.off_sec;
+	pm->timestamp_usec = skb->tstamp.off_usec;
 	pm->mark = skb->nfmark;
 	pm->hook = hooknum;
 	if (prefix != NULL)
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index f5909a4c3fc..e19c2a52d00 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -48,7 +48,7 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip,
 		      unsigned int hook_mask)
 {
 	if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
-		printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n.",
+		printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n",
 		       matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
 		return 0;
 	}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43..a65e508fbd4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
 	unsigned long res = 0;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_cpu(i) {
 		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
 		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8549f26e249..381dd6a6aeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
 		struct in_device *in_dev;
 
 		rcu_read_lock();
-		if ((in_dev = __in_dev_get(dev)) != NULL) {
+		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
 			int our = ip_check_mc(in_dev, daddr, saddr,
 				skb->nh.iph->protocol);
 			if (our
@@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 	err = -ENODEV;
 	if (dev_out == NULL)
 		goto out;
-	if (__in_dev_get(dev_out) == NULL) {
+
+	/* RACE: Check return value of inet_select_addr instead. */
+	if (__in_dev_get_rtnl(dev_out) == NULL) {
 		dev_put(dev_out);
 		goto out;	/* Wrong error code */
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a958..72b7c22e1ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,7 +2112,6 @@ void __init tcp_init(void)
 		sysctl_tcp_max_orphans >>= (3 - order);
 		sysctl_max_syn_backlog = 128;
 	}
-	tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
 
 	sysctl_tcp_mem[0] =  768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b940346de4e..ae35e060904 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -27,7 +27,7 @@
  */
 
 static int fast_convergence = 1;
-static int max_increment = 32;
+static int max_increment = 16;
 static int low_window = 14;
 static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
 static int low_utilization_threshold = 153;
@@ -136,7 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1))
 		/* slow start */
 		ca->cnt = (cwnd * (BICTCP_B-1))
-			/ cwnd-ca->last_max_cwnd;
+			/ (cwnd - ca->last_max_cwnd);
 	else
 		/* linear increase */
 		ca->cnt = cwnd / max_increment;
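
The tcp_bic.c hunk fixes an operator-precedence bug: without the parentheses the division binds tighter than the subtraction, so the "slow start" cnt was computed as (cwnd*(B-1))/cwnd minus last_max_cwnd, which underflows to a huge unsigned value; since ca->cnt is the number of ACKs required per window increment, a huge cnt effectively freezes growth. A standalone check, with BICTCP_B assumed to be 4:

#include <stdio.h>
#include <stdint.h>

#define BICTCP_B 4	/* assumed value of the bictcp "B" constant */

int main(void)
{
	uint32_t cwnd = 100, last_max_cwnd = 80;
	uint32_t buggy, fixed;

	buggy = (cwnd * (BICTCP_B - 1)) / cwnd - last_max_cwnd;	/* (300/100) - 80 wraps to ~4.29e9 */
	fixed = (cwnd * (BICTCP_B - 1)) / (cwnd - last_max_cwnd);	/* 300 / 20 = 15 */

	printf("buggy cnt = %u\n", buggy);
	printf("fixed cnt = %u\n", fixed);
	return 0;
}
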
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a7537c7bbd0..3e98b57578d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -355,8 +355,6 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 		app_win -= icsk->icsk_ack.rcv_mss;
 		app_win = max(app_win, 2U*tp->advmss);
 
-		if (!ofo_win)
-			tp->window_clamp = min(tp->window_clamp, app_win);
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
 	}
 }
@@ -2241,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			/* Note, it is the only place, where
 			 * fast path is recovered for sending TCP.
 			 */
+			tp->pred_flags = 0;
 			tcp_fast_path_check(sk, tp);
 
 			if (nwin > tp->max_window) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 13dfb391cdf..49d67cd75ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
 	.lhash_users	= ATOMIC_INIT(0),
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
-	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
-	.port_rover	= 1024 - 1,
 };
 
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -130,19 +128,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
-	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
+	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
 
+	prefetch(head->chain.first);
 	write_lock(&head->lock);
 
 	/* Check TIME-WAIT sockets first. */
 	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
 		tw = inet_twsk(sk2);
 
-		if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
 			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
 			struct tcp_sock *tp = tcp_sk(sk);
 
@@ -179,7 +178,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
 
 	/* And established part... */
 	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
 			goto not_unique;
 	}
 
@@ -188,7 +187,7 @@ unique:
 	 * in hash table socket with a funny identity. */
 	inet->num = lport;
 	inet->sport = htons(lport);
-	sk->sk_hashent = hash;
+	sk->sk_hash = hash;
 	BUG_TRAP(sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
 	sock_prot_inc_use(sk->sk_prot);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af..b1a63b2c6b4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
 
-		newicsk->icsk_ca_ops = &tcp_reno;
+		newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5dd6dd7d091..b907456a79f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -190,7 +190,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	}
 
 	/* Set initial window to value enough for senders,
-	 * following RFC1414. Senders, not following this RFC,
+	 * following RFC2414. Senders, not following this RFC,
 	 * will be satisfied with 2.
 	 */
 	if (mss > (1<<*rcv_wscale)) {
@@ -435,8 +435,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	int nsize, old_factor;
 	u16 flags;
 
-	BUG_ON(len >= skb->len);
-
+	BUG_ON(len > skb->len);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -509,7 +508,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 			tp->lost_out -= diff;
 			tp->left_out -= diff;
 		}
+		if (diff > 0) {
+			/* Adjust Reno SACK estimate. */
+			if (!tp->rx_opt.sack_ok) {
+				tp->sacked_out -= diff;
+				if ((int)tp->sacked_out < 0)
+					tp->sacked_out = 0;
+				tcp_sync_left_out(tp);
+			}
+
 		tp->fackets_out -= diff;
 		if ((int)tp->fackets_out < 0)
 			tp->fackets_out = 0;
@@ -1601,7 +1609,7 @@ void tcp_send_fin(struct sock *sk)
  * was unread data in the receive queue.  This behavior is recommended
  * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
  */
-void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
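
The tcp_select_initial_window() comment fix earlier in the tcp_output.c hunks points at RFC 2414, which lets senders open with an initial window of min(4*MSS, max(2*MSS, 4380 bytes)); the receive window is sized so such senders are not stalled. A standalone computation of that bound:

#include <stdio.h>

static unsigned int rfc2414_initial_window(unsigned int mss)
{
	unsigned int iw = 2 * mss > 4380 ? 2 * mss : 4380;	/* max(2*MSS, 4380) */

	return iw < 4 * mss ? iw : 4 * mss;			/* min(4*MSS, ...) */
}

int main(void)
{
	printf("mss 536  -> IW %u bytes\n", rfc2414_initial_window(536));	/* 2144 */
	printf("mss 1460 -> IW %u bytes\n", rfc2414_initial_window(1460));	/* 4380 */
	printf("mss 2190 -> IW %u bytes\n", rfc2414_initial_window(2190));	/* 4380 */
	return 0;
}
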