From 6ed106549d17474ca17a16057f4c0ed4eba5a7ca Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Jun 2009 06:03:08 +0000 Subject: net: use NETDEV_TX_OK instead of 0 in ndo_start_xmit() functions This patch is the result of an automatic spatch transformation to convert all ndo_start_xmit() return values of 0 to NETDEV_TX_OK. Some occurences are missed by the automatic conversion, those will be handled in a seperate patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 6 +++--- net/ipv4/ipip.c | 6 +++--- net/ipv4/ipmr.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 44e2a3d2359..cd85ebc119a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -821,7 +821,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_dropped++; dev_kfree_skb(skb); tunnel->recursion--; - return 0; + return NETDEV_TX_OK; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); @@ -889,7 +889,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) IPTUNNEL_XMIT(); tunnel->recursion--; - return 0; + return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); @@ -898,7 +898,7 @@ tx_error: stats->tx_errors++; dev_kfree_skb(skb); tunnel->recursion--; - return 0; + return NETDEV_TX_OK; } static int ipgre_tunnel_bind_dev(struct net_device *dev) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 93e2b787da2..98075b6d619 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -486,7 +486,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_dropped++; dev_kfree_skb(skb); tunnel->recursion--; - return 0; + return NETDEV_TX_OK; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); @@ -524,7 +524,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) IPTUNNEL_XMIT(); tunnel->recursion--; - return 0; + return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); @@ -532,7 +532,7 @@ tx_error: stats->tx_errors++; dev_kfree_skb(skb); tunnel->recursion--; - return 0; + return NETDEV_TX_OK; } static void ipip_tunnel_bind_dev(struct net_device *dev) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9a8da5ed92b..06c33fb6b32 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -212,7 +212,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } static const struct net_device_ops reg_vif_netdev_ops = { -- cgit v1.2.3-70-g09d2 From d7ca4cc01fd154f2da30ae6dae160fa5800af758 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 9 Jul 2009 08:09:47 +0000 Subject: udpv4: Handle large incoming UDP/IPv4 packets and support software UFO. - validate and forward GSO UDP/IPv4 packets from untrusted sources. - do software UFO if the outgoing device doesn't support UFO. Signed-off-by: Sridhar Samudrala Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/udp.h | 3 +++ net/ipv4/af_inet.c | 12 ++++++++++- net/ipv4/udp.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/net/udp.h b/include/net/udp.h index 90e6ce56be6..5fb029f817a 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -207,4 +207,7 @@ extern void udp4_proc_exit(void); #endif extern void udp_init(void); + +extern int udp4_ufo_send_check(struct sk_buff *skb); +extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features); #endif /* _UDP_H */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 566ea6c4321..197d024b253 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1187,6 +1187,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) int proto; int ihl; int id; + unsigned int offset = 0; if (!(features & NETIF_F_V4_CSUM)) features &= ~NETIF_F_SG; @@ -1229,7 +1230,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) skb = segs; do { iph = ip_hdr(skb); - iph->id = htons(id++); + if (proto == IPPROTO_UDP) { + iph->id = htons(id); + iph->frag_off = htons(offset >> 3); + if (skb->next != NULL) + iph->frag_off |= htons(IP_MF); + offset += (skb->len - skb->mac_len - iph->ihl * 4); + } else + iph->id = htons(id++); iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); @@ -1425,6 +1433,8 @@ static struct net_protocol tcp_protocol = { static struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 80e3812837a..7bc2d082a49 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1816,6 +1816,67 @@ void __init udp_init(void) sysctl_udp_wmem_min = SK_MEM_QUANTUM; } +int udp4_ufo_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(*uh))) + return -EINVAL; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + return 0; +} + +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + int offset; + __wsum csum; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + offset = skb->csum_start - skb_headroom(skb); + csum = skb_checksum(skb, offset, skb->len- offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + segs = skb_segment(skb, features); +out: + return segs; +} + EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_prot); -- cgit v1.2.3-70-g09d2 From c482c568577a2b31716e1019f2868bda7cf5629c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jul 2009 00:26:32 +0000 Subject: udp: cleanups Pure style cleanups. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 87 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 43 insertions(+), 44 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7bc2d082a49..29ebb0d27a1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -110,11 +110,12 @@ struct udp_table udp_table; EXPORT_SYMBOL(udp_table); int sysctl_udp_mem[3] __read_mostly; -int sysctl_udp_rmem_min __read_mostly; -int sysctl_udp_wmem_min __read_mostly; - EXPORT_SYMBOL(sysctl_udp_mem); + +int sysctl_udp_rmem_min __read_mostly; EXPORT_SYMBOL(sysctl_udp_rmem_min); + +int sysctl_udp_wmem_min __read_mostly; EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; @@ -158,7 +159,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, */ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2 ) ) + const struct sock *sk2)) { struct udp_hslot *hslot; struct udp_table *udptable = sk->sk_prot->h.udp_table; @@ -221,14 +222,15 @@ fail_unlock: fail: return error; } +EXPORT_SYMBOL(udp_lib_get_port); static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - return ( !ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr )); + return (!ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr)); } int udp_v4_get_port(struct sock *sk, unsigned short snum) @@ -383,8 +385,8 @@ found: void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; - struct iphdr *iph = (struct iphdr*)skb->data; - struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); + struct iphdr *iph = (struct iphdr *)skb->data; + struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct sock *sk; @@ -439,7 +441,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); + ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); } sk->sk_err = err; sk->sk_error_report(sk); @@ -474,7 +476,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames); * (checksum field must be zeroed out) */ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, int len ) + __be32 src, __be32 dst, int len) { unsigned int offset; struct udphdr *uh = udp_hdr(skb); @@ -545,7 +547,7 @@ static int udp_push_pending_frames(struct sock *sk) } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); + udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); goto send; } else /* `normal' UDP */ @@ -553,7 +555,7 @@ static int udp_push_pending_frames(struct sock *sk) /* add protocol-dependent pseudo-header */ uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, - sk->sk_protocol, csum ); + sk->sk_protocol, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -592,7 +594,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * Check the flags. */ - if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ + if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ return -EOPNOTSUPP; ipc.opt = NULL; @@ -619,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * Get and verify the address. */ if (msg->msg_name) { - struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; + struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) { @@ -684,7 +686,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } if (connected) - rt = (struct rtable*)sk_dst_check(sk, 0); + rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { struct flowi fl = { .oif = ipc.oif, @@ -782,6 +784,7 @@ do_confirm: err = 0; goto out; } +EXPORT_SYMBOL(udp_sendmsg); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) @@ -871,6 +874,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return 0; } +EXPORT_SYMBOL(udp_ioctl); /* * This should be easy, if there is something there we @@ -892,7 +896,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * Check any passed addresses */ if (addr_len) - *addr_len=sizeof(*sin); + *addr_len = sizeof(*sin); if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len); @@ -923,9 +927,11 @@ try_again: if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); + msg->msg_iov, copied); else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + err = skb_copy_and_csum_datagram_iovec(skb, + sizeof(struct udphdr), + msg->msg_iov); if (err == -EINVAL) goto csum_copy_err; @@ -941,8 +947,7 @@ try_again: sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ - if (sin) - { + if (sin) { sin->sin_family = AF_INET; sin->sin_port = udp_hdr(skb)->source; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; @@ -995,6 +1000,7 @@ int udp_disconnect(struct sock *sk, int flags) sk_dst_reset(sk); return 0; } +EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) { @@ -1044,7 +1050,7 @@ drop: * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ -int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; @@ -1214,7 +1220,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, proto, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -1355,7 +1361,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int err = 0; int is_udplite = IS_UDPLITE(sk); - if (optlenf_flags & O_NONBLOCK) && - !(sk->sk_shutdown & RCV_SHUTDOWN)){ + if ((mask & POLLRDNORM) && + !(file->f_flags & O_NONBLOCK) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; @@ -1552,6 +1560,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) return mask; } +EXPORT_SYMBOL(udp_poll); struct proto udp_prot = { .name = "UDP", @@ -1582,6 +1591,7 @@ struct proto udp_prot = { .compat_getsockopt = compat_udp_getsockopt, #endif }; +EXPORT_SYMBOL(udp_prot); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS @@ -1703,11 +1713,13 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) rc = -ENOMEM; return rc; } +EXPORT_SYMBOL(udp_proc_register); void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { proc_net_remove(net, afinfo->name); } +EXPORT_SYMBOL(udp_proc_unregister); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, @@ -1741,7 +1753,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) int len; udp4_format_sock(v, seq, state->bucket, &len); - seq_printf(seq, "%*s\n", 127 - len ,""); + seq_printf(seq, "%*s\n", 127 - len, ""); } return 0; } @@ -1864,7 +1876,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) * do checksum of UDP packets sent as multiple IP fragments. */ offset = skb->csum_start - skb_headroom(skb); - csum = skb_checksum(skb, offset, skb->len- offset, 0); + csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; *(__sum16 *)(skb->data + offset) = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; @@ -1877,16 +1889,3 @@ out: return segs; } -EXPORT_SYMBOL(udp_disconnect); -EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_prot); -EXPORT_SYMBOL(udp_sendmsg); -EXPORT_SYMBOL(udp_lib_getsockopt); -EXPORT_SYMBOL(udp_lib_setsockopt); -EXPORT_SYMBOL(udp_poll); -EXPORT_SYMBOL(udp_lib_get_port); - -#ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(udp_proc_register); -EXPORT_SYMBOL(udp_proc_unregister); -#endif -- cgit v1.2.3-70-g09d2 From c3059477fce2d956a0bb3e04357324780c5d8eeb Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 14 Jul 2009 08:33:08 +0000 Subject: ipv4: Use synchronize_rcu() during trie_rebalance() During trie_rebalance() we free memory after resizing with call_rcu(), but large updates, especially with PREEMPT_NONE configs, can cause memory stresses, so this patch calls synchronize_rcu() in tnode_free_flush() after each sync_pages to guarantee such freeing (especially before resizing the root node). The value of sync_pages = 128 is based on Pawel Staszewski's tests as the lowest which doesn't hinder updating times. (For testing purposes there was a sysfs module parameter to change it on demand, but it's removed until we're sure it could be really useful.) The patch is based on suggestions by: Paul E. McKenney Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 63c2fa7b68c..58ba9f4f2c9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ static struct tnode *tnode_free_head; +static size_t tnode_free_size; + +/* + * synchronize_rcu after call_rcu for that many pages; it should be especially + * useful before resizing the root node with PREEMPT_NONE configs; the value was + * obtained experimentally, aiming to avoid visible slowdown. + */ +static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; @@ -393,6 +401,8 @@ static void tnode_free_safe(struct tnode *tn) BUG_ON(IS_LEAF(tn)); tn->tnode_free = tnode_free_head; tnode_free_head = tn; + tnode_free_size += sizeof(struct tnode) + + (sizeof(struct node *) << tn->bits); } static void tnode_free_flush(void) @@ -404,6 +414,11 @@ static void tnode_free_flush(void) tn->tnode_free = NULL; tnode_free(tn); } + + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); + } } static struct leaf *leaf_new(void) -- cgit v1.2.3-70-g09d2 From be916cdebe4dc720a23b1a9bb589f2c22afd6589 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 14 Jul 2009 09:41:00 +0000 Subject: ipv4: Fix inflate_threshold_root automatically During large updates there could be triggered warnings like: "Fix inflate_threshold_root. Now=25 size=11 bits" if inflate() of the root node isn't finished in 10 loops. It should be much rarer now, after changing the threshold from 15 to 25, and a temporary problem, so this patch tries to handle it automatically using a fix variable to increase by one inflate threshold for next root resizes (up to the 35 limit, max fix = 10). The fix variable is decreased when root's inflate() finishes below 7 loops (even if some other, smaller table/ trie is updated -- for simplicity the fix variable is global for now). Reported-by: Pawel Staszewski Reported-by: Jorge Boncompte [DTI2] Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58ba9f4f2c9..5741d1306cc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -327,6 +327,8 @@ static const int inflate_threshold = 50; static const int halve_threshold_root = 15; static const int inflate_threshold_root = 25; +static int inflate_threshold_root_fix; +#define INFLATE_FIX_MAX 10 /* a comment in resize() */ static void __alias_free_mem(struct rcu_head *head) { @@ -617,7 +619,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ if (!tn->parent) - inflate_threshold_use = inflate_threshold_root; + inflate_threshold_use = inflate_threshold_root + + inflate_threshold_root_fix; else inflate_threshold_use = inflate_threshold; @@ -641,15 +644,27 @@ static struct node *resize(struct trie *t, struct tnode *tn) } if (max_resize < 0) { - if (!tn->parent) - pr_warning("Fix inflate_threshold_root." - " Now=%d size=%d bits\n", - inflate_threshold_root, tn->bits); - else + if (!tn->parent) { + /* + * It was observed that during large updates even + * inflate_threshold_root = 35 might be needed to avoid + * this warning; but it should be temporary, so let's + * try to handle this automatically. + */ + if (inflate_threshold_root_fix < INFLATE_FIX_MAX) + inflate_threshold_root_fix++; + else + pr_warning("Fix inflate_threshold_root." + " Now=%d size=%d bits fix=%d\n", + inflate_threshold_root, tn->bits, + inflate_threshold_root_fix); + } else { pr_warning("Fix inflate_threshold." " Now=%d size=%d bits\n", inflate_threshold, tn->bits); - } + } + } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix) + inflate_threshold_root_fix--; check_tnode(tn); -- cgit v1.2.3-70-g09d2 From b902e5735272b6a79fe2853180b2ad6658aa9678 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 14 Jul 2009 11:20:32 +0000 Subject: ipv4: fib_trie: Use tnode_get_child_rcu() and node_parent_rcu() in lookups While looking for other fib_trie problems reported by Pawel Staszewski I noticed there are a few uses of tnode_get_child() and node_parent() in lookups instead of their rcu versions. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5741d1306cc..d58b4911538 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1465,7 +1465,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), pos, bits); - n = tnode_get_child(pn, cindex); + n = tnode_get_child_rcu(pn, cindex); if (n == NULL) { #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1600,7 +1600,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent((struct node *) pn); + struct tnode *parent = node_parent_rcu((struct node *) pn); if (!parent) goto failed; @@ -1813,7 +1813,7 @@ static struct leaf *trie_firstleaf(struct trie *t) static struct leaf *trie_nextleaf(struct leaf *l) { struct node *c = (struct node *) l; - struct tnode *p = node_parent(c); + struct tnode *p = node_parent_rcu(c); if (!p) return NULL; /* trie with just one leaf */ -- cgit v1.2.3-70-g09d2 From 67edfef78639573e9b01c26295a935349aab6fa3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 21 Jul 2009 23:00:40 +0000 Subject: TCP: Add comments to (near) all functions in tcp_output.c v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking for something else I spent some time adding one liner comments to the tcp_output.c functions that didn't have any. That makes the comments more consistent. I hope I documented everything right. No code changes. v2: Incorporated feedback from Ilpo. v3: Change style of one liner comments, add a few more comments. Signed-off-by: Andi Kleen Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 58 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 14 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5bdf08d312d..c464892cf64 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,7 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +/* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -142,6 +143,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) tp->snd_cwnd_used = 0; } +/* Congestion state accounting after a packet has been sent. */ static void tcp_event_data_sent(struct tcp_sock *tp, struct sk_buff *skb, struct sock *sk) { @@ -161,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, icsk->icsk_ack.pingpong = 1; } +/* Account for an ACK we sent. */ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { tcp_dec_quickack_mode(sk, pkts); @@ -276,6 +279,7 @@ static u16 tcp_select_window(struct sock *sk) return new_win; } +/* Packet ECN state for a SYN-ACK */ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; @@ -283,6 +287,7 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; } +/* Packet ECN state for a SYN. */ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -301,6 +306,9 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) th->ece = 1; } +/* Set up ECN state for a packet on a ESTABLISHED socket that is about to + * be sent. + */ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, int tcp_header_len) { @@ -362,7 +370,9 @@ struct tcp_out_options { __u32 tsval, tsecr; /* need to include OPTION_TS */ }; -/* Beware: Something in the Internet is very sensitive to the ordering of +/* Write previously computed TCP options to the packet. + * + * Beware: Something in the Internet is very sensitive to the ordering of * TCP options, we learned this through the hard way, so be careful here. * Luckily we can at least blame others for their non-compliance but from * inter-operatibility perspective it seems that we're somewhat stuck with @@ -445,6 +455,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, } } +/* Compute TCP options for SYN packets. This is not the final + * network wire format yet. + */ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { @@ -493,6 +506,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, return size; } +/* Set up TCP options for SYN-ACKs. */ static unsigned tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned mss, struct sk_buff *skb, @@ -541,6 +555,9 @@ static unsigned tcp_synack_options(struct sock *sk, return size; } +/* Compute TCP options for ESTABLISHED sockets. This is not the + * final wire format yet. + */ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { @@ -705,7 +722,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, return net_xmit_eval(err); } -/* This routine just queue's the buffer +/* This routine just queues the buffer for sending. * * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, * otherwise socket can stall. @@ -722,6 +739,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) sk_mem_charge(sk, skb->truesize); } +/* Initialize TSO segments for a packet. */ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { @@ -909,6 +927,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) skb->len = skb->data_len; } +/* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -937,7 +956,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) return 0; } -/* Not accounting for SACKs here. */ +/* Calculate MSS. Not accounting for SACKs here. */ int tcp_mtu_to_mss(struct sock *sk, int pmtu) { struct tcp_sock *tp = tcp_sk(sk); @@ -981,6 +1000,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) return mtu; } +/* MTU probing init per socket */ void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1143,7 +1163,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, return 0; } -/* This must be invoked the first time we consider transmitting +/* Intialize TSO state of a skb. + * This must be invoked the first time we consider transmitting * SKB onto the wire. */ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, @@ -1158,6 +1179,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, return tso_segs; } +/* Minshall's variant of the Nagle send check. */ static inline int tcp_minshall_check(const struct tcp_sock *tp) { return after(tp->snd_sml, tp->snd_una) && @@ -1242,6 +1264,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, return cwnd_quota; } +/* Test if sending is allowed right now. */ int tcp_may_send_now(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1378,6 +1401,10 @@ send_now: } /* Create a new MTU probe if we are ready. + * MTU probe is regularly attempting to increase the path MTU by + * deliberately sending larger packets. This discovers routing + * changes resulting in larger path MTUs. + * * Returns 0 if we should wait to probe (no cwnd available), * 1 if a probe was sent, * -1 otherwise @@ -1790,6 +1817,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) sk_wmem_free_skb(sk, next_skb); } +/* Check if coalescing SKBs is legal. */ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) @@ -1808,6 +1836,9 @@ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) return 1; } +/* Collapse packets in the retransmit queue to make to create + * less packets on the wire. This is only done on retransmission. + */ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, int space) { @@ -1957,6 +1988,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return err; } +/* Check if we forward retransmits are possible in the current + * window/congestion state. + */ static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2145,7 +2179,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); } -/* WARNING: This routine must only be called when we have already sent +/* Send a crossed SYN-ACK during socket establishment. + * WARNING: This routine must only be called when we have already sent * a SYN packet that crossed the incoming SYN that caused this routine * to get called. If this assumption fails then the initial rcv_wnd * and rcv_wscale values will not be correct. @@ -2180,9 +2215,7 @@ int tcp_send_synack(struct sock *sk) return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } -/* - * Prepare a SYN-ACK. - */ +/* Prepare a SYN-ACK. */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { @@ -2269,9 +2302,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, return skb; } -/* - * Do all connect socket setups that can be done AF independent. - */ +/* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { struct dst_entry *dst = __sk_dst_get(sk); @@ -2330,9 +2361,7 @@ static void tcp_connect_init(struct sock *sk) tcp_clear_retrans(tp); } -/* - * Build a SYN and send it off. - */ +/* Build a SYN and send it off. */ int tcp_connect(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2493,6 +2522,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } +/* Initiate keepalive or window probe from timer. */ int tcp_write_wakeup(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.3-70-g09d2 From a44a4a006b860476881ec0098c36584036e1cb91 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Jul 2009 08:22:46 +0000 Subject: xfrm: export xfrm garbage collector thresholds via sysctl Export garbage collector thresholds for xfrm[4|6]_dst_ops Had a problem reported to me recently in which a high volume of ipsec connections on a system began reporting ENOBUFS for new connections eventually. It seemed that after about 2000 connections we started being unable to create more. A quick look revealed that the xfrm code used a dst_ops structure that limited the gc_thresh value to 1024, and always dropped route cache entries after 2x the gc_thresh. It seems the most direct solution is to export the gc_thresh values in the xfrm[4|6] dst_ops as sysctls, like the main routing table does, so that higher volumes of connections can be supported. This patch has been tested and allows the reporter to increase their ipsec connection volume successfully. Reported-by: Joe Nall Signed-off-by: Neil Horman ipv4/xfrm4_policy.c | 18 ++++++++++++++++++ ipv6/xfrm6_policy.c | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 18 ++++++++++++++++++ net/ipv6/xfrm6_policy.c | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0071ee6f441..26496babdf3 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -264,6 +264,20 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .fill_dst = xfrm4_fill_dst, }; +static struct ctl_table xfrm4_policy_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm4_gc_thresh", + .data = &xfrm4_dst_ops.gc_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static struct ctl_table_header *sysctl_hdr; + static void __init xfrm4_policy_init(void) { xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); @@ -271,6 +285,8 @@ static void __init xfrm4_policy_init(void) static void __exit xfrm4_policy_fini(void) { + if (sysctl_hdr) + unregister_net_sysctl_table(sysctl_hdr); xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } @@ -278,5 +294,7 @@ void __init xfrm4_init(void) { xfrm4_state_init(); xfrm4_policy_init(); + sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, + xfrm4_policy_table); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3a3c677bc0f..4acc308eac7 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -306,6 +306,20 @@ static void xfrm6_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } +static struct ctl_table xfrm6_policy_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm6_gc_thresh", + .data = &xfrm6_dst_ops.gc_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static struct ctl_table_header *sysctl_hdr; + int __init xfrm6_init(void) { int ret; @@ -317,6 +331,8 @@ int __init xfrm6_init(void) ret = xfrm6_state_init(); if (ret) goto out_policy; + sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, + xfrm6_policy_table); out: return ret; out_policy: @@ -326,6 +342,8 @@ out_policy: void xfrm6_fini(void) { + if (sysctl_hdr) + unregister_net_sysctl_table(sysctl_hdr); //xfrm6_input_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); -- cgit v1.2.3-70-g09d2 From a33bc5c15154c835aae26f16e6a3a7d9ad4acb45 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 30 Jul 2009 18:52:15 -0700 Subject: xfrm: select sane defaults for xfrm[4|6] gc_thresh Choose saner defaults for xfrm[4|6] gc_thresh values on init Currently, the xfrm[4|6] code has hard-coded initial gc_thresh values (set to 1024). Given that the ipv4 and ipv6 routing caches are sized dynamically at boot time, the static selections can be non-sensical. This patch dynamically selects an appropriate gc threshold based on the corresponding main routing table size, using the assumption that we should in the worst case be able to handle as many connections as the routing table can. For ipv4, the maximum route cache size is 16 * the number of hash buckets in the route cache. Given that xfrm4 starts garbage collection at the gc_thresh and prevents new allocations at 2 * gc_thresh, we set gc_thresh to half the maximum route cache size. For ipv6, its a bit trickier. there is no maximum route cache size, but the ipv6 dst_ops gc_thresh is statically set to 1024. It seems sane to select a simmilar gc_thresh for the xfrm6 code that is half the number of hash buckets in the v6 route cache times 16 (like the v4 code does). Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 6 ++++++ include/net/xfrm.h | 2 +- net/ipv4/route.c | 2 +- net/ipv4/xfrm4_policy.c | 13 ++++++++++++- net/ipv6/ip6_fib.c | 16 +++++----------- net/ipv6/xfrm6_policy.c | 15 +++++++++++++++ 6 files changed, 40 insertions(+), 14 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7c5c0f79168..15b492a9aa7 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -22,6 +22,12 @@ #include #include +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB6_TABLE_HASHSZ 256 +#else +#define FIB6_TABLE_HASHSZ 1 +#endif + struct rt6_info; struct fib6_config diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9e3a3f4c1f6..223e90a4482 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1280,7 +1280,7 @@ struct xfrm6_tunnel { }; extern void xfrm_init(void); -extern void xfrm4_init(void); +extern void xfrm4_init(int rt_hash_size); extern int xfrm_state_init(struct net *net); extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 278f46f5011..fafbe163e2b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3442,7 +3442,7 @@ int __init ip_rt_init(void) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); - xfrm4_init(); + xfrm4_init(ip_rt_max_size); #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 26496babdf3..1ba44742ebb 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -290,10 +290,21 @@ static void __exit xfrm4_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } -void __init xfrm4_init(void) +void __init xfrm4_init(int rt_max_size) { xfrm4_state_init(); xfrm4_policy_init(); + /* + * Select a default value for the gc_thresh based on the main route + * table hash size. It seems to me the worst case scenario is when + * we have ipsec operating in transport mode, in which we create a + * dst_entry per socket. The xfrm gc algorithm starts trying to remove + * entries at gc_thresh, and prevents new allocations as 2*gc_thresh + * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. + * That will let us store an ipsec connection per route table entry, + * and start cleaning when were 1/2 full + */ + xfrm4_dst_ops.gc_thresh = rt_max_size/2; sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 52ee1dced2f..0e93ca56eb6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -164,12 +164,6 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES -#define FIB_TABLE_HASHSZ 256 -#else -#define FIB_TABLE_HASHSZ 1 -#endif - static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -180,7 +174,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb) */ rwlock_init(&tb->tb6_lock); - h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); /* * No protection necessary, this is the only list mutatation @@ -231,7 +225,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) if (id == 0) id = RT6_TABLE_MAIN; - h = id & (FIB_TABLE_HASHSZ - 1); + h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { @@ -382,7 +376,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; - for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry(tb, node, head, tb6_hlist) { @@ -1368,7 +1362,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), unsigned int h; rcu_read_lock(); - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); @@ -1483,7 +1477,7 @@ static int fib6_net_init(struct net *net) if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, + net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ, sizeof(*net->ipv6.fib_table_hash), GFP_KERNEL); if (!net->ipv6.fib_table_hash) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4acc308eac7..611cffcf554 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -323,6 +323,7 @@ static struct ctl_table_header *sysctl_hdr; int __init xfrm6_init(void) { int ret; + unsigned int gc_thresh; ret = xfrm6_policy_init(); if (ret) @@ -331,6 +332,20 @@ int __init xfrm6_init(void) ret = xfrm6_state_init(); if (ret) goto out_policy; + /* + * We need a good default value for the xfrm6 gc threshold. + * In ipv4 we set it to the route hash table size * 8, which + * is half the size of the maximaum route cache for ipv4. It + * would be good to do the same thing for v6, except the table is + * constructed differently here. Here each table for a net namespace + * can have FIB_TABLE_HASHSZ entries, so lets go with the same + * computation that we used for ipv4 here. Also, lets keep the initial + * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults + * to that as a minimum as well + */ + gc_thresh = FIB6_TABLE_HASHSZ * 8; + xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); out: -- cgit v1.2.3-70-g09d2 From f816700aa9ef1b1e2f984f638cb211e79dcab495 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Aug 2009 20:18:33 -0700 Subject: xfrm4: fix build when SYSCTLs are disabled Fix build errors when SYSCTLs are not enabled: (.init.text+0x5154): undefined reference to `net_ipv4_ctl_path' (.init.text+0x5176): undefined reference to `register_net_sysctl_table' xfrm4_policy.c:(.exit.text+0x573): undefined reference to `unregister_net_sysctl_table Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1ba44742ebb..74fb2eb833e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -264,6 +264,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .fill_dst = xfrm4_fill_dst, }; +#ifdef CONFIG_SYSCTL static struct ctl_table xfrm4_policy_table[] = { { .ctl_name = CTL_UNNUMBERED, @@ -277,6 +278,7 @@ static struct ctl_table xfrm4_policy_table[] = { }; static struct ctl_table_header *sysctl_hdr; +#endif static void __init xfrm4_policy_init(void) { @@ -285,8 +287,10 @@ static void __init xfrm4_policy_init(void) static void __exit xfrm4_policy_fini(void) { +#ifdef CONFIG_SYSCTL if (sysctl_hdr) unregister_net_sysctl_table(sysctl_hdr); +#endif xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } @@ -305,7 +309,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; +#ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); +#endif } -- cgit v1.2.3-70-g09d2 From 36cbd3dcc10384f813ec0814255f576c84f2bcd4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 5 Aug 2009 10:42:58 -0700 Subject: net: mark read-only arrays as const String literals are constant, and usually, we can also tag the array of pointers const too, moving it to the .rodata section. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 ++- include/net/irda/ircomm_event.h | 2 +- include/net/irda/ircomm_tty_attach.h | 4 ++-- include/net/irda/irlap_event.h | 2 +- include/net/irda/irlmp_event.h | 4 ++-- include/net/sctp/constants.h | 4 +++- net/8021q/vlanproc.c | 2 +- net/atm/lec.c | 9 +++++---- net/atm/proc.c | 9 +++++---- net/bluetooth/af_bluetooth.c | 4 ++-- net/bridge/br_stp.c | 2 +- net/core/dev.c | 2 +- net/core/net-sysfs.c | 2 +- net/core/sock.c | 6 +++--- net/dccp/ccids/ccid3.c | 4 ++-- net/dccp/feat.c | 7 ++++--- net/dccp/proto.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv6/proc.c | 4 ++-- net/irda/ircomm/ircomm_event.c | 4 ++-- net/irda/ircomm/ircomm_tty_attach.c | 4 ++-- net/irda/iriap.c | 4 ++-- net/irda/irlan/irlan_common.c | 4 ++-- net/irda/irlap.c | 2 +- net/irda/irlap_event.c | 4 ++-- net/irda/irlmp_event.c | 6 +++--- net/llc/llc_proc.c | 2 +- net/netfilter/ipvs/ip_vs_proto.c | 3 ++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/rds/ib_stats.c | 2 +- net/rds/iw_stats.c | 2 +- net/rds/rds.h | 3 ++- net/rds/stats.c | 4 ++-- net/rxrpc/ar-ack.c | 2 +- net/sctp/debug.c | 14 +++++++------- 36 files changed, 74 insertions(+), 66 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1c8ee1b1365..98978e73f66 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -738,7 +738,8 @@ extern void ip_vs_protocol_cleanup(void); extern void ip_vs_protocol_timeout_change(int flags); extern int *ip_vs_create_timeout_table(int *table, int size); extern int -ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to); +ip_vs_set_state_timeout(int *table, int num, const char *const *names, + const char *name, int to); extern void ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h index c290447872d..bc0c6f31f1c 100644 --- a/include/net/irda/ircomm_event.h +++ b/include/net/irda/ircomm_event.h @@ -74,7 +74,7 @@ struct ircomm_info { struct qos_info *qos; }; -extern char *ircomm_state[]; +extern const char *const ircomm_state[]; struct ircomm_cb; /* Forward decl. */ diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h index f91a5695aa4..0a63bbb972d 100644 --- a/include/net/irda/ircomm_tty_attach.h +++ b/include/net/irda/ircomm_tty_attach.h @@ -66,8 +66,8 @@ struct ircomm_tty_info { __u8 dlsap_sel; }; -extern char *ircomm_state[]; -extern char *ircomm_tty_state[]; +extern const char *const ircomm_state[]; +extern const char *const ircomm_tty_state[]; int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, struct sk_buff *skb, struct ircomm_tty_info *info); diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h index 2ae2e119ef4..4c90824c50f 100644 --- a/include/net/irda/irlap_event.h +++ b/include/net/irda/irlap_event.h @@ -120,7 +120,7 @@ typedef enum { /* FIXME check the two first reason codes */ LAP_PRIMARY_CONFLICT, } LAP_REASON; -extern const char *irlap_state[]; +extern const char *const irlap_state[]; void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, struct sk_buff *skb, struct irlap_info *info); diff --git a/include/net/irda/irlmp_event.h b/include/net/irda/irlmp_event.h index e03ae4ae396..9e4ec17a744 100644 --- a/include/net/irda/irlmp_event.h +++ b/include/net/irda/irlmp_event.h @@ -79,8 +79,8 @@ typedef enum { LM_LAP_IDLE_TIMEOUT, } IRLMP_EVENT; -extern const char *irlmp_state[]; -extern const char *irlsap_state[]; +extern const char *const irlmp_state[]; +extern const char *const irlsap_state[]; void irlmp_watchdog_timer_expired(void *data); void irlmp_discovery_timer_expired(void *data); diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index b05b0557211..8bc25f7b04c 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -241,7 +241,9 @@ const char *sctp_tname(const sctp_subtype_t); /* timeouts */ const char *sctp_pname(const sctp_subtype_t); /* primitives */ /* This is a table of printable names of sctp_state_t's. */ -extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[]; +extern const char *const sctp_state_tbl[]; +extern const char *const sctp_evttype_tbl[]; +extern const char *const sctp_status_tbl[]; /* Maximum chunk length considering padding requirements. */ enum { SCTP_MAX_CHUNK_LEN = ((1<<16) - sizeof(__u32)) }; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index b55a091a33d..6262c335f3c 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -107,7 +107,7 @@ static const struct file_operations vlandev_fops = { */ /* Strings */ -static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { +static const char *const vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID", [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD", [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD", diff --git a/net/atm/lec.c b/net/atm/lec.c index c463868c993..8e723c2654c 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -935,9 +935,9 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) } #ifdef CONFIG_PROC_FS -static char *lec_arp_get_status_string(unsigned char status) +static const char *lec_arp_get_status_string(unsigned char status) { - static char *lec_arp_status_string[] = { + static const char *const lec_arp_status_string[] = { "ESI_UNKNOWN ", "ESI_ARP_PENDING ", "ESI_VC_PENDING ", @@ -1121,7 +1121,8 @@ static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int lec_seq_show(struct seq_file *seq, void *v) { - static char lec_banner[] = "Itf MAC ATM destination" + static const char lec_banner[] = + "Itf MAC ATM destination" " Status Flags " "VPI/VCI Recv VPI/VCI\n"; @@ -1505,7 +1506,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) } #if DEBUG_ARP_TABLE -static char *get_status_string(unsigned char st) +static const char *get_status_string(unsigned char st) { switch (st) { case ESI_UNKNOWN: diff --git a/net/atm/proc.c b/net/atm/proc.c index 38de5ff61ec..ab8419a324b 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -151,8 +151,9 @@ static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) { - static const char *class_name[] = { "off","UBR","CBR","VBR","ABR" }; - static const char *aal_name[] = { + static const char *const class_name[] = + {"off","UBR","CBR","VBR","ABR"}; + static const char *const aal_name[] = { "---", "1", "2", "3/4", /* 0- 3 */ "???", "5", "???", "???", /* 4- 7 */ "???", "???", "???", "???", /* 8-11 */ @@ -178,7 +179,7 @@ static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) static const char *vcc_state(struct atm_vcc *vcc) { - static const char *map[] = { ATM_VS2TXT_MAP }; + static const char *const map[] = { ATM_VS2TXT_MAP }; return map[ATM_VF2VS(vcc->flags)]; } @@ -335,7 +336,7 @@ static const struct file_operations vcc_seq_fops = { static int svc_seq_show(struct seq_file *seq, void *v) { - static char atm_svc_banner[] = + static const char atm_svc_banner[] = "Itf VPI VCI State Remote\n"; if (v == SEQ_START_TOKEN) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0250e060015..8cfb5a84984 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -49,7 +49,7 @@ static struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; -static const char *bt_key_strings[BT_MAX_PROTO] = { +static const char *const bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP", "sk_lock-AF_BLUETOOTH-BTPROTO_HCI", "sk_lock-AF_BLUETOOTH-BTPROTO_SCO", @@ -61,7 +61,7 @@ static const char *bt_key_strings[BT_MAX_PROTO] = { }; static struct lock_class_key bt_slock_key[BT_MAX_PROTO]; -static const char *bt_slock_key_strings[BT_MAX_PROTO] = { +static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_L2CAP", "slock-AF_BLUETOOTH-BTPROTO_HCI", "slock-AF_BLUETOOTH-BTPROTO_SCO", diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 0660515f399..fd3f8d6c099 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -21,7 +21,7 @@ */ #define MESSAGE_AGE_INCR ((HZ < 256) ? 1 : (HZ/256)) -static const char *br_port_state_names[] = { +static const char *const br_port_state_names[] = { [BR_STATE_DISABLED] = "disabled", [BR_STATE_LISTENING] = "listening", [BR_STATE_LEARNING] = "learning", diff --git a/net/core/dev.c b/net/core/dev.c index 71347668c50..f01a9c41f11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -272,7 +272,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, ARPHRD_VOID, ARPHRD_NONE}; -static const char *netdev_lock_name[] = +static const char *const netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3994680c08b..ad91e9e5f47 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -141,7 +141,7 @@ static ssize_t show_dormant(struct device *dev, return -EINVAL; } -static const char *operstates[] = { +static const char *const operstates[] = { "unknown", "notpresent", /* currently unused */ "down", diff --git a/net/core/sock.c b/net/core/sock.c index bbb25be7ddf..a324a80c163 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static struct lock_class_key af_family_slock_keys[AF_MAX]; * strings build-time, so that runtime initialization of socket * locks is fast): */ -static const char *af_family_key_strings[AF_MAX+1] = { +static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , @@ -158,7 +158,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_IEEE802154", "sk_lock-AF_MAX" }; -static const char *af_family_slock_key_strings[AF_MAX+1] = { +static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , @@ -174,7 +174,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_IEEE802154", "slock-AF_MAX" }; -static const char *af_family_clock_key_strings[AF_MAX+1] = { +static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index a27b7f4c19c..f596ce149c3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -52,7 +52,7 @@ static int ccid3_debug; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) { - static char *ccid3_state_names[] = { + static const char *const ccid3_state_names[] = { [TFRC_SSTATE_NO_SENT] = "NO_SENT", [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", [TFRC_SSTATE_FBACK] = "FBACK", @@ -646,7 +646,7 @@ enum ccid3_fback_type { #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) { - static char *ccid3_rx_state_names[] = { + static const char *const ccid3_rx_state_names[] = { [TFRC_RSTATE_NO_DATA] = "NO_DATA", [TFRC_RSTATE_DATA] = "DATA", [TFRC_RSTATE_TERM] = "TERM", diff --git a/net/dccp/feat.c b/net/dccp/feat.c index b04160a2eea..972b8dc918d 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -213,7 +213,7 @@ static int dccp_feat_default_value(u8 feat_num) */ static const char *dccp_feat_fname(const u8 feat) { - static const char *feature_names[] = { + static const char *const feature_names[] = { [DCCPF_RESERVED] = "Reserved", [DCCPF_CCID] = "CCID", [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", @@ -236,8 +236,9 @@ static const char *dccp_feat_fname(const u8 feat) return feature_names[feat]; } -static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING", - "UNSTABLE", "STABLE" }; +static const char *const dccp_feat_sname[] = { + "DEFAULT", "INITIALISING", "CHANGING", "UNSTABLE", "STABLE", +}; #ifdef CONFIG_IP_DCCP_DEBUG static const char *dccp_feat_oname(const u8 opt) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 94ca8eaace7..37b3b4293ef 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL_GPL(dccp_done); const char *dccp_packet_name(const int type) { - static const char *dccp_packet_names[] = { + static const char *const dccp_packet_names[] = { [DCCP_PKT_REQUEST] = "REQUEST", [DCCP_PKT_RESPONSE] = "RESPONSE", [DCCP_PKT_DATA] = "DATA", @@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(dccp_packet_name); const char *dccp_state_name(const int state) { - static char *dccp_state_names[] = { + static const char *const dccp_state_names[] = { [DCCP_OPEN] = "OPEN", [DCCP_REQUESTING] = "REQUESTING", [DCCP_PARTOPEN] = "PARTOPEN", diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d58b4911538..fe3c846b99a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2421,7 +2421,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) } } -static const char *rtn_type_names[__RTN_MAX] = { +static const char *const rtn_type_names[__RTN_MAX] = { [RTN_UNSPEC] = "UNSPEC", [RTN_UNICAST] = "UNICAST", [RTN_LOCAL] = "LOCAL", diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 590ddefb7ff..c9605c3ad91 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -101,7 +101,7 @@ static struct snmp_mib snmp6_icmp6_list[] = { }; /* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */ -static char *icmp6type2name[256] = { +static const char *const icmp6type2name[256] = { [ICMPV6_DEST_UNREACH] = "DestUnreachs", [ICMPV6_PKT_TOOBIG] = "PktTooBigs", [ICMPV6_TIME_EXCEED] = "TimeExcds", @@ -144,7 +144,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) /* print by name -- deprecated items */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { int icmptype; - char *p; + const char *p; icmptype = i & 0xff; p = icmp6type2name[icmptype]; diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c index c35b3ef5c2f..d78554fedba 100644 --- a/net/irda/ircomm/ircomm_event.c +++ b/net/irda/ircomm/ircomm_event.c @@ -49,7 +49,7 @@ static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event, static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event, struct sk_buff *skb, struct ircomm_info *info); -char *ircomm_state[] = { +const char *const ircomm_state[] = { "IRCOMM_IDLE", "IRCOMM_WAITI", "IRCOMM_WAITR", @@ -57,7 +57,7 @@ char *ircomm_state[] = { }; #ifdef CONFIG_IRDA_DEBUG -static char *ircomm_event[] = { +static const char *const ircomm_event[] = { "IRCOMM_CONNECT_REQUEST", "IRCOMM_CONNECT_RESPONSE", "IRCOMM_TTP_CONNECT_INDICATION", diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index 9032a1d1190..eafc010907c 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -80,7 +80,7 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, struct sk_buff *skb, struct ircomm_tty_info *info); -char *ircomm_tty_state[] = { +const char *const ircomm_tty_state[] = { "IRCOMM_TTY_IDLE", "IRCOMM_TTY_SEARCH", "IRCOMM_TTY_QUERY_PARAMETERS", @@ -91,7 +91,7 @@ char *ircomm_tty_state[] = { }; #ifdef CONFIG_IRDA_DEBUG -static char *ircomm_tty_event[] = { +static const char *const ircomm_tty_event[] = { "IRCOMM_TTY_ATTACH_CABLE", "IRCOMM_TTY_DETACH_CABLE", "IRCOMM_TTY_DATA_REQUEST", diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 4a105dc32dc..294e34d3517 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -44,7 +44,7 @@ #ifdef CONFIG_IRDA_DEBUG /* FIXME: This one should go in irlmp.c */ -static const char *ias_charset_types[] = { +static const char *const ias_charset_types[] = { "CS_ASCII", "CS_ISO_8859_1", "CS_ISO_8859_2", @@ -966,7 +966,7 @@ static void iriap_watchdog_timer_expired(void *data) #ifdef CONFIG_PROC_FS -static const char *ias_value_types[] = { +static const char *const ias_value_types[] = { "IAS_MISSING", "IAS_INTEGER", "IAS_OCT_SEQ", diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 774d73a7685..62116829b81 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -69,14 +69,14 @@ static int eth; /* Use "eth" or "irlan" name for devices */ static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */ #ifdef CONFIG_PROC_FS -static const char *irlan_access[] = { +static const char *const irlan_access[] = { "UNKNOWN", "DIRECT", "PEER", "HOSTED" }; -static const char *irlan_media[] = { +static const char *const irlan_media[] = { "UNKNOWN", "802.3", "802.5" diff --git a/net/irda/irlap.c b/net/irda/irlap.c index e4965b764b9..356e65b1dc4 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -63,7 +63,7 @@ static void irlap_init_qos_capabilities(struct irlap_cb *self, struct qos_info *qos_user); #ifdef CONFIG_IRDA_DEBUG -static char *lap_reasons[] = { +static const char *const lap_reasons[] = { "ERROR, NOT USED", "LAP_DISC_INDICATION", "LAP_NO_RESPONSE", diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 16c4ef0f5c1..c5c51959e3c 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -78,7 +78,7 @@ static int irlap_state_reset_check(struct irlap_cb *, IRLAP_EVENT event, struct sk_buff *, struct irlap_info *); #ifdef CONFIG_IRDA_DEBUG -static const char *irlap_event[] = { +static const char *const irlap_event[] = { "DISCOVERY_REQUEST", "CONNECT_REQUEST", "CONNECT_RESPONSE", @@ -120,7 +120,7 @@ static const char *irlap_event[] = { }; #endif /* CONFIG_IRDA_DEBUG */ -const char *irlap_state[] = { +const char *const irlap_state[] = { "LAP_NDM", "LAP_QUERY", "LAP_REPLY", diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index 78cce0cb073..c1fb5db8104 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -33,13 +33,13 @@ #include #include -const char *irlmp_state[] = { +const char *const irlmp_state[] = { "LAP_STANDBY", "LAP_U_CONNECT", "LAP_ACTIVE", }; -const char *irlsap_state[] = { +const char *const irlsap_state[] = { "LSAP_DISCONNECTED", "LSAP_CONNECT", "LSAP_CONNECT_PEND", @@ -49,7 +49,7 @@ const char *irlsap_state[] = { }; #ifdef CONFIG_IRDA_DEBUG -static const char *irlmp_event[] = { +static const char *const irlmp_event[] = { "LM_CONNECT_REQUEST", "LM_CONNECT_CONFIRM", "LM_CONNECT_RESPONSE", diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index f97be471fe2..be47ac427f6 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -143,7 +143,7 @@ out: return 0; } -static char *llc_conn_state_names[] = { +static const char *const llc_conn_state_names[] = { [LLC_CONN_STATE_ADM] = "adm", [LLC_CONN_STATE_SETUP] = "setup", [LLC_CONN_STATE_NORMAL] = "normal", diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 85c8892e1e8..3e767167454 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -127,7 +127,8 @@ ip_vs_create_timeout_table(int *table, int size) * Set timeout value for state specified by name */ int -ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to) +ip_vs_set_state_timeout(int *table, int num, const char *const *names, + const char *name, int to) { int i; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 2278e141489..91d28e07374 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -377,7 +377,7 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_LAST] = 2*HZ, }; -static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { +static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = "NONE", [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT", diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 33a05d3684d..e7a6885e016 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -472,7 +472,7 @@ static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_LAST] = 2*HZ, }; -static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = { +static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = "UDP", [IP_VS_UDP_S_LAST] = "BUG!", }; diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 02e3e3d50d4..8d8488306fe 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -39,7 +39,7 @@ DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned; -static char *rds_ib_stat_names[] = { +static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", "ib_tx_cq_call", diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c index ccc7e8f0bf0..d33ea790484 100644 --- a/net/rds/iw_stats.c +++ b/net/rds/iw_stats.c @@ -39,7 +39,7 @@ DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned; -static char *rds_iw_stat_names[] = { +static const char *const rds_iw_stat_names[] = { "iw_connect_raced", "iw_listen_closed_stale", "iw_tx_cq_call", diff --git a/net/rds/rds.h b/net/rds/rds.h index dbe11123678..290566c69d2 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -652,7 +652,8 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); int __init rds_stats_init(void); void rds_stats_exit(void); void rds_stats_info_copy(struct rds_info_iterator *iter, - uint64_t *values, char **names, size_t nr); + uint64_t *values, const char *const *names, + size_t nr); /* sysctl.c */ int __init rds_sysctl_init(void); diff --git a/net/rds/stats.c b/net/rds/stats.c index 637146893cf..91d8c58b833 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -40,7 +40,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); /* :.,$s/unsigned long\>.*\ Date: Mon, 10 Aug 2009 10:08:27 +0200 Subject: netfilter: nf_conntrack: add SCTP support for SO_ORIGINAL_DST Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 7d2ead7228a..05a9bc8df53 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -256,11 +256,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) tuple.dst.u3.ip = inet->daddr; tuple.dst.u.tcp.port = inet->dport; tuple.src.l3num = PF_INET; - tuple.dst.protonum = IPPROTO_TCP; + tuple.dst.protonum = sk->sk_protocol; - /* We only do TCP at the moment: is there a better way? */ - if (strcmp(sk->sk_prot->name, "TCP")) { - pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n"); + /* We only do TCP and SCTP at the moment: is there a better way? */ + if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { + pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); return -ENOPROTOOPT; } -- cgit v1.2.3-70-g09d2 From 57750a22ed022ed6fcdcc5bc58d16011ccfe575f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 13 Jun 2009 06:22:18 +0200 Subject: netfilter: conntrack: switch hook PFs to nfproto Simple substitution to indicate that the fields indeed use the NFPROTO_ space. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 7d2ead7228a..8905cc76e0b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -158,28 +158,28 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, { .hook = ipv4_confirm, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, { .hook = ipv4_confirm, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 2a15c2d66c6..a7f4cd60735 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -265,42 +265,42 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { { .hook = ipv6_defrag, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_conntrack_in, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_defrag, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_confirm, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, { .hook = ipv6_confirm, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_LAST-1, }, -- cgit v1.2.3-70-g09d2 From 24c232d8e911ef6189e02da411dc2b72cb03bfcf Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 13 Jun 2009 06:20:29 +0200 Subject: netfilter: xtables: switch hook PFs to nfproto Signed-off-by: Jan Engelhardt --- net/bridge/netfilter/ebtable_filter.c | 6 +++--- net/bridge/netfilter/ebtable_nat.c | 6 +++--- net/ipv4/netfilter/iptable_filter.c | 6 +++--- net/ipv4/netfilter/iptable_mangle.c | 10 +++++----- net/ipv4/netfilter/iptable_raw.c | 4 ++-- net/ipv4/netfilter/iptable_security.c | 6 +++--- net/ipv4/netfilter/nf_nat_standalone.c | 8 ++++---- net/ipv6/netfilter/ip6table_filter.c | 6 +++--- net/ipv6/netfilter/ip6table_mangle.c | 10 +++++----- net/ipv6/netfilter/ip6table_raw.c | 4 ++-- net/ipv6/netfilter/ip6table_security.c | 6 +++--- 11 files changed, 36 insertions(+), 36 deletions(-) (limited to 'net/ipv4') diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index a5eea72938a..4b988db3cd4 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -77,21 +77,21 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { { .hook = ebt_in_hook, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { .hook = ebt_in_hook, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { .hook = ebt_out_hook, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, }, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 6024c551f9a..4a98804203b 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -77,21 +77,21 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { { .hook = ebt_nat_out, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { .hook = ebt_nat_out, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { .hook = ebt_nat_in, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, }, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c30a969724f..bef326c22e2 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -102,21 +102,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { { .hook = ipt_local_in_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_FILTER, }, { .hook = ipt_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_FILTER, }, { .hook = ipt_local_out_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_FILTER, }, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4087614d951..1442df7bb76 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -162,35 +162,35 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { { .hook = ipt_pre_routing_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_MANGLE, }, { .hook = ipt_local_in_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_MANGLE, }, { .hook = ipt_forward_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_MANGLE, }, { .hook = ipt_local_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_MANGLE, }, { .hook = ipt_post_routing_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_MANGLE, }, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index e5356da1fb5..1d28df8b876 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -74,14 +74,14 @@ ipt_local_hook(unsigned int hook, static struct nf_hook_ops ipt_ops[] __read_mostly = { { .hook = ipt_hook, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_RAW, .owner = THIS_MODULE, }, { .hook = ipt_local_hook, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_RAW, .owner = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 29ab630f240..8c5a250c94f 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -105,21 +105,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { { .hook = ipt_local_in_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_SECURITY, }, { .hook = ipt_forward_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SECURITY, }, { .hook = ipt_local_out_hook, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_SECURITY, }, diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5567bd0d075..5f41d017ddd 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -251,7 +251,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { { .hook = nf_nat_in, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, @@ -259,7 +259,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { { .hook = nf_nat_out, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, @@ -267,7 +267,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { { .hook = nf_nat_local_fn, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, @@ -275,7 +275,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { { .hook = nf_nat_fn, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, }, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ef5a0a32bf8..b35c3582860 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -95,21 +95,21 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_in_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { .hook = ip6t_in_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_FILTER, }, { .hook = ip6t_local_out_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FILTER, }, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index ab0d398a2ba..a98ced12fcd 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -136,35 +136,35 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_in_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_in_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_in_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_local_out_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_post_routing_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 4b792b6ca32..ec12540ae8a 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -68,14 +68,14 @@ ip6t_local_out_hook(unsigned int hook, static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_pre_routing_hook, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, { .hook = ip6t_local_out_hook, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 0ea37ff15d5..d8085072755 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -101,21 +101,21 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_local_in_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_SECURITY, }, { .hook = ip6t_forward_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_SECURITY, }, { .hook = ip6t_local_out_hook, .owner = THIS_MODULE, - .pf = PF_INET6, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_SECURITY, }, -- cgit v1.2.3-70-g09d2 From f88e6a8a50a603f8347343e75d035889784a507c Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 13 Jun 2009 06:25:44 +0200 Subject: netfilter: xtables: switch table AFs to nfproto Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index bef326c22e2..97dbd94a8e3 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -57,7 +57,7 @@ static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 1442df7bb76..28647f10aa7 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -68,7 +68,7 @@ static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1d28df8b876..494784c999e 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -40,7 +40,7 @@ static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 8c5a250c94f..8804e1a0f91 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -61,7 +61,7 @@ static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, }; static unsigned int diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6348a793936..6448a9b7d6f 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -62,7 +62,7 @@ static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, }; /* Source NAT */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index b35c3582860..0a3ae48ac4d 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -55,7 +55,7 @@ static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET6, + .af = NFPROTO_IPV6, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a98ced12fcd..0f49e005a8c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -61,7 +61,7 @@ static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET6, + .af = NFPROTO_IPV6, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ec12540ae8a..679865e3d5f 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -39,7 +39,7 @@ static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET6, + .af = NFPROTO_IPV6, }; /* The work comes in here from netfilter.c. */ diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index d8085072755..822afabbdc8 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -60,7 +60,7 @@ static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET6, + .af = NFPROTO_IPV6, }; static unsigned int -- cgit v1.2.3-70-g09d2 From e5afbba1869a5d9509c61f8962be9bdebf95f7d3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Jul 2009 12:33:10 +0200 Subject: netfilter: iptables: remove unused datalen variable Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/ip_tables.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fdefae6b5df..3856aa3f231 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -315,7 +315,6 @@ ipt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; - u_int16_t datalen; bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; @@ -328,7 +327,6 @@ ipt_do_table(struct sk_buff *skb, /* Initialization */ ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -427,8 +425,6 @@ ipt_do_table(struct sk_buff *skb, #endif /* Target might have changed stuff. */ ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; - if (verdict == IPT_CONTINUE) e = ipt_next_entry(e); else -- cgit v1.2.3-70-g09d2 From 47901dc2c4a3f1f9af453486a005d31fe9b393f0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 9 Jul 2009 23:00:19 +0200 Subject: netfilter: xtables: use memcmp in unconditional check Instead of inspecting each u32/char open-coded, clean up and make use of memcmp. On some arches, memcmp is implemented as assembly or GCC's __builtin_memcmp which can possibly take advantages of known alignment. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 10 +++------- net/ipv4/netfilter/ip_tables.c | 11 +++-------- net/ipv6/netfilter/ip6_tables.c | 11 +++-------- 3 files changed, 9 insertions(+), 23 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7505dff4ffd..b9f7243f422 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -341,15 +341,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline int unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_arp *arp) { - unsigned int i; - - for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++) - if (((__u32 *)arp)[i]) - return 0; + static const struct arpt_arp uncond; - return 1; + return memcmp(arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3856aa3f231..3431a771ff1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -190,16 +190,11 @@ get_entry(void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline int -unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_ip *ip) { - unsigned int i; + static const struct ipt_ip uncond; - for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) - if (((__u32 *)ip)[i]) - return 0; - - return 1; + return memcmp(ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ced1f2c0cb6..1389ad904bc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -222,16 +222,11 @@ get_entry(void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline int -unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_ip6 *ipv6) { - unsigned int i; - - for (i = 0; i < sizeof(*ipv6); i++) - if (((char *)ipv6)[i]) - break; + static const struct ip6t_ip6 uncond; - return (i == sizeof(*ipv6)); + return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; } #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ -- cgit v1.2.3-70-g09d2 From a7d51738e757c1ab94595e7d05594c61f0fb32ce Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 18 Jul 2009 14:52:58 +0200 Subject: netfilter: xtables: ignore unassigned hooks in check_entry_size_and_hooks The "hook_entry" and "underflow" array contains values even for hooks not provided, such as PREROUTING in conjunction with the "filter" table. Usually, the values point to whatever the next rule is. For the upcoming unconditionality and underflow checking patches however, we must not inspect that arbitrary rule. Skipping unassigned hooks seems like a good idea, also because newinfo->hook_entry and newinfo->underflow will then continue to have the poison value for detecting abnormalities. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 5 ++++- net/ipv4/netfilter/ip_tables.c | 5 ++++- net/ipv6/netfilter/ip6_tables.c | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b9f7243f422..d91f0834d57 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -539,6 +539,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, + unsigned int valid_hooks, unsigned int *i) { unsigned int h; @@ -558,6 +559,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) @@ -626,7 +629,7 @@ static int translate_table(const char *name, newinfo, entry0, entry0 + size, - hook_entries, underflows, &i); + hook_entries, underflows, valid_hooks, &i); duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3431a771ff1..6e7b7e8b80b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -714,6 +714,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, + unsigned int valid_hooks, unsigned int *i) { unsigned int h; @@ -733,6 +734,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) @@ -804,7 +807,7 @@ translate_table(const char *name, newinfo, entry0, entry0 + size, - hook_entries, underflows, &i); + hook_entries, underflows, valid_hooks, &i); if (ret != 0) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1389ad904bc..8e4921a937f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -747,6 +747,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, + unsigned int valid_hooks, unsigned int *i) { unsigned int h; @@ -766,6 +767,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) @@ -837,7 +840,7 @@ translate_table(const char *name, newinfo, entry0, entry0 + size, - hook_entries, underflows, &i); + hook_entries, underflows, valid_hooks, &i); if (ret != 0) return ret; -- cgit v1.2.3-70-g09d2 From 90e7d4ab5c8b0c4c2e00e4893977f6aeec0f18f1 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 9 Jul 2009 22:54:53 +0200 Subject: netfilter: xtables: check for unconditionality of policies This adds a check that iptables's original author Rusty set forth in a FIXME comment. Underflows in iptables are better known as chain policies, and are required to be unconditional or there would be a stochastical chance for the policy rule to be skipped if it does not match. If that were to happen, rule execution would continue in an unexpected spurious fashion. Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 12 +++++++----- net/ipv4/netfilter/ip_tables.c | 11 +++++++---- net/ipv6/netfilter/ip6_tables.c | 12 +++++++----- 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d91f0834d57..064082dffaf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -8,7 +8,7 @@ * Copyright (C) 2002 David S. Miller (davem@redhat.com) * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -563,13 +563,15 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) + if ((unsigned char *)e - base == underflows[h]) { + if (!unconditional(&e->arp)) { + pr_err("Underflows must be unconditional\n"); + return -EINVAL; + } newinfo->underflow[h] = underflows[h]; + } } - /* FIXME: underflows must be unconditional, standard verdicts - < 0 (not ARPT_RETURN). --RR */ - /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e7b7e8b80b..6e546d573d9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -8,6 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -738,13 +739,15 @@ check_entry_size_and_hooks(struct ipt_entry *e, continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) + if ((unsigned char *)e - base == underflows[h]) { + if (!unconditional(&e->ip)) { + pr_err("Underflows must be unconditional\n"); + return -EINVAL; + } newinfo->underflow[h] = underflows[h]; + } } - /* FIXME: underflows must be unconditional, standard verdicts - < 0 (not IPT_RETURN). --RR */ - /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 8e4921a937f..b0599b98d1b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -771,13 +771,15 @@ check_entry_size_and_hooks(struct ip6t_entry *e, continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) + if ((unsigned char *)e - base == underflows[h]) { + if (!unconditional(&e->ipv6)) { + pr_err("Underflows must be unconditional\n"); + return -EINVAL; + } newinfo->underflow[h] = underflows[h]; + } } - /* FIXME: underflows must be unconditional, standard verdicts - < 0 (not IP6T_RETURN). --RR */ - /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; -- cgit v1.2.3-70-g09d2 From e2fe35c17fed62d4ab5038fa9bc489e967ff8416 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 18 Jul 2009 15:22:30 +0200 Subject: netfilter: xtables: check for standard verdicts in policies This adds the second check that Rusty wanted to have a long time ago. :-) Base chain policies must have absolute verdicts that cease processing in the table, otherwise rule execution may continue in an unexpected spurious fashion (e.g. next chain that follows in memory). Signed-off-by: Jan Engelhardt --- net/ipv4/netfilter/arp_tables.c | 21 +++++++++++++++++++-- net/ipv4/netfilter/ip_tables.c | 21 +++++++++++++++++++-- net/ipv6/netfilter/ip6_tables.c | 21 +++++++++++++++++++-- 3 files changed, 57 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 064082dffaf..7bc11ffbb84 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -533,6 +533,21 @@ out: return ret; } +static bool check_underflow(struct arpt_entry *e) +{ + const struct arpt_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->arp)) + return false; + t = arpt_get_target(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct arpt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + static inline int check_entry_size_and_hooks(struct arpt_entry *e, struct xt_table_info *newinfo, unsigned char *base, @@ -564,8 +579,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!unconditional(&e->arp)) { - pr_err("Underflows must be unconditional\n"); + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e546d573d9..0b43fd7ca04 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -708,6 +708,21 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, return ret; } +static bool check_underflow(struct ipt_entry *e) +{ + const struct ipt_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->ip)) + return false; + t = ipt_get_target(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct ipt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, @@ -740,8 +755,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!unconditional(&e->ip)) { - pr_err("Underflows must be unconditional\n"); + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b0599b98d1b..a5d0c27cc26 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -740,6 +740,21 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return ret; } +static bool check_underflow(struct ip6t_entry *e) +{ + const struct ip6t_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->ipv6)) + return false; + t = ip6t_get_target(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct ip6t_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, @@ -772,8 +787,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { - if (!unconditional(&e->ipv6)) { - pr_err("Underflows must be unconditional\n"); + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; -- cgit v1.2.3-70-g09d2 From 35aad0ffdf548617940ca1e78be1f2e0bafc4496 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 24 Aug 2009 14:56:30 +0200 Subject: netfilter: xtables: mark initial tables constant The inputted table is never modified, so should be considered const. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 +- include/linux/netfilter_arp/arp_tables.h | 2 +- include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtables.c | 13 +++++++------ net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/arptable_filter.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 4 ++-- net/ipv4/netfilter/iptable_raw.c | 4 ++-- net/ipv4/netfilter/iptable_security.c | 4 ++-- net/ipv4/netfilter/nf_nat_rule.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 4 ++-- net/ipv6/netfilter/ip6table_raw.c | 4 ++-- net/ipv6/netfilter/ip6table_security.c | 4 ++-- net/netfilter/x_tables.c | 7 ++++--- 22 files changed, 42 insertions(+), 37 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 4fa6e4c263e..812cb153cab 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -407,7 +407,7 @@ extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 590ac3d6d5d..6fe3e6aa10d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -265,7 +265,7 @@ struct arpt_error } extern struct xt_table *arpt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index e40ddb94b1a..ea281e6a204 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -301,7 +301,7 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) extern struct ebt_table *ebt_register_table(struct net *net, - struct ebt_table *table); + const struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 092bd50581a..61fafc868a7 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -245,7 +245,7 @@ ipt_get_target(struct ipt_entry *e) extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct ipt_replace *repl); extern void ipt_unregister_table(struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 1089e33cf63..a64e1451ac3 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -306,7 +306,7 @@ ip6t_get_target(struct ip6t_entry *e) extern void ip6t_init(void) __init; extern struct xt_table *ip6t_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index c751111440f..d32ab13e728 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -41,7 +41,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table broute_table = +static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 4b988db3cd4..60b1a6ca718 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -50,7 +50,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_filter = +static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 37928d5f284..bd1c65425d4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1103,23 +1103,24 @@ free_newinfo: return ret; } -struct ebt_table *ebt_register_table(struct net *net, struct ebt_table *table) +struct ebt_table * +ebt_register_table(struct net *net, const struct ebt_table *input_table) { struct ebt_table_info *newinfo; - struct ebt_table *t; + struct ebt_table *t, *table; struct ebt_replace_kernel *repl; int ret, i, countersize; void *p; - if (!table || !(repl = table->table) || !repl->entries || - repl->entries_size == 0 || - repl->counters || table->private) { + if (input_table == NULL || (repl = input_table->table) == NULL || + repl->entries == 0 || repl->entries_size == 0 || + repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); return ERR_PTR(-EINVAL); } /* Don't add one table to multiple lists. */ - table = kmemdup(table, sizeof(struct ebt_table), GFP_KERNEL); + table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7bc11ffbb84..27774c99d88 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1778,7 +1778,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, +struct xt_table *arpt_register_table(struct net *net, + const struct xt_table *table, const struct arpt_replace *repl) { int ret; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 6ecfdae7c58..97337601827 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -15,7 +15,7 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static struct +static const struct { struct arpt_replace repl; struct arpt_standard entries[3]; @@ -45,7 +45,7 @@ static struct .term = ARPT_ERROR_INIT, }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0b43fd7ca04..cde755d5eea 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2065,7 +2065,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, +struct xt_table *ipt_register_table(struct net *net, + const struct xt_table *table, const struct ipt_replace *repl) { int ret; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 97dbd94a8e3..df566cbd68e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -53,7 +53,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 28647f10aa7..036047f9b0f 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -28,7 +28,7 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_POST_ROUTING)) /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[5]; @@ -64,7 +64,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 494784c999e..993edc23be0 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,7 +9,7 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[2]; @@ -36,7 +36,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 8804e1a0f91..99eb76c65d2 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[3]; @@ -57,7 +57,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table security_table = { +static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6448a9b7d6f..9e81e0dfb4e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,7 +28,7 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[3]; @@ -58,7 +58,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table nat_table = { +static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a5d0c27cc26..cc9f8ef303f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2100,7 +2100,8 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct net *net, struct xt_table *table, +struct xt_table *ip6t_register_table(struct net *net, + const struct xt_table *table, const struct ip6t_replace *repl) { int ret; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 0a3ae48ac4d..6f4383ad86f 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -51,7 +51,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 0f49e005a8c..0ad91433ed6 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,7 +21,7 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[5]; @@ -57,7 +57,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 679865e3d5f..ed1a1180f3b 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,7 +8,7 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[2]; @@ -35,7 +35,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 822afabbdc8..41b444c6093 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -26,7 +26,7 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[3]; @@ -56,7 +56,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table security_table = { +static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 025d1a0af78..a6ac83a9334 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -736,16 +736,17 @@ xt_replace_table(struct xt_table *table, } EXPORT_SYMBOL_GPL(xt_replace_table); -struct xt_table *xt_register_table(struct net *net, struct xt_table *table, +struct xt_table *xt_register_table(struct net *net, + const struct xt_table *input_table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { int ret; struct xt_table_info *private; - struct xt_table *t; + struct xt_table *t, *table; /* Don't add one object to multiple lists. */ - table = kmemdup(table, sizeof(struct xt_table), GFP_KERNEL); + table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; -- cgit v1.2.3-70-g09d2 From cce5a5c3029f731b5ea17a8a9a953ff742abf0d6 Mon Sep 17 00:00:00 2001 From: Maximilian Engelhardt Date: Mon, 24 Aug 2009 19:24:54 +0200 Subject: netfilter: nf_nat: fix inverted logic for persistent NAT mappings Kernel 2.6.30 introduced a patch [1] for the persistent option in the netfilter SNAT target. This is exactly what we need here so I had a quick look at the code and noticed that the patch is wrong. The logic is simply inverted. The patch below fixes this. Also note that because of this the default behavior of the SNAT target has changed since kernel 2.6.30 as it now ignores the destination IP in choosing the source IP for nating (which should only be the case if the persistent option is set). [1] http://git.eu.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=98d500d66cb7940747b424b245fc6a51ecfbf005 Signed-off-by: Maximilian Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3229e0a81ba..b6ddd563304 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -212,7 +212,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, range->flags & IP_NAT_RANGE_PERSISTENT ? - (__force u32)tuple->dst.u3.ip : 0, 0); + 0 : (__force u32)tuple->dst.u3.ip, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } -- cgit v1.2.3-70-g09d2 From 74f7a6552c8d76ffc5e11eb8d9d6c07238b9ae77 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 15:33:08 +0200 Subject: netfilter: nf_conntrack: log packets dropped by helpers Log packets dropped by helpers using the netfilter logging API. This is useful in combination with nfnetlink_log to analyze those packets in userspace for debugging. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +++++- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 9ac2fdc36ec..aa95bb82ee6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -26,6 +26,7 @@ #include #include #include +#include int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -113,8 +114,11 @@ static unsigned int ipv4_confirm(unsigned int hooknum, ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, + "nf_ct_%s: dropping packet", helper->name); return ret; + } if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index a7f4cd60735..5f2ec208a8c 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -27,6 +27,7 @@ #include #include #include +#include static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) @@ -176,8 +177,11 @@ static unsigned int ipv6_confirm(unsigned int hooknum, } ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT) + if (ret != NF_ACCEPT) { + nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, + "nf_ct_%s: dropping packet", helper->name); return ret; + } out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); -- cgit v1.2.3-70-g09d2 From 3993832464dd4e14a4c926583a11f0fa92c1f0f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 16:07:58 +0200 Subject: netfilter: nfnetlink: constify message attributes and headers Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 3 +- include/net/netfilter/nf_nat_core.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 6 ++-- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 54 ++++++++++++++++++++++-------------- net/netfilter/nfnetlink.c | 2 +- net/netfilter/nfnetlink_log.c | 6 ++-- net/netfilter/nfnetlink_queue.c | 9 ++++-- net/netfilter/xt_osf.c | 6 ++-- 9 files changed, 55 insertions(+), 35 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index bff4d5741d9..9f00da287f2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -58,7 +58,8 @@ struct nfgenmsg { struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]); + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 58684066388..33602ab6619 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -31,6 +31,6 @@ struct nlattr; extern int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr); + const struct nlattr *attr); #endif /* _NF_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index b6ddd563304..68afc6ecd34 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -620,7 +620,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { }; static int -nfnetlink_parse_nat(struct nlattr *nat, +nfnetlink_parse_nat(const struct nlattr *nat, const struct nf_conn *ct, struct nf_nat_range *range) { struct nlattr *tb[CTA_NAT_MAX+1]; @@ -656,7 +656,7 @@ nfnetlink_parse_nat(struct nlattr *nat, static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { struct nf_nat_range range; @@ -671,7 +671,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { return -EOPNOTSUPP; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b5869b9574b..565c3a86423 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -47,7 +47,7 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) __read_mostly; + const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); DEFINE_SPINLOCK(nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 49479d19457..59d8064eb52 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -704,7 +704,8 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, } static int -ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, +ctnetlink_parse_tuple(const struct nlattr * const cda[], + struct nf_conntrack_tuple *tuple, enum ctattr_tuple type, u_int8_t l3num) { struct nlattr *tb[CTA_TUPLE_MAX+1]; @@ -740,7 +741,7 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, } static inline int -ctnetlink_parse_help(struct nlattr *attr, char **helper_name) +ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) { struct nlattr *tb[CTA_HELP_MAX+1]; @@ -764,7 +765,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -823,7 +825,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, static int ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -884,7 +887,7 @@ out: static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; @@ -914,7 +917,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, #endif static int -ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { unsigned long d; unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); @@ -940,7 +943,7 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) } static int -ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_NAT_NEEDED int ret; @@ -966,7 +969,7 @@ ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -1028,7 +1031,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); @@ -1042,9 +1045,10 @@ ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { - struct nlattr *tb[CTA_PROTOINFO_MAX+1], *attr = cda[CTA_PROTOINFO]; + const struct nlattr *attr = cda[CTA_PROTOINFO]; + struct nlattr *tb[CTA_PROTOINFO_MAX+1]; struct nf_conntrack_l4proto *l4proto; int err = 0; @@ -1061,7 +1065,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) #ifdef CONFIG_NF_NAT_NEEDED static inline int -change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr) +change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; @@ -1089,7 +1093,8 @@ change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr) } static int -ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_nat_seq_adj(struct nf_conn *ct, + const struct nlattr * const cda[]) { int ret = 0; struct nf_conn_nat *nat = nfct_nat(ct); @@ -1120,7 +1125,8 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[]) #endif static int -ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_conntrack(struct nf_conn *ct, + const struct nlattr * const cda[]) { int err; @@ -1169,7 +1175,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) } static struct nf_conn * -ctnetlink_create_conntrack(struct nlattr *cda[], +ctnetlink_create_conntrack(const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, u8 u3) @@ -1304,7 +1310,8 @@ err1: static int ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1629,7 +1636,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1689,7 +1697,8 @@ out: static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1767,13 +1776,15 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } static int -ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) +ctnetlink_change_expect(struct nf_conntrack_expect *x, + const struct nlattr * const cda[]) { return -EOPNOTSUPP; } static int -ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) +ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, + u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1831,7 +1842,8 @@ out: static int ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 92761a98837..eedc0c1ac7a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, cda); + err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 66a6dd5c519..f900dc3194a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -694,7 +694,8 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -716,7 +717,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { static int nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfula[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfula[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 71daa0934b6..7a9dec9fb82 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -608,7 +608,8 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -670,7 +671,8 @@ err_out_unlock: static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -687,7 +689,8 @@ static const struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 0f482e2440b..63e19050465 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -70,7 +70,8 @@ static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) } static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) + const struct nlmsghdr *nlh, + const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; struct xt_osf_finger *kf = NULL, *sf; @@ -112,7 +113,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, } static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) + const struct nlmsghdr *nlh, + const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; struct xt_osf_finger *sf; -- cgit v1.2.3-70-g09d2 From 3d1427f87002735aa54c370558e0c2bacc61f31e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Aug 2009 23:45:21 -0700 Subject: ipv4: af_inet.c cleanups Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 112 ++++++++++++++++++++++++++--------------------------- 1 file changed, 55 insertions(+), 57 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 197d024b253..6c30a73f03f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); struct ipv4_config ipv4_config; - EXPORT_SYMBOL(ipv4_config); /* New destruction routine */ @@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk) sk_mem_reclaim(sk); if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { - printk("Attempt to release TCP socket in state %d %p\n", + pr_err("Attempt to release TCP socket in state %d %p\n", sk->sk_state, sk); return; } if (!sock_flag(sk, SOCK_DEAD)) { - printk("Attempt to release alive inet socket %p\n", sk); + pr_err("Attempt to release alive inet socket %p\n", sk); return; } @@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk) dst_release(sk->sk_dst_cache); sk_refcnt_debug_dec(sk); } +EXPORT_SYMBOL(inet_sock_destruct); /* * The routines beyond this point handle the behaviour of an AF_INET @@ -219,6 +219,7 @@ out: release_sock(sk); return err; } +EXPORT_SYMBOL(inet_listen); u32 inet_ehash_secret __read_mostly; EXPORT_SYMBOL(inet_ehash_secret); @@ -435,9 +436,11 @@ int inet_release(struct socket *sock) } return 0; } +EXPORT_SYMBOL(inet_release); /* It is off by default, see below. */ int sysctl_ip_nonlocal_bind __read_mostly; +EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -519,6 +522,7 @@ out_release_sock: out: return err; } +EXPORT_SYMBOL(inet_bind); int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, int addr_len, int flags) @@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, return -EAGAIN; return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } +EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo) { @@ -641,6 +646,7 @@ sock_error: sock->state = SS_DISCONNECTING; goto out; } +EXPORT_SYMBOL(inet_stream_connect); /* * Accept a pending connection. The TCP layer now gives BSD semantics. @@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) do_err: return err; } +EXPORT_SYMBOL(inet_accept); /* @@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, *uaddr_len = sizeof(*sin); return 0; } +EXPORT_SYMBOL(inet_getname); int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) @@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, return sk->sk_prot->sendmsg(iocb, sk, msg, size); } +EXPORT_SYMBOL(inet_sendmsg); -static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) +static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) { struct sock *sk = sock->sk; @@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how) release_sock(sk); return err; } +EXPORT_SYMBOL(inet_shutdown); /* * ioctl() calls you can issue on an INET socket. Most of these are @@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct net *net = sock_net(sk); switch (cmd) { - case SIOCGSTAMP: - err = sock_get_timestamp(sk, (struct timeval __user *)arg); - break; - case SIOCGSTAMPNS: - err = sock_get_timestampns(sk, (struct timespec __user *)arg); - break; - case SIOCADDRT: - case SIOCDELRT: - case SIOCRTMSG: - err = ip_rt_ioctl(net, cmd, (void __user *)arg); - break; - case SIOCDARP: - case SIOCGARP: - case SIOCSARP: - err = arp_ioctl(net, cmd, (void __user *)arg); - break; - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCSIFPFLAGS: - case SIOCGIFPFLAGS: - case SIOCSIFFLAGS: - err = devinet_ioctl(net, cmd, (void __user *)arg); - break; - default: - if (sk->sk_prot->ioctl) - err = sk->sk_prot->ioctl(sk, cmd, arg); - else - err = -ENOIOCTLCMD; - break; + case SIOCGSTAMP: + err = sock_get_timestamp(sk, (struct timeval __user *)arg); + break; + case SIOCGSTAMPNS: + err = sock_get_timestampns(sk, (struct timespec __user *)arg); + break; + case SIOCADDRT: + case SIOCDELRT: + case SIOCRTMSG: + err = ip_rt_ioctl(net, cmd, (void __user *)arg); + break; + case SIOCDARP: + case SIOCGARP: + case SIOCSARP: + err = arp_ioctl(net, cmd, (void __user *)arg); + break; + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCSIFPFLAGS: + case SIOCGIFPFLAGS: + case SIOCSIFFLAGS: + err = devinet_ioctl(net, cmd, (void __user *)arg); + break; + default: + if (sk->sk_prot->ioctl) + err = sk->sk_prot->ioctl(sk, cmd, arg); + else + err = -ENOIOCTLCMD; + break; } return err; } +EXPORT_SYMBOL(inet_ioctl); const struct proto_ops inet_stream_ops = { .family = PF_INET, @@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = { .compat_getsockopt = compat_sock_common_getsockopt, #endif }; +EXPORT_SYMBOL(inet_stream_ops); const struct proto_ops inet_dgram_ops = { .family = PF_INET, @@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = { .compat_getsockopt = compat_sock_common_getsockopt, #endif }; +EXPORT_SYMBOL(inet_dgram_ops); /* * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without @@ -1016,6 +1030,7 @@ out_illegal: p->type); goto out; } +EXPORT_SYMBOL(inet_register_protosw); void inet_unregister_protosw(struct inet_protosw *p) { @@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p) synchronize_net(); } } +EXPORT_SYMBOL(inet_unregister_protosw); /* * Shall we try to damage output packets if routing dev changes? @@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk) return err; } - EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) @@ -1369,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } return rc; } - EXPORT_SYMBOL_GPL(inet_ctl_sock_create); unsigned long snmp_fold_field(void *mib[], int offt) @@ -1676,19 +1690,3 @@ static int __init ipv4_proc_init(void) MODULE_ALIAS_NETPROTO(PF_INET); -EXPORT_SYMBOL(inet_accept); -EXPORT_SYMBOL(inet_bind); -EXPORT_SYMBOL(inet_dgram_connect); -EXPORT_SYMBOL(inet_dgram_ops); -EXPORT_SYMBOL(inet_getname); -EXPORT_SYMBOL(inet_ioctl); -EXPORT_SYMBOL(inet_listen); -EXPORT_SYMBOL(inet_register_protosw); -EXPORT_SYMBOL(inet_release); -EXPORT_SYMBOL(inet_sendmsg); -EXPORT_SYMBOL(inet_shutdown); -EXPORT_SYMBOL(inet_sock_destruct); -EXPORT_SYMBOL(inet_stream_connect); -EXPORT_SYMBOL(inet_stream_ops); -EXPORT_SYMBOL(inet_unregister_protosw); -EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); -- cgit v1.2.3-70-g09d2 From df19a6267705456f463871ae2aabc44299909d2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Aug 2009 23:48:54 -0700 Subject: tcp: keepalive cleanups Introduce keepalive_probes(tp) helper, and use it, like keepalive_time_when(tp) and keepalive_intvl_when(tp) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp.c | 6 +++--- net/ipv4/tcp_timer.c | 3 +-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 88af8430647..cbb2a4889fc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1007,6 +1007,11 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } +static inline int keepalive_probes(const struct tcp_sock *tp) +{ + return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; +} + static inline int tcp_fin_time(const struct sock *sk) { int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 91145244ea6..59f69a6c586 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = !!(tp->nonagle&TCP_NAGLE_CORK); break; case TCP_KEEPIDLE: - val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; + val = keepalive_time_when(tp) / HZ; break; case TCP_KEEPINTVL: - val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ; + val = keepalive_intvl_when(tp) / HZ; break; case TCP_KEEPCNT: - val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; + val = keepalive_probes(tp); break; case TCP_SYNCNT: val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b144a26359b..c520fb6e06d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -499,8 +499,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = tcp_time_stamp - tp->rcv_tstamp; if (elapsed >= keepalive_time_when(tp)) { - if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || - (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { + if (icsk->icsk_probes_out >= keepalive_probes(tp)) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; -- cgit v1.2.3-70-g09d2 From 30038fc61adfdab162b1966e34261f06eda67f02 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Aug 2009 23:52:01 -0700 Subject: net: ip_rt_send_redirect() optimization While doing some forwarding benchmarks, I noticed ip_rt_send_redirect() is rather expensive, even if send_redirects is false for the device. Fix is to avoid two atomic ops, we dont really need to take a reference on in_dev Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fafbe163e2b..91867d3e632 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) void ip_rt_send_redirect(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); - struct in_device *in_dev = in_dev_get(rt->u.dst.dev); + struct in_device *in_dev; + int log_martians; - if (!in_dev) + rcu_read_lock(); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); + if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { + rcu_read_unlock(); return; - - if (!IN_DEV_TX_REDIRECTS(in_dev)) - goto out; + } + log_martians = IN_DEV_LOG_MARTIANS(in_dev); + rcu_read_unlock(); /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. @@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) */ if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { rt->u.dst.rate_last = jiffies; - goto out; + return; } /* Check for load limit; set rate_last to the latest sent @@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) rt->u.dst.rate_last = jiffies; ++rt->u.dst.rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE - if (IN_DEV_LOG_MARTIANS(in_dev) && + if (log_martians && rt->u.dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", @@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb) &rt->rt_dst, &rt->rt_gateway); #endif } -out: - in_dev_put(in_dev); } static int ip_error(struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 80b71b80df14d885f7e50e115c1348398f418759 Mon Sep 17 00:00:00 2001 From: Jens Låås Date: Fri, 28 Aug 2009 23:57:15 -0700 Subject: fib_trie: resize rework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here is rework and cleanup of the resize function. Some bugs we had. We were using ->parent when we should use node_parent(). Also we used ->parent which is not assigned by inflate in inflate loop. Also a fix to set thresholds to power 2 to fit halve and double strategy. max_resize is renamed to max_work which better indicates it's function. Reaching max_work is not an error, so warning is removed. max_work only limits amount of work done per resize. (limits CPU-usage, outstanding memory etc). The clean-up makes it relatively easy to add fixed sized root-nodes if we would like to decrease the memory pressure on routers with large routing tables and dynamic routing. If we'll need that... Its been tested with 280k routes. Work done together with Robert Olsson. Signed-off-by: Jens Låås Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 95 +++++++++++++---------------------------------------- 1 file changed, 23 insertions(+), 72 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fe3c846b99a..291bdf50a21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -48,7 +48,7 @@ * Patrick McHardy */ -#define VERSION "0.408" +#define VERSION "0.409" #include #include @@ -325,10 +325,7 @@ static inline void check_tnode(const struct tnode *tn) static const int halve_threshold = 25; static const int inflate_threshold = 50; static const int halve_threshold_root = 15; -static const int inflate_threshold_root = 25; - -static int inflate_threshold_root_fix; -#define INFLATE_FIX_MAX 10 /* a comment in resize() */ +static const int inflate_threshold_root = 30; static void __alias_free_mem(struct rcu_head *head) { @@ -516,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, rcu_assign_pointer(tn->child[i], n); } +#define MAX_WORK 10 static struct node *resize(struct trie *t, struct tnode *tn) { int i; - int err = 0; struct tnode *old_tn; int inflate_threshold_use; int halve_threshold_use; - int max_resize; + int max_work; if (!tn) return NULL; @@ -538,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) } /* One child */ if (tn->empty_children == tnode_child_length(tn) - 1) - for (i = 0; i < tnode_child_length(tn); i++) { - struct node *n; - - n = tn->child[i]; - if (!n) - continue; - - /* compress one level */ - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } + goto one_child; /* * Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. @@ -618,15 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ - if (!tn->parent) - inflate_threshold_use = inflate_threshold_root + - inflate_threshold_root_fix; - else + if (!node_parent((struct node*) tn)) { + inflate_threshold_use = inflate_threshold_root; + halve_threshold_use = halve_threshold_root; + } + else { inflate_threshold_use = inflate_threshold; + halve_threshold_use = halve_threshold; + } - err = 0; - max_resize = 10; - while ((tn->full_children > 0 && max_resize-- && + max_work = MAX_WORK; + while ((tn->full_children > 0 && max_work-- && 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= inflate_threshold_use * tnode_child_length(tn))) { @@ -643,47 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn) } } - if (max_resize < 0) { - if (!tn->parent) { - /* - * It was observed that during large updates even - * inflate_threshold_root = 35 might be needed to avoid - * this warning; but it should be temporary, so let's - * try to handle this automatically. - */ - if (inflate_threshold_root_fix < INFLATE_FIX_MAX) - inflate_threshold_root_fix++; - else - pr_warning("Fix inflate_threshold_root." - " Now=%d size=%d bits fix=%d\n", - inflate_threshold_root, tn->bits, - inflate_threshold_root_fix); - } else { - pr_warning("Fix inflate_threshold." - " Now=%d size=%d bits\n", - inflate_threshold, tn->bits); - } - } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix) - inflate_threshold_root_fix--; - check_tnode(tn); + /* Return if at least one inflate is run */ + if( max_work != MAX_WORK) + return (struct node *) tn; + /* * Halve as long as the number of empty children in this * node is above threshold. */ - - /* Keep root node larger */ - - if (!tn->parent) - halve_threshold_use = halve_threshold_root; - else - halve_threshold_use = halve_threshold; - - err = 0; - max_resize = 10; - while (tn->bits > 1 && max_resize-- && + max_work = MAX_WORK; + while (tn->bits > 1 && max_work-- && 100 * (tnode_child_length(tn) - tn->empty_children) < halve_threshold_use * tnode_child_length(tn)) { @@ -698,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) } } - if (max_resize < 0) { - if (!tn->parent) - pr_warning("Fix halve_threshold_root." - " Now=%d size=%d bits\n", - halve_threshold_root, tn->bits); - else - pr_warning("Fix halve_threshold." - " Now=%d size=%d bits\n", - halve_threshold, tn->bits); - } /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) + if (tn->empty_children == tnode_child_length(tn) - 1) { +one_child: for (i = 0; i < tnode_child_length(tn); i++) { struct node *n; @@ -724,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) tnode_free_safe(tn); return n; } - + } return (struct node *) tn; } -- cgit v1.2.3-70-g09d2 From 80a1096bac73ce6e98dbbce10cc00a154460bcbd Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 29 Aug 2009 00:00:35 -0700 Subject: tcp: fix premature termination of FIN_WAIT2 time-wait sockets There is a race condition in the time-wait sockets code that can lead to premature termination of FIN_WAIT2 and, subsequently, to RST generation when the FIN,ACK from the peer finally arrives: Time TCP header 0.000000 30755 > http [SYN] Seq=0 Win=2920 Len=0 MSS=1460 TSV=282912 TSER=0 0.000008 http > 30755 aSYN, ACK] Seq=0 Ack=1 Win=2896 Len=0 MSS=1460 TSV=... 0.136899 HEAD /1b.html?n1Lg=v1 HTTP/1.0 [Packet size limited during capture] 0.136934 HTTP/1.0 200 OK [Packet size limited during capture] 0.136945 http > 30755 [FIN, ACK] Seq=187 Ack=207 Win=2690 Len=0 TSV=270521... 0.136974 30755 > http [ACK] Seq=207 Ack=187 Win=2734 Len=0 TSV=283049 TSER=... 0.177983 30755 > http [ACK] Seq=207 Ack=188 Win=2733 Len=0 TSV=283089 TSER=... 0.238618 30755 > http [FIN, ACK] Seq=207 Ack=188 Win=2733 Len=0 TSV=283151... 0.238625 http > 30755 [RST] Seq=188 Win=0 Len=0 Say twdr->slot = 1 and we are running inet_twdr_hangman and in this instance inet_twdr_do_twkill_work returns 1. At that point we will mark slot 1 and schedule inet_twdr_twkill_work. We will also make twdr->slot = 2. Next, a connection is closed and tcp_time_wait(TCP_FIN_WAIT2, timeo) is called which will create a new FIN_WAIT2 time-wait socket and will place it in the last to be reached slot, i.e. twdr->slot = 1. At this point say inet_twdr_twkill_work will run which will start destroying the time-wait sockets in slot 1, including the just added TCP_FIN_WAIT2 one. To avoid this issue we increment the slot only if all entries in the slot have been purged. This change may delay the slots cleanup by a time-wait death row period but only if the worker thread didn't had the time to run/purge the current slot in the next period (6 seconds with default sysctl settings). However, on such a busy system even without this change we would probably see delays... Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/inet_timewait_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 61283f92882..13f0781f35c 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data) /* We purged the entire slot, anything left? */ if (twdr->tw_count) need_timer = 1; + twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); } - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); if (need_timer) mod_timer(&twdr->tw_timer, jiffies + twdr->period); out: -- cgit v1.2.3-70-g09d2 From 9a7030b76ab3c2b23b1629c49b6df33fb5aed305 Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Wed, 19 Aug 2009 09:47:41 +0000 Subject: tcp: Remove redundant copy of MD5 authentication key Remove the copy of the MD5 authentication key from tcp_check_req(). This key has already been copied by tcp_v4_syn_recv_sock() or tcp_v6_syn_recv_sock(). Signed-off-by: John Dykstra Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f8d67ccc64f..6c8b42299d9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); if (child == NULL) goto listen_overflow; -#ifdef CONFIG_TCP_MD5SIG - else { - /* Copy over the MD5 key from the original socket */ - struct tcp_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - key = tp->af_specific->md5_lookup(sk, child); - if (key != NULL) { - /* - * We're using one, so create a matching key on the - * newsk structure. If we fail to get memory then we - * end up not copying the key across. Shucks. - */ - char *newkey = kmemdup(key->key, key->keylen, - GFP_ATOMIC); - if (newkey) { - if (!tcp_alloc_md5sig_pool()) - BUG(); - tp->af_specific->md5_add(child, child, newkey, - key->keylen); - } - } - } -#endif inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); -- cgit v1.2.3-70-g09d2 From 6fef4c0c8eeff7de13007a5f56113475444a253d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:41 +0000 Subject: netdev: convert pseudo-devices to netdev_tx_t Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/ieee1394/eth1394.c | 14 ++++---------- net/8021q/vlan_dev.c | 7 ++++--- net/bluetooth/bnep/netdev.c | 3 ++- net/bridge/br_device.c | 2 +- net/bridge/br_private.h | 3 ++- net/core/pktgen.c | 2 +- net/dsa/dsa_priv.h | 6 +++--- net/dsa/tag_dsa.c | 2 +- net/dsa/tag_edsa.c | 2 +- net/dsa/tag_trailer.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6mr.c | 3 ++- net/ipv6/sit.c | 3 ++- net/sched/sch_teql.c | 4 ++-- 17 files changed, 30 insertions(+), 31 deletions(-) (limited to 'net/ipv4') diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c index f5c586c2bba..1ad8785e398 100644 --- a/drivers/ieee1394/eth1394.c +++ b/drivers/ieee1394/eth1394.c @@ -169,7 +169,8 @@ static int ether1394_header_cache(const struct neighbour *neigh, static void ether1394_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); -static int ether1394_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ether1394_tx(struct sk_buff *skb, + struct net_device *dev); static void ether1394_iso(struct hpsb_iso *iso); static struct ethtool_ops ethtool_ops; @@ -1555,7 +1556,8 @@ static void ether1394_complete_cb(void *__ptask) } /* Transmit a packet (called by kernel) */ -static int ether1394_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ether1394_tx(struct sk_buff *skb, + struct net_device *dev) { struct eth1394hdr hdr_buf; struct eth1394_priv *priv = netdev_priv(dev); @@ -1694,14 +1696,6 @@ fail: dev->stats.tx_errors++; spin_unlock_irqrestore(&priv->lock, flags); - /* - * FIXME: According to a patch from 2003-02-26, "returning non-zero - * causes serious problems" here, allegedly. Before that patch, - * -ERRNO was returned which is not appropriate under Linux 2.6. - * Perhaps more needs to be done? Stop the queue in serious - * conditions and restart it elsewhere? - */ - /* return NETDEV_TX_BUSY; */ return NETDEV_TX_OK; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6e695acd1ae..84a2be43185 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -288,7 +288,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return rc; } -static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); @@ -325,8 +326,8 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) +static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); u16 vlan_tci; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 9c42990126a..26fb831ef7e 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -165,7 +165,8 @@ static inline int bnep_net_proto_filter(struct sk_buff *skb, struct bnep_session } #endif -static int bnep_net_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t bnep_net_xmit(struct sk_buff *skb, + struct net_device *dev) { struct bnep_session *s = netdev_priv(dev); struct sock *sk = s->sock->sk; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 15d43ba86b5..07a07770c8b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -20,7 +20,7 @@ #include "br_private.h" /* net device transmit always called with no BH (preempt_disabled) */ -int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); const unsigned char *dest = skb->data; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8319247dad5..2114e45682e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -143,7 +143,8 @@ static inline int br_is_root_bridge(const struct net_bridge *br) /* br_device.c */ extern void br_dev_setup(struct net_device *dev); -extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, + struct net_device *dev); /* br_fdb.c */ extern int br_fdb_init(void); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2001cb1cf5c..0bcecbf0658 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3381,7 +3381,7 @@ static void idle(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; - int (*xmit)(struct sk_buff *, struct net_device *) + netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; u16 queue_map; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 41055f33d28..4b0ea054044 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -169,13 +169,13 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, int port, char *name); /* tag_dsa.c */ -int dsa_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev); /* tag_edsa.c */ -int edsa_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev); /* tag_trailer.c */ -int trailer_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev); #endif diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 8fa25bafe6c..cdf2d28a029 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -15,7 +15,7 @@ #define DSA_HLEN 4 -int dsa_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *dsa_header; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 815607bd286..8f53948cff4 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -16,7 +16,7 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 -int edsa_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *edsa_header; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 1c3e30c38b8..a85c829853c 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -13,7 +13,7 @@ #include #include "dsa_priv.h" -int trailer_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct sk_buff *nskb; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b902ef55be7..5b1af70cd80 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -662,7 +662,7 @@ drop_nolock: return(0); } -static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->dev->stats; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 98075b6d619..62548cb0923 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb) * and that skb is filled properly by that function. */ -static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->dev->stats; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 06c33fb6b32..65d421cf5bc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -201,7 +201,7 @@ failure: #ifdef CONFIG_IP_PIMSM -static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a1d6045c469..7d25bbe3211 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,7 +1036,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static int +static netdev_tx_t ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 07ded5075b3..19c8dec2ead 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -416,7 +416,8 @@ static struct inet6_protocol pim6_protocol = { /* Service routines creating virtual interfaces: PIMREG */ -static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, + struct net_device *dev) { struct net *net = dev_net(dev); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1d25a4dff0a..0ae4f644818 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -609,7 +609,8 @@ static inline __be32 try_6to4(struct in6_addr *v6dst) * and that skb is filled properly by that function. */ -static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->dev->stats; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 12434b6c204..5a002c24723 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -268,7 +268,7 @@ static inline int teql_resolve(struct sk_buff *skb, return __teql_resolve(skb, skb_res, dev); } -static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { struct teql_master *master = netdev_priv(dev); struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); @@ -307,7 +307,7 @@ restart: if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == 0) { + slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3-70-g09d2 From 4d1a2d9ec1c17df077ed09a0d135bccf5637a3b7 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:27 +0000 Subject: Revert Backoff [v3]: Rename skb to icmp_skb in tcp_v4_err() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This supplementary patch renames skb to icmp_skb in tcp_v4_err() in order to disambiguate from another sk_buff variable, which will be introduced in a separate patch. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6d88219c5e2..6ca1bc8c302 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -328,26 +328,26 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) * */ -void tcp_v4_err(struct sk_buff *skb, u32 info) +void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); + struct iphdr *iph = (struct iphdr *)icmp_skb->data; + struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); struct tcp_sock *tp; struct inet_sock *inet; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; + const int type = icmp_hdr(icmp_skb)->type; + const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; __u32 seq; int err; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(icmp_skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { + if (icmp_skb->len < (iph->ihl << 2) + 8) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, - iph->saddr, th->source, inet_iif(skb)); + iph->saddr, th->source, inet_iif(icmp_skb)); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; -- cgit v1.2.3-70-g09d2 From f1ecd5d9e7366609d640ff4040304ea197fbc618 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:31 +0000 Subject: Revert Backoff [v3]: Revert RTO on ICMP destination unreachable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here, an ICMP host/network unreachable message, whose payload fits to TCP's SND.UNA, is taken as an indication that the RTO retransmission has not been lost due to congestion, but because of a route failure somewhere along the path. With true congestion, a router won't trigger such a message and the patched TCP will operate as standard TCP. This patch reverts one RTO backoff, if an ICMP host/network unreachable message, whose payload fits to TCP's SND.UNA, arrives. Based on the new RTO, the retransmission timer is reset to reflect the remaining time, or - if the revert clocked out the timer - a retransmission is sent out immediately. Backoffs are only reverted, if TCP is in RTO loss recovery, i.e. if there have been retransmissions and reversible backoffs, already. Changes from v2: 1) Renaming of skb in tcp_v4_err() moved to another patch. 2) Reintroduced tcp_bound_rto() and __tcp_set_rto(). 3) Fixed code comments. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 12 ++++++++++++ net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_ipv4.c | 37 +++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_timer.c | 2 +- 4 files changed, 52 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index cbb2a4889fc..54f212ce8aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -469,6 +469,7 @@ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); extern int tcp_may_send_now(struct sock *sk); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); +extern void tcp_retransmit_timer(struct sock *sk); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); extern int tcp_trim_head(struct sock *, struct sk_buff *, u32); @@ -521,6 +522,17 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); extern int tcp_mss_to_mtu(struct sock *sk, int mss); extern void tcp_mtup_init(struct sock *sk); +static inline void tcp_bound_rto(const struct sock *sk) +{ + if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) + inet_csk(sk)->icsk_rto = TCP_RTO_MAX; +} + +static inline u32 __tcp_set_rto(const struct tcp_sock *tp) +{ + return (tp->srtt >> 3) + tp->rttvar; +} + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bdb0da237e..af6d6fa00db 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ - inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; + inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk) /* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */ - if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) - inet_csk(sk)->icsk_rto = TCP_RTO_MAX; + tcp_bound_rto(sk); } /* Save metrics learned by this TCP session. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6ca1bc8c302..6755e29a6dd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -332,12 +332,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { struct iphdr *iph = (struct iphdr *)icmp_skb->data; struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); + struct inet_connection_sock *icsk; struct tcp_sock *tp; struct inet_sock *inet; const int type = icmp_hdr(icmp_skb)->type; const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; + struct sk_buff *skb; __u32 seq; + __u32 remaining; int err; struct net *net = dev_net(icmp_skb->dev); @@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && @@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } err = icmp_err_convert[code].errno; + /* check if icmp_skb allows revert of backoff + * (see draft-zimmermann-tcp-lcd) */ + if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) + break; + if (seq != tp->snd_una || !icsk->icsk_retransmits || + !icsk->icsk_backoff) + break; + + icsk->icsk_backoff--; + inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << + icsk->icsk_backoff; + tcp_bound_rto(sk); + + skb = tcp_write_queue_head(sk); + BUG_ON(!skb); + + remaining = icsk->icsk_rto - min(icsk->icsk_rto, + tcp_time_stamp - TCP_SKB_CB(skb)->when); + + if (remaining) { + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + remaining, TCP_RTO_MAX); + } else if (sock_owned_by_user(sk)) { + /* RTO revert clocked out retransmission, + * but socket is locked. Will defer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + HZ/20, TCP_RTO_MAX); + } else { + /* RTO revert clocked out retransmission. + * Will retransmit now */ + tcp_retransmit_timer(sk); + } + break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c520fb6e06d..408fa4b7b9b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -279,7 +279,7 @@ static void tcp_probe_timer(struct sock *sk) * The TCP retransmit timer. */ -static void tcp_retransmit_timer(struct sock *sk) +void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.2.3-70-g09d2 From 6fa12c85031485dff38ce550c24f10da23b0adaa Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:34 +0000 Subject: Revert Backoff [v3]: Calculate TCP's connection close threshold as a time value. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 1122 specifies two threshold values R1 and R2 for connection timeouts, which may represent a number of allowed retransmissions or a timeout value. Currently linux uses sysctl_tcp_retries{1,2} to specify the thresholds in number of allowed retransmissions. For any desired threshold R2 (by means of time) one can specify tcp_retries2 (by means of number of retransmissions) such that TCP will not time out earlier than R2. This is the case, because the RTO schedule follows a fixed pattern, namely exponential backoff. However, the RTO behaviour is not predictable any more if RTO backoffs can be reverted, as it is the case in the draft "Make TCP more Robust to Long Connectivity Disruptions" (http://tools.ietf.org/html/draft-zimmermann-tcp-lcd). In the worst case TCP would time out a connection after 3.2 seconds, if the initial RTO equaled MIN_RTO and each backoff has been reverted. This patch introduces a function retransmits_timed_out(N), which calculates the timeout of a TCP connection, assuming an initial RTO of MIN_RTO and N unsuccessful, exponentially backed-off retransmissions. Whenever timeout decisions are made by comparing the retransmission counter to some value N, this function can be used, instead. The meaning of tcp_retries2 will be changed, as many more RTO retransmissions can occur than the value indicates. However, it yields a timeout which is similar to the one of an unpatched, exponentially backing off TCP in the same scenario. As no application could rely on an RTO greater than MIN_RTO, there should be no risk of a regression. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 18 ++++++++++++++++++ net/ipv4/tcp_timer.c | 11 +++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 54f212ce8aa..e5319495f15 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1252,6 +1252,24 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) +static inline bool retransmits_timed_out(const struct sock *sk, + unsigned int boundary) +{ + int limit, K; + if (!inet_csk(sk)->icsk_retransmits) + return false; + + K = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + + if (boundary <= K) + limit = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + limit = ((2 << K) - 1) * TCP_RTO_MIN + + (boundary - K) * TCP_RTO_MAX; + + return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= limit; +} + static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk->sk_send_head; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 408fa4b7b9b..cdb2ca7684d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; + bool do_reset; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) dst_negative_advice(&sk->sk_dst_cache); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk) const int alive = (icsk->icsk_rto < TCP_RTO_MAX); retry_until = tcp_orphan_retries(sk, alive); + do_reset = alive || + !retransmits_timed_out(sk, retry_until); - if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) + if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (icsk->icsk_retransmits >= retry_until) { + if (retransmits_timed_out(sk, retry_until)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -385,7 +388,7 @@ void tcp_retransmit_timer(struct sock *sk) out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (icsk->icsk_retransmits > sysctl_tcp_retries1) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) __sk_dst_reset(sk); out:; -- cgit v1.2.3-70-g09d2 From 89d69d2b75a8f7e258f4b634cd985374cfd3202e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Sep 2009 11:13:19 +0000 Subject: net: make neigh_ops constant These tables are never modified at runtime. Move to read-only section. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/arp.h | 2 +- include/net/neighbour.h | 2 +- net/atm/clip.c | 2 +- net/decnet/dn_neigh.c | 6 +++--- net/ipv4/arp.c | 8 ++++---- net/ipv6/ndisc.c | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/arp.h b/include/net/arp.h index c236270ec95..716f43c5c98 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -26,6 +26,6 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); -extern struct neigh_ops arp_broken_ops; +extern const struct neigh_ops arp_broken_ops; #endif /* _ARP_H */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 18b69b6ceca..3817fda82a8 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -118,7 +118,7 @@ struct neighbour int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; - struct neigh_ops *ops; + const struct neigh_ops *ops; u8 primary_key[0]; }; diff --git a/net/atm/clip.c b/net/atm/clip.c index 27f6852ce19..64629c35434 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -267,7 +267,7 @@ static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb) kfree_skb(skb); } -static struct neigh_ops clip_neigh_ops = { +static const struct neigh_ops clip_neigh_ops = { .family = AF_INET, .solicit = clip_neigh_solicit, .error_report = clip_neigh_error, diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 923786bd6d0..794b5bf95af 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -59,7 +59,7 @@ static int dn_phase3_output(struct sk_buff *); /* * For talking to broadcast devices: Ethernet & PPP */ -static struct neigh_ops dn_long_ops = { +static const struct neigh_ops dn_long_ops = { .family = AF_DECnet, .error_report = dn_long_error_report, .output = dn_long_output, @@ -71,7 +71,7 @@ static struct neigh_ops dn_long_ops = { /* * For talking to pointopoint and multidrop devices: DDCMP and X.25 */ -static struct neigh_ops dn_short_ops = { +static const struct neigh_ops dn_short_ops = { .family = AF_DECnet, .error_report = dn_short_error_report, .output = dn_short_output, @@ -83,7 +83,7 @@ static struct neigh_ops dn_short_ops = { /* * For talking to DECnet phase III nodes */ -static struct neigh_ops dn_phase3_ops = { +static const struct neigh_ops dn_phase3_ops = { .family = AF_DECnet, .error_report = dn_short_error_report, /* Can use short version here */ .output = dn_phase3_output, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 090e9991ac2..4e80f336c0c 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); static void parp_redo(struct sk_buff *skb); -static struct neigh_ops arp_generic_ops = { +static const struct neigh_ops arp_generic_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, @@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = { .queue_xmit = dev_queue_xmit, }; -static struct neigh_ops arp_hh_ops = { +static const struct neigh_ops arp_hh_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, @@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = { .queue_xmit = dev_queue_xmit, }; -static struct neigh_ops arp_direct_ops = { +static const struct neigh_ops arp_direct_ops = { .family = AF_INET, .output = dev_queue_xmit, .connected_output = dev_queue_xmit, @@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = { .queue_xmit = dev_queue_xmit, }; -struct neigh_ops arp_broken_ops = { +const struct neigh_ops arp_broken_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 44b4c87e5cc..7015478797f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -98,7 +98,7 @@ static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); -static struct neigh_ops ndisc_generic_ops = { +static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, @@ -108,7 +108,7 @@ static struct neigh_ops ndisc_generic_ops = { .queue_xmit = dev_queue_xmit, }; -static struct neigh_ops ndisc_hh_ops = { +static const struct neigh_ops ndisc_hh_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, @@ -119,7 +119,7 @@ static struct neigh_ops ndisc_hh_ops = { }; -static struct neigh_ops ndisc_direct_ops = { +static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, .output = dev_queue_xmit, .connected_output = dev_queue_xmit, -- cgit v1.2.3-70-g09d2 From b2e4b3debc327a5b53d9622e0b1785eea2ea2aad Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Sep 2009 19:25:03 +0000 Subject: tcp: MD5 operations should be const Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 +++--- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8afac76cd74..61723a7c21f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -233,7 +233,7 @@ struct tcp_request_sock { struct inet_request_sock req; #ifdef CONFIG_TCP_MD5SIG /* Only used by TCP MD5 Signature so far. */ - struct tcp_request_sock_ops *af_specific; + const struct tcp_request_sock_ops *af_specific; #endif u32 rcv_isn; u32 snt_isn; @@ -401,9 +401,9 @@ struct tcp_sock { #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ - struct tcp_sock_af_ops *af_specific; + const struct tcp_sock_af_ops *af_specific; -/* TCP MD5 Signagure Option information */ +/* TCP MD5 Signature Option information */ struct tcp_md5sig_info *md5sig_info; #endif }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6755e29a6dd..3efbe94f022 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1195,7 +1195,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, }; @@ -1774,7 +1774,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { +static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v4_md5_add_func, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d849dd53b78..eadbc584e91 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -78,8 +78,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static struct inet_connection_sock_af_ops ipv6_mapped; static struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_sock_af_ops tcp_sock_ipv6_specific; -static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; +static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; +static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #else static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, struct in6_addr *addr) @@ -894,7 +894,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, }; @@ -1780,7 +1780,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_sock_af_ops tcp_sock_ipv6_specific = { +static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_add = tcp_v6_md5_add_func, @@ -1812,7 +1812,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { }; #ifdef CONFIG_TCP_MD5SIG -static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { +static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_add = tcp_v6_md5_add_func, -- cgit v1.2.3-70-g09d2 From 3b401a81c0d50ea9c718cf837f62cc2e6e79cc30 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Sep 2009 19:25:04 +0000 Subject: inet: inet_connection_sock_af_ops const The function block inet_connect_sock_af_ops contains no data make it constant. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/transp_v6.h | 2 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index bfb240c6cf7..d65381cad0f 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -51,7 +51,7 @@ extern int datagram_send_ctl(struct net *net, /* * address family specific functions */ -extern struct inet_connection_sock_af_ops ipv4_specific; +extern const struct inet_connection_sock_af_ops ipv4_specific; extern void inet6_destroy_sock(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a0a36c9e6cc..d01c00de1ad 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -880,7 +880,7 @@ discard_and_relse: goto discard_it; } -static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { +static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .queue_xmit = ip_queue_xmit, .send_check = dccp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3e70faab298..64f011cc449 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -35,8 +35,8 @@ /* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */ -static struct inet_connection_sock_af_ops dccp_ipv6_mapped; -static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; +static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; +static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; static void dccp_v6_hash(struct sock *sk) { @@ -1055,7 +1055,7 @@ failure: return err; } -static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { +static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .queue_xmit = inet6_csk_xmit, .send_check = dccp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, @@ -1076,7 +1076,7 @@ static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { /* * DCCP over IPv4 via INET6 API */ -static struct inet_connection_sock_af_ops dccp_ipv6_mapped = { +static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = dccp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3efbe94f022..ce7d3b021ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1754,7 +1754,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) return 0; } -struct inet_connection_sock_af_ops ipv4_specific = { +const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index eadbc584e91..d73617e9708 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -75,8 +75,8 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static struct inet_connection_sock_af_ops ipv6_mapped; -static struct inet_connection_sock_af_ops ipv6_specific; +static const struct inet_connection_sock_af_ops ipv6_mapped; +static const struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; @@ -1760,7 +1760,7 @@ static int tcp_v6_remember_stamp(struct sock *sk) return 0; } -static struct inet_connection_sock_af_ops ipv6_specific = { +static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, @@ -1792,7 +1792,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { * TCP over IPv4 via INET6 API */ -static struct inet_connection_sock_af_ops ipv6_mapped = { +static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, -- cgit v1.2.3-70-g09d2 From 6ce9e7b5fe3195d1ae6e3a0753d4ddcac5cd699e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Sep 2009 18:05:33 -0700 Subject: ip: Report qdisc packet drops Christoph Lameter pointed out that packet drops at qdisc level where not accounted in SNMP counters. Only if application sets IP_RECVERR, drops are reported to user (-ENOBUFS errors) and SNMP counters updated. IP_RECVERR is used to enable extended reliable error message passing, but these are not needed to update system wide SNMP stats. This patch changes things a bit to allow SNMP counters to be updated, regardless of IP_RECVERR being set or not on the socket. Example after an UDP tx flood # netstat -s ... IP: 1487048 outgoing packets dropped ... Udp: ... SndbufErrors: 1487048 send() syscalls, do however still return an OK status, to not break applications. Note : send() manual page explicitly says for -ENOBUFS error : "The output queue for a network interface was full. This generally indicates that the interface has stopped sending, but may be caused by transient congestion. (Normally, this does not occur in Linux. Packets are just silently dropped when a device queue overflows.) " This is not true for IP_RECVERR enabled sockets : a send() syscall that hit a qdisc drop returns an ENOBUFS error. Many thanks to Christoph, David, and last but not least, Alexey ! Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv4/raw.c | 9 +++++++-- net/ipv4/udp.c | 12 +++++++++--- net/ipv6/ip6_output.c | 2 +- net/ipv6/raw.c | 4 +++- net/ipv6/udp.c | 12 +++++++++--- 6 files changed, 30 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7d082105472..afae0cbabbf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1302,7 +1302,7 @@ int ip_push_pending_frames(struct sock *sk) err = ip_local_out(skb); if (err) { if (err > 0) - err = inet->recverr ? net_xmit_errno(err) : 0; + err = net_xmit_errno(err); if (err) goto error; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2979f14bb18..ebb1e5848bc 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) - err = inet->recverr ? net_xmit_errno(err) : 0; + err = net_xmit_errno(err); if (err) goto error; out: @@ -386,6 +386,8 @@ error_fault: kfree_skb(skb); error: IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); + if (err == -ENOBUFS && !inet->recverr) + err = 0; return err; } @@ -576,8 +578,11 @@ back_from_confirm: &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); - else if (!(msg->msg_flags & MSG_MORE)) + else if (!(msg->msg_flags & MSG_MORE)) { err = ip_push_pending_frames(sk); + if (err == -ENOBUFS && !inet->recverr) + err = 0; + } release_sock(sk); } done: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 29ebb0d27a1..ebaaa7f973d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -561,12 +561,18 @@ static int udp_push_pending_frames(struct sock *sk) send: err = ip_push_pending_frames(sk); + if (err) { + if (err == -ENOBUFS && !inet->recverr) { + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); + err = 0; + } + } else + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); out: up->len = 0; up->pending = 0; - if (!err) - UDP_INC_STATS_USER(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a931229856b..cd48801a8d6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1511,7 +1511,7 @@ int ip6_push_pending_frames(struct sock *sk) err = ip6_local_out(skb); if (err) { if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = net_xmit_errno(err); if (err) goto error; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 506841030fb..7d675b8d82d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -642,7 +642,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = net_xmit_errno(err); if (err) goto error; out: @@ -653,6 +653,8 @@ error_fault: kfree_skb(skb); error: IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + if (err == -ENOBUFS && !np->recverr) + err = 0; return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 20d2ffc15f0..164040613c2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -724,12 +724,18 @@ static int udp_v6_push_pending_frames(struct sock *sk) send: err = ip6_push_pending_frames(sk); + if (err) { + if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); + err = 0; + } + } else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); out: up->len = 0; up->pending = 0; - if (!err) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } -- cgit v1.2.3-70-g09d2 From aa1330766c49199bdab4d4a9096d98b072df9044 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 2 Sep 2009 23:45:45 -0700 Subject: tcp: replace hard coded GFP_KERNEL with sk_allocation This fixed a lockdep warning which appeared when doing stress memory tests over NFS: inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. page reclaim => nfs_writepage => tcp_sendmsg => lock sk_lock mount_root => nfs_root_data => tcp_close => lock sk_lock => tcp_send_fin => alloc_skb_fclone => page reclaim David raised a concern that if the allocation fails in tcp_send_fin(), and it's GFP_ATOMIC, we are going to yield() (which sleeps) and loop endlessly waiting for the allocation to succeed. But fact is, the original GFP_KERNEL also sleeps. GFP_ATOMIC+yield() looks weird, but it is no worse the implicit sleep inside GFP_KERNEL. Both could loop endlessly under memory pressure. CC: Arnaldo Carvalho de Melo CC: David S. Miller CC: Herbert Xu Signed-off-by: Wu Fengguang Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 10 +++++----- net/ipv4/tcp_ipv4.c | 7 ++++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 5 +++-- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index df50bc40b5f..b71a446d58f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1186,7 +1186,7 @@ extern int tcp_v4_md5_do_del(struct sock *sk, #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void); +extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 59f69a6c586..edeea060db4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1839,7 +1839,7 @@ void tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. */ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_KERNEL); + tcp_send_active_reset(sk, sk->sk_allocation); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); @@ -2658,7 +2658,7 @@ void tcp_free_md5sig_pool(void) EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) +static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; struct tcp_md5sig_pool **pool; @@ -2671,7 +2671,7 @@ static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), sk->sk_allocation); if (!p) goto out_free; *per_cpu_ptr(pool, cpu) = p; @@ -2688,7 +2688,7 @@ out_free: return NULL; } -struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void) +struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) { struct tcp_md5sig_pool **pool; int alloc = 0; @@ -2709,7 +2709,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(); + struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); if (!p) { tcp_md5sig_users--; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ce7d3b021ff..0543561da99 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -886,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - if (tcp_alloc_md5sig_pool() == NULL) { + if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } @@ -1007,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (!tcp_sk(sk)->md5sig_info) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct tcp_md5sig_info *p; + p = kzalloc(sizeof(*p), sk->sk_allocation); if (!p) return -EINVAL; @@ -1016,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); if (!newkey) return -ENOMEM; return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6c8b42299d9..e48c37d74d7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -322,7 +322,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (key != NULL) { memcpy(&tcptw->tw_md5_key, key->key, key->keylen); tcptw->tw_md5_keylen = key->keylen; - if (tcp_alloc_md5sig_pool() == NULL) + if (tcp_alloc_md5sig_pool(sk) == NULL) BUG(); } } while (0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4e004424d40..5200aab0ca9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2135,7 +2135,8 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk->sk_allocation); if (skb) break; yield(); @@ -2388,7 +2389,7 @@ int tcp_connect(struct sock *sk) sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); tp->packets_out += tcp_skb_pcount(buff); - tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); /* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d73617e9708..65aecf27f2c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -591,7 +591,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, } sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } - if (tcp_alloc_md5sig_pool() == NULL) { + if (tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From fa1a9c681377c57e233038e50479e7d7a5cc3108 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 9 Sep 2009 03:43:50 -0700 Subject: headers: net/ipv[46]/protocol.c header trim Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/protocol.c | 19 ++----------------- net/ipv6/protocol.c | 15 ++------------- 2 files changed, 4 insertions(+), 30 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ea50da0649f..a2e5fc0a15e 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -22,26 +22,11 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include -#include +#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; static DEFINE_SPINLOCK(inet_proto_lock); diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9ab78915991..568864f722c 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -20,20 +20,9 @@ * - Removed unused variable 'inet6_protocol_base' * - Modified inet6_del_protocol() to correctly maintain copy bit. */ - -#include -#include -#include -#include -#include -#include +#include #include -#include - -#include -#include - -#include +#include #include struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; -- cgit v1.2.3-70-g09d2