From 98322f22eca889478045cf896b572250d03dc45f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Jan 2009 21:35:35 -0800 Subject: udp: optimize bind(0) if many ports are in use commit 9088c5609584684149f3fb5b065aa7f18dcb03ff (udp: Improve port randomization) introduced a regression for UDP bind() syscall to null port (getting a random port) in case lot of ports are already in use. This is because we do about 28000 scans of very long chains (220 sockets per chain), with many spin_lock_bh()/spin_unlock_bh() calls. Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE) so that we scan chains at most once. Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms Based on a report from Vitaly Mayatskikh Reported-by: Vitaly Mayatskikh Signed-off-by: Eric Dumazet Tested-by: Vitaly Mayatskikh Signed-off-by: David S. Miller --- net/ipv4/udp.c | 55 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 16 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab0581eb..b7faffe5c02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, + unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - sk2->sk_hash == num && + (bitmap || sk2->sk_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - return 1; + (*saddr_comp)(sk, sk2)) { + if (bitmap) + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, + bitmap); + else + return 1; + } return 0; } @@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; unsigned rand; - unsigned short first; + unsigned short first, last; + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rand = net_random(); - snum = first = rand % remaining + low; - rand |= 1; - for (;;) { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + first = (((u64)rand * remaining) >> 32) + low; + /* + * force rand to be an odd multiple of UDP_HTABLE_SIZE + */ + rand = (rand | 1) * UDP_HTABLE_SIZE; + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { + hslot = &udptable->hash[udp_hashfn(net, first)]; + bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) - break; - spin_unlock_bh(&hslot->lock); + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, + saddr_comp); + + snum = first; + /* + * Iterate on all possible values of snum for this hash. + * Using steps of an odd multiple of UDP_HTABLE_SIZE + * give us randomization and full range coverage. + */ do { - snum = snum + rand; - } while (snum < low || snum > high); - if (snum == first) - goto fail; + if (low <= snum && snum <= high && + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + goto found; + snum += rand; + } while (snum != first); + spin_unlock_bh(&hslot->lock); } + goto fail; } else { hslot = &udptable->hash[udp_hashfn(net, snum)]; spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) goto fail_unlock; } +found: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { -- cgit v1.2.3-70-g09d2 From e408b8dcb5ce42243a902205005208e590f28454 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Feb 2009 13:41:57 -0800 Subject: udp: increments sk_drops in __udp_queue_rcv_skb() Commit 93821778def10ec1e69aa3ac10adee975dad4ff3 (udp: Fix rcv socket locking) accidentally removed sk_drops increments for UDP IPV4 sockets. This field can be used to detect incorrect sizing of socket receive buffers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b7faffe5c02..1ab180bad72 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1015,9 +1015,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) + if (rc == -ENOMEM) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); + atomic_inc(&sk->sk_drops); + } goto drop; } -- cgit v1.2.3-70-g09d2 From 7b5e56f9d635643ad54f2f42e69ad16b80a2cff1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 5 Feb 2009 15:05:45 -0800 Subject: udp: Fix UDP short packet false positive The UDP header pointer assignment must happen after calling pskb_may_pull(). As pskb_may_pull() can potentially alter the SKB buffer. This was exposted by running multicast traffic through the NIU driver, as it won't prepull the protocol headers into the linear area on receive. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1ab180bad72..cc3a0a06c00 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1231,7 +1231,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; - struct udphdr *uh = udp_hdr(skb); + struct udphdr *uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = ip_hdr(skb)->saddr; @@ -1244,6 +1244,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto drop; /* No space for header. */ + uh = udp_hdr(skb); ulen = ntohs(uh->len); if (ulen > skb->len) goto short_packet; -- cgit v1.2.3-70-g09d2 From 2783ef23128ad0a4b34e4121c1f7ff664785712f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 6 Feb 2009 01:59:12 -0800 Subject: udp: Fix potential wrong ip_hdr(skb) pointers Like the UDP header fix, pskb_may_pull() can potentially alter the SKB buffer. Thus the saddr and daddr, pointers may point to the old skb->data buffer. I haven't seen corruptions, as its only seen if the old skb->data buffer were reallocated by another user and written into very quickly (or poison'd by SLAB debugging). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cc3a0a06c00..c47c989cb1f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1234,8 +1234,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct udphdr *uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; + __be32 saddr, daddr; struct net *net = dev_net(skb->dev); /* @@ -1259,6 +1258,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; + saddr = ip_hdr(skb)->saddr; + daddr = ip_hdr(skb)->daddr; + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); -- cgit v1.2.3-70-g09d2 From 51f31cabe3ce5345b51e4a4f82138b38c4d5dc91 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:39 +0000 Subject: ip: support for TX timestamps on UDP and RAW sockets Instructions for time stamping outgoing packets are take from the socket layer and later copied into the new skb. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 2 ++ include/net/ip.h | 1 + net/can/raw.c | 3 +++ net/ipv4/icmp.c | 2 ++ net/ipv4/ip_output.c | 6 ++++++ net/ipv4/raw.c | 1 + net/ipv4/udp.c | 4 ++++ 7 files changed, 19 insertions(+) (limited to 'net/ipv4/udp.c') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a681a65b5bc..0e58b453917 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -56,6 +56,8 @@ and including the link layer, the scm_timestamping control message and a sock_extended_err control message with ee_errno==ENOMSG and ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending bounced packet is ready for reading as far as select() is concerned. +If the outgoing packet has to be fragmented, then only the first +fragment is time stamped and returned to the sending socket. All three values correspond to the same event in time, but were generated in different ways. Each of these values may be empty (= all diff --git a/include/net/ip.h b/include/net/ip.h index 10868139e65..4ac7577f98d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -55,6 +55,7 @@ struct ipcm_cookie __be32 addr; int oif; struct ip_options *opt; + union skb_shared_tx shtx; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/net/can/raw.c b/net/can/raw.c index 0703cba4bf9..6aa154e806a 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -646,6 +646,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, goto put_dev; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err < 0) + goto free_skb; + err = sock_tx_timestamp(msg, sk, skb_tx(skb)); if (err < 0) goto free_skb; skb->dev = dev; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 705b33b184a..382800a62b3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -375,6 +375,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; + ipc.shtx.flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -532,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; + ipc.shtx.flags = 0; { struct flowi fl = { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8ebe86dd72a..3e7e910c7c0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -935,6 +935,10 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; + else + /* only the initial fragment is + time stamped */ + ipc->shtx.flags = 0; } if (skb == NULL) goto error; @@ -945,6 +949,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); + *skb_tx(skb) = ipc->shtx; /* * Find where to start putting bytes. @@ -1364,6 +1369,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; + ipc.shtx.flags = 0; if (replyopts.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dff8bc4e0fa..f774651f0a4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -493,6 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->saddr; ipc.opt = NULL; + ipc.shtx.flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c47c989cb1f..4bd178a111d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -596,6 +596,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EOPNOTSUPP; ipc.opt = NULL; + ipc.shtx.flags = 0; if (up->pending) { /* @@ -643,6 +644,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->saddr; ipc.oif = sk->sk_bound_dev_if; + err = sock_tx_timestamp(msg, sk, &ipc.shtx); + if (err) + return err; if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) -- cgit v1.2.3-70-g09d2 From ead2ceb0ec9f85cff19c43b5cdb2f8a054484431 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 11 Mar 2009 09:49:55 +0000 Subject: Network Drop Monitor: Adding kfree_skb_clean for non-drops and modifying end-of-line points for skbs Signed-off-by: Neil Horman include/linux/skbuff.h | 4 +++- net/core/datagram.c | 2 +- net/core/skbuff.c | 22 ++++++++++++++++++++++ net/ipv4/arp.c | 2 +- net/ipv4/udp.c | 2 +- net/packet/af_packet.c | 2 +- 6 files changed, 29 insertions(+), 5 deletions(-) Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +++- net/core/datagram.c | 2 +- net/core/skbuff.c | 22 ++++++++++++++++++++++ net/ipv4/arp.c | 2 +- net/ipv4/udp.c | 2 +- net/packet/af_packet.c | 2 +- 6 files changed, 29 insertions(+), 5 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f659e8c2b8..1fbab2ae613 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -421,6 +421,7 @@ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb, #endif extern void kfree_skb(struct sk_buff *skb); +extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int fclone, int node); @@ -459,7 +460,8 @@ extern int skb_to_sgvec(struct sk_buff *skb, extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); extern int skb_pad(struct sk_buff *skb, int pad); -#define dev_kfree_skb(a) kfree_skb(a) +#define dev_kfree_skb(a) consume_skb(a) +#define dev_consume_skb(a) kfree_skb_clean(a) extern void skb_over_panic(struct sk_buff *skb, int len, void *here); extern void skb_under_panic(struct sk_buff *skb, int len, diff --git a/net/core/datagram.c b/net/core/datagram.c index 5e2ac0c4b07..d0de644b378 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { - kfree_skb(skb); + consume_skb(skb); sk_mem_reclaim_partial(sk); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e5e2111a397..6acbf9e79eb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,6 +65,7 @@ #include #include +#include #include "kmap_skb.h" @@ -442,10 +443,31 @@ void kfree_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb); +/** + * consume_skb - free an skbuff + * @skb: buffer to free + * + * Drop a ref to the buffer and free it if the usage count has hit zero + * Functions identically to kfree_skb, but kfree_skb assumes that the frame + * is being dropped after a failure and notes that + */ +void consume_skb(struct sk_buff *skb) +{ + if (unlikely(!skb)) + return; + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + __kfree_skb(skb); +} +EXPORT_SYMBOL(consume_skb); + /** * skb_recycle_check - check if skb can be reused for receive * @skb: buffer diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 3d67d1ffed7..9c220323f35 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -892,7 +892,7 @@ static int arp_process(struct sk_buff *skb) out: if (in_dev) in_dev_put(in_dev); - kfree_skb(skb); + consume_skb(skb); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4bd178a111d..05b7abb99f6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1184,7 +1184,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, sk = sknext; } while (sknext); } else - kfree_skb(skb); + consume_skb(skb); spin_unlock(&hslot->lock); return 0; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d8cc006fac4..74776de523e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -584,7 +584,7 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb(skb); + consume_skb(skb); return 0; } -- cgit v1.2.3-70-g09d2 From 30842f2989aacfaba3ccb39829b3417be9313dbe Mon Sep 17 00:00:00 2001 From: Vitaly Mayatskikh Date: Mon, 23 Mar 2009 15:22:33 -0700 Subject: udp: Wrong locking code in udp seq_file infrastructure Reading zero bytes from /proc/net/udp or other similar files which use the same seq_file udp infrastructure panics kernel in that way: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- read/1985 is trying to release lock (&table->hash[i].lock) at: [] udp_seq_stop+0x27/0x29 but there are no more locks to release! other info that might help us debug this: 1 lock held by read/1985: #0: (&p->lock){--..}, at: [] seq_read+0x38/0x348 stack backtrace: Pid: 1985, comm: read Not tainted 2.6.29-rc8 #9 Call Trace: [] ? udp_seq_stop+0x27/0x29 [] print_unlock_inbalance_bug+0xd6/0xe1 [] lock_release_non_nested+0x9e/0x1c6 [] ? seq_read+0xb2/0x348 [] ? mark_held_locks+0x68/0x86 [] ? udp_seq_stop+0x27/0x29 [] lock_release+0x15d/0x189 [] _spin_unlock_bh+0x1e/0x34 [] udp_seq_stop+0x27/0x29 [] seq_read+0x2bb/0x348 [] ? seq_read+0x0/0x348 [] proc_reg_read+0x90/0xaf [] vfs_read+0xa6/0x103 [] ? trace_hardirqs_on_caller+0x12f/0x153 [] sys_read+0x45/0x69 [] system_call_fastpath+0x16/0x1b BUG: scheduling while atomic: read/1985/0xffffff00 INFO: lockdep is turned off. Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table dm_multipath kvm ppdev snd_hda_codec_analog snd_hda_intel snd_hda_codec snd_hwdep snd_seq_dummy snd_seq_oss snd_seq_midi_event arc4 snd_s eq ecb thinkpad_acpi snd_seq_device iwl3945 hwmon sdhci_pci snd_pcm_oss sdhci rfkill mmc_core snd_mixer_oss i2c_i801 mac80211 yenta_socket ricoh_mmc i2c_core iTCO_wdt snd_pcm iTCO_vendor_support rs rc_nonstatic snd_timer snd lib80211 cfg80211 soundcore snd_page_alloc video parport_pc output parport e1000e [last unloaded: scsi_wait_scan] Pid: 1985, comm: read Not tainted 2.6.29-rc8 #9 Call Trace: [] ? __debug_show_held_locks+0x1b/0x24 [] __schedule_bug+0x7e/0x83 [] schedule+0xce/0x838 [] ? fsnotify_access+0x5f/0x67 [] ? sysret_careful+0xb/0x37 [] ? trace_hardirqs_on_caller+0x1f/0x153 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] sysret_careful+0x31/0x37 read[1985]: segfault at 7fffc479bfe8 ip 0000003e7420a180 sp 00007fffc479bfa0 error 6 Kernel panic - not syncing: Aiee, killing interrupt handler! udp_seq_stop() tries to unlock not yet locked spinlock. The lock was lost during splitting global udp_hash_lock to subsequent spinlocks. Signed-off by: Vitaly Mayatskikh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c47c989cb1f..c8bee189a19 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1614,7 +1614,8 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { - spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); + if (state->bucket < UDP_HTABLE_SIZE) + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } return sk; @@ -1632,6 +1633,9 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) static void *udp_seq_start(struct seq_file *seq, loff_t *pos) { + struct udp_iter_state *state = seq->private; + state->bucket = UDP_HTABLE_SIZE; + return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -- cgit v1.2.3-70-g09d2 From b2f5e7cd3dee2ed721bf0675e1a1ddebb849aee6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 24 Mar 2009 16:24:51 +0000 Subject: ipv6: Fix conflict resolutions during ipv6 binding The ipv6 version of bind_conflict code calls ipv6_rcv_saddr_equal() which at times wrongly identified intersections between addresses. It particularly broke down under a few instances and caused erroneous bind conflicts. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 ++-- include/net/udp.h | 2 ++ net/ipv4/udp.c | 3 ++- net/ipv6/addrconf.c | 34 ---------------------------------- net/ipv6/udp.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 34 insertions(+), 37 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c216de528b0..7b55ab215a6 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -88,8 +88,8 @@ extern int ipv6_dev_get_saddr(struct net *net, extern int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags); -extern int ipv6_rcv_saddr_equal(const struct sock *sk, - const struct sock *sk2); +extern int ipv6_rcv_saddr_equal(const struct sock *sk, + const struct sock *sk2); extern void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr); extern void addrconf_leave_solict(struct inet6_dev *idev, diff --git a/include/net/udp.h b/include/net/udp.h index 90e6ce56be6..93dbe294d45 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -124,6 +124,8 @@ static inline void udp_lib_close(struct sock *sk, long timeout) sk_common_release(sk); } +extern int ipv4_rcv_saddr_equal(const struct sock *sk1, + const struct sock *sk2); extern int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*)(const struct sock*,const struct sock*)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 05b7abb99f6..ace2ac8a42f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -222,7 +222,7 @@ fail: return error; } -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -1819,6 +1819,7 @@ EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); EXPORT_SYMBOL(udp_lib_get_port); +EXPORT_SYMBOL(ipv4_rcv_saddr_equal); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8499da9e76a..a8218bc1806 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1370,40 +1370,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add return ifp; } -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) -{ - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - if (!sk2_rcv_saddr && !sk_ipv6only) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) - return 1; - - if (addr_type == IPV6_ADDR_MAPPED && - !sk2_ipv6only && - (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr)) - return 1; - - return 0; -} - /* Gets referenced address, destroys ifaddr */ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 84b1a296eec..6842dd2edd5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -49,6 +49,34 @@ #include #include "udp_impl.h" +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) +{ + const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + int sk_ipv6only = ipv6_only_sock(sk); + int sk2_ipv6only = inet_v6_ipv6only(sk2); + int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) + return ipv4_rcv_saddr_equal(sk, sk2); + + if (addr_type2 == IPV6_ADDR_ANY && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) + return 1; + + if (addr_type == IPV6_ADDR_ANY && + !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + return 1; + + if (sk2_rcv_saddr6 && + ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + return 1; + + return 0; +} + int udp_v6_get_port(struct sock *sk, unsigned short snum) { return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); -- cgit v1.2.3-70-g09d2