diff options
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- | net/ipv4/udp.c | 188 |
1 files changed, 116 insertions, 72 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b3f7e8cf18a..599374f65c7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -110,7 +110,7 @@ struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); -int sysctl_udp_mem[3] __read_mostly; +long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); int sysctl_udp_rmem_min __read_mostly; @@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min); int sysctl_udp_wmem_min __read_mostly; EXPORT_SYMBOL(sysctl_udp_wmem_min); -atomic_t udp_memory_allocated; +atomic_long_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 @@ -189,7 +189,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, * @sk: socket struct in question * @snum: port number to look up * @saddr_comp: AF-dependent comparison of bound local IP addresses - * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, + * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, @@ -430,7 +430,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -500,7 +500,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { @@ -578,7 +578,7 @@ found: void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; - struct iphdr *iph = (struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *)skb->data; struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -663,75 +663,71 @@ void udp_flush_pending_frames(struct sock *sk) EXPORT_SYMBOL(udp_flush_pending_frames); /** - * udp4_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on + * udp4_hwcsum - handle outgoing HW checksumming * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) + * @src: source IP address + * @dst: destination IP address */ -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, int len) +static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { - unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; + int hlen = len; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* * Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + uh->check = ~csum_tcpudp_magic(src, dst, len, + IPPROTO_UDP, 0); } else { /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ - offset = skb_transport_offset(skb); - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + do { + csum = csum_add(csum, frags->csum); + hlen -= frags->len; + } while ((frags = frags->next)); + csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } -/* - * Push out all pending data as one UDP datagram. Socket is locked. - */ -static int udp_push_pending_frames(struct sock *sk) +static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) { - struct udp_sock *up = udp_sk(sk); + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - struct sk_buff *skb; struct udphdr *uh; int err = 0; int is_udplite = IS_UDPLITE(sk); + int offset = skb_transport_offset(skb); + int len = skb->len - offset; __wsum csum = 0; - /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - /* * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); + uh->source = inet->inet_sport; + uh->dest = fl4->fl4_dport; + uh->len = htons(len); uh->check = 0; if (is_udplite) /* UDP-Lite */ - csum = udplite_csum_outgoing(sk, skb); + csum = udplite_csum(skb); else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ @@ -740,20 +736,20 @@ static int udp_push_pending_frames(struct sock *sk) } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); + udp4_hwcsum(skb, fl4->saddr, fl4->daddr); goto send; - } else /* `normal' UDP */ - csum = udp_csum_outgoing(sk, skb); + } else + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, sk->sk_protocol, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip_push_pending_frames(sk); + err = ip_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), @@ -763,6 +759,26 @@ send: } else UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +/* + * Push out all pending data as one UDP datagram. Socket is locked. + */ +static int udp_push_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi4 *fl4 = &inet->cork.fl.u.ip4; + struct sk_buff *skb; + int err = 0; + + skb = ip_finish_skb(sk, fl4); + if (!skb) + goto out; + + err = udp_send_skb(skb, fl4); + out: up->len = 0; up->pending = 0; @@ -774,6 +790,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); + struct flowi4 fl4_stack; + struct flowi4 *fl4; int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; @@ -785,6 +803,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int err, is_udplite = IS_UDPLITE(sk); int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct sk_buff *skb; + struct ip_options_data opt_copy; if (len > 0xFFFF) return -EMSGSIZE; @@ -799,6 +819,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.tx_flags = 0; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + + fl4 = &inet->cork.fl.u.ip4; if (up->pending) { /* * There are pending frames. @@ -856,22 +879,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, free = 1; connected = 0; } - if (!ipc.opt) - ipc.opt = inet->opt; + if (!ipc.opt) { + struct ip_options_rcu *inet_opt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + memcpy(&opt_copy, inet_opt, + sizeof(*inet_opt) + inet_opt->opt.optlen); + ipc.opt = &opt_copy.opt; + } + rcu_read_unlock(); + } saddr = ipc.addr; ipc.addr = faddr = daddr; - if (ipc.opt && ipc.opt->srr) { + if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) return -EINVAL; - faddr = ipc.opt->faddr; + faddr = ipc.opt->opt.faddr; connected = 0; } tos = RT_TOS(inet->tos); if (sock_flag(sk, SOCK_LOCALROUTE) || (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->is_strictroute)) { + (ipc.opt && ipc.opt->opt.is_strictroute)) { tos |= RTO_ONLINK; connected = 0; } @@ -888,22 +921,19 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = dport } } }; struct net *net = sock_net(sk); - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(net, &rt, &fl, sk, 1); - if (err) { + fl4 = &fl4_stack; + flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, sk->sk_protocol, + inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + faddr, saddr, dport, inet->inet_sport); + + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + rt = ip_route_output_flow(net, fl4, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; if (err == -ENETUNREACH) IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); goto out; @@ -921,9 +951,20 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto do_confirm; back_from_confirm: - saddr = rt->rt_src; + saddr = fl4->saddr; if (!ipc.addr) - daddr = ipc.addr = rt->rt_dst; + daddr = ipc.addr = fl4->daddr; + + /* Lockless fast path for the non-corking case. */ + if (!corkreq) { + skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, &rt, + msg->msg_flags); + err = PTR_ERR(skb); + if (skb && !IS_ERR(skb)) + err = udp_send_skb(skb, fl4); + goto out; + } lock_sock(sk); if (unlikely(up->pending)) { @@ -938,18 +979,18 @@ back_from_confirm: /* * Now cork the socket to pend data. */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->inet_sport; + fl4 = &inet->cork.fl.u.ip4; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->fl4_dport = dport; + fl4->fl4_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, &rt, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, &rt, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); else if (!corkreq) @@ -989,6 +1030,7 @@ EXPORT_SYMBOL(udp_sendmsg); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { + struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); int ret; @@ -1013,7 +1055,8 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, return -EINVAL; } - ret = ip_append_page(sk, page, offset, size, flags); + ret = ip_append_page(sk, &inet->cork.fl.u.ip4, + page, offset, size, flags); if (ret == -EOPNOTSUPP) { release_sock(sk); return sock_no_sendpage(sk->sk_socket, page, offset, @@ -1413,7 +1456,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (sk->sk_filter) { + if (rcu_dereference_raw(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto drop; } @@ -1899,6 +1942,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udp_prot); @@ -2200,7 +2244,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; @@ -2228,7 +2272,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ - offset = skb->csum_start - skb_headroom(skb); + offset = skb_checksum_start_offset(skb); csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; *(__sum16 *)(skb->data + offset) = csum_fold(csum); |