From cc5c00bbb44c5d68b883aa5cb9d01514a2525d94 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:31:29 +0800 Subject: gro: Only verify TCP checksums for candidates In some cases we may receive IP packets that are longer than their stated lengths. Such packets are never merged in GRO. However, we may end up computing their checksums incorrectly and end up allowing packets with a bogus checksum enter our stack with the checksum status set as verified. Since such packets are rare and not performance-critical, this patch simply skips the checksum verification for them. Reported-by: Alexander Duyck Signed-off-by: Herbert Xu Acked-by: Alexander Duyck Thanks, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a2b68a108ea..55aeec93014 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -276,6 +276,10 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, @@ -301,6 +305,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } -- cgit v1.2.3-70-g09d2 From b8ee93ba80b5a0b6c3c06b65c34dd1276f16c047 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:32:11 +0800 Subject: gro: Clean up tcpX_gro_receive checksum verification This patch simplifies the checksum verification in tcpX_gro_receive by reusing the CHECKSUM_COMPLETE code for CHECKSUM_NONE. All it does for CHECKSUM_NONE is compute the partial checksum and then treat it as if it came from the hardware (CHECKSUM_COMPLETE). Signed-off-by: Herbert Xu Cheers, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 26 ++++++++++---------------- net/ipv6/tcpv6_offload.c | 27 ++++++++++----------------- 2 files changed, 20 insertions(+), 33 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 55aeec93014..05606353c7e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -274,35 +274,29 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * { const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + 0); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 71923d14127..6d18157dc32 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,36 +37,29 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, { const struct ipv6hdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + wsum); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: -- cgit v1.2.3-70-g09d2 From fb10f802b0fb76079612cb78505cbc9ad81e683b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 22 Nov 2013 14:48:40 +0800 Subject: tcp_memcg: remove useless var old_lim nobody needs it. remove. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 03e9154f7e6..269a89ecd2f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -60,7 +60,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { struct cg_proto *cg_proto; - u64 old_lim; int i; int ret; @@ -71,7 +70,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; -- cgit v1.2.3-70-g09d2 From 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++++-- include/net/ping.h | 3 ++- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 12 files changed, 26 insertions(+), 17 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/ip.h b/include/net/ip.h index 217bc5bfc6c..5a25f36fe3a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -473,7 +473,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2a5f668cd68..eb198acaac1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -776,8 +776,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/include/net/ping.h b/include/net/ping.h index 3f67704f374..90f48417b03 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -31,7 +31,8 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); int (*icmpv6_err_convert)(u8 type, u8 code, int *err); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f858266fa7..ddf32a6bc41 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca2d8f..840cf1b9e6e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -841,10 +841,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb505e..23c3e5b5bb5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d668d..44dfaa09b58 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1236,7 +1236,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0ff57c..d690204a027 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31a87f..a83243c3d65 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1df040..7fb4e14c467 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf8389..bcd5699313c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd65304be6..d9b437e5500 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) -- cgit v1.2.3-70-g09d2 From 39af0c409e8c06b51caf7dc43e49ef96ae790a55 Mon Sep 17 00:00:00 2001 From: Baker Zhang Date: Tue, 26 Nov 2013 09:29:15 +0800 Subject: net: remove outdated comment for ipv4 and ipv6 protocol handler since f9242b6b28d61295f2bf7e8adfb1060b382e5381 inet: Sanitize inet{,6} protocol demux. there are not pretended hash tables for ipv4 or ipv6 protocol handler. Signed-off-by: Baker Zhang Signed-off-by: David S. Miller --- net/ipv4/protocol.c | 8 -------- net/ipv6/protocol.c | 4 ---- 2 files changed, 12 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ce848461acb..46d6a1c923a 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -31,10 +31,6 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; -/* - * Add a protocol handler to the hash tables - */ - int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { if (!prot->netns_ok) { @@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_offload); -/* - * Remove a protocol from the hash tables. - */ - int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4670d..e048cf1bb6a 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -/* - * Remove a protocol from the hash tables. - */ - int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int ret; -- cgit v1.2.3-70-g09d2 From d3f7d56a7a4671d395e8af87071068a195257bf6 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 24 Nov 2013 22:36:28 -0800 Subject: net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Commit 35f9c09fe (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag MSG_SENDPAGE_NOTLAST, similar to MSG_MORE. algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() and need to see the new flag as identical to MSG_MORE. This fixes sendfile() on AF_ALG. v3: also fix udp Cc: Tom Herbert Cc: Eric Dumazet Cc: David S. Miller Cc: # 3.4.x + 3.2.x Reported-and-tested-by: Shawn Landden Original-patch: Richard Weinberger Signed-off-by: Shawn Landden Signed-off-by: David S. Miller --- crypto/algif_hash.c | 3 +++ crypto/algif_skcipher.c | 3 +++ net/ipv4/udp.c | 3 +++ 3 files changed, 9 insertions(+) (limited to 'net/ipv4') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index ef5356cd280..850246206b1 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, struct hash_ctx *ctx = ask->private; int err; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); sg_init_table(ctx->sgl.sg, 1); sg_set_page(ctx->sgl.sg, page, size, offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6a6dfc062d2..a19c027b29b 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_sg_list *sgl; int err = -EINVAL; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); if (!ctx->more && ctx->used) goto unlock; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44dfaa09b58..bf2b85b2549 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1098,6 +1098,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; -- cgit v1.2.3-70-g09d2 From f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Nov 2013 09:51:22 -0800 Subject: inet: fix possible seqlock deadlocks In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left another places where IP_INC_STATS_BH() were improperly used. udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from process context, not from softirq context. This was detected by lockdep seqlock support. Reported-by: jongman heo Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 840cf1b9e6e..242e7f4ed6f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -772,7 +772,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59a6f8b90cd..06721392475 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -177,7 +177,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bf2b85b2549..44f6a20fa29 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -999,7 +999,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } -- cgit v1.2.3-70-g09d2 From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Dec 2013 20:12:04 -0800 Subject: tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling. kill memcg_tcp_enter_memory_pressure. The only function of memcg_tcp_enter_memory_pressure was to reduce deal with the unnecessary abstraction that was tcp_memcontrol. Now that struct tcp_memcontrol is gone remove this unnecessary function, the unnecessary function pointer, and modify sk_enter_memory_pressure to set this field directly, just as sk_leave_memory_pressure cleas this field directly. This fixes a small bug I intruduced when killing struct tcp_memcontrol that caused memcg_tcp_enter_memory_pressure to never be called and thus failed to ever set cg_proto->memory_pressure. Remove the cg_proto enter_memory_pressure function as it now serves no useful purpose. Don't test cg_proto->memory_presser in sk_leave_memory_pressure before clearing it. The test was originally there to ensure that the pointer was non-NULL. Now that cg_proto is not a pointer the pointer does not matter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++---- net/ipv4/tcp_memcontrol.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sock.h b/include/net/sock.h index e3a18ff0c38..2ef3c3eca47 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1035,7 +1035,6 @@ enum cg_proto_flags { }; struct cg_proto { - void (*enter_memory_pressure)(struct sock *sk); struct res_counter memory_allocated; /* Current allocated memory. */ struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; @@ -1155,8 +1154,7 @@ static inline void sk_leave_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - if (cg_proto->memory_pressure) - cg_proto->memory_pressure = 0; + cg_proto->memory_pressure = 0; } } @@ -1171,7 +1169,7 @@ static inline void sk_enter_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - cg_proto->enter_memory_pressure(sk); + cg_proto->memory_pressure = 1; } sk->sk_prot->enter_memory_pressure(sk); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 269a89ecd2f..f7e522c558b 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include #include -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* -- cgit v1.2.3-70-g09d2 From 673498b8ed4c4d4b7221c5309d891c5eac2b7528 Mon Sep 17 00:00:00 2001 From: Stefan Tomanek Date: Tue, 10 Dec 2013 23:21:25 +0100 Subject: inet: fix NULL pointer Oops in fib(6)_rule_suppress This changes ensures that the routing entry investigated by the suppress function actually does point to a device struct before following that pointer, fixing a possible kernel oops situation when verifying the interface group associated with a routing table entry. According to Daniel Golle, this Oops can be triggered by a user process trying to establish an outgoing IPv6 connection while having no real IPv6 connectivity set up (only autoassigned link-local addresses). Fixes: 6ef94cfafba15 ("fib_rules: add route suppression based on ifgroup") Reported-by: Daniel Golle Tested-by: Daniel Golle Signed-off-by: Stefan Tomanek Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 5 ++++- net/ipv6/fib6_rules.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 523be38e37d..f2e15738534 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -104,7 +104,10 @@ errout: static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; - struct net_device *dev = result->fi->fib_dev; + struct net_device *dev = NULL; + + if (result->fi) + dev = result->fi->fib_dev; /* do not accept result if the route does * not meet the required prefix length diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e27591635f9..3fd0a578329 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out: static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct rt6_info *rt = (struct rt6_info *) arg->result; - struct net_device *dev = rt->rt6i_idev->dev; + struct net_device *dev = NULL; + + if (rt->rt6i_idev) + dev = rt->rt6i_idev->dev; + /* do not accept result if the route does * not meet the required prefix length */ -- cgit v1.2.3-70-g09d2 From 8afdd99a1315e759de04ad6e2344f0c5f17ecb1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2013 18:07:23 -0800 Subject: udp: ipv4: fix an use after free in __udp4_lib_rcv() Dave Jones reported a use after free in UDP stack : [ 5059.434216] ========================= [ 5059.434314] [ BUG: held lock freed! ] [ 5059.434420] 3.13.0-rc3+ #9 Not tainted [ 5059.434520] ------------------------- [ 5059.434620] named/863 is freeing memory ffff88005e960000-ffff88005e96061f, with a lock still held there! [ 5059.434815] (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435012] 3 locks held by named/863: [ 5059.435086] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x11d/0x940 [ 5059.435295] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x3e/0x410 [ 5059.435500] #2: (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435734] stack backtrace: [ 5059.435858] CPU: 0 PID: 863 Comm: named Not tainted 3.13.0-rc3+ #9 [loadavg: 0.21 0.06 0.06 1/115 1365] [ 5059.436052] Hardware name: /D510MO, BIOS MOPNV10J.86A.0175.2010.0308.0620 03/08/2010 [ 5059.436223] 0000000000000002 ffff88007e203ad8 ffffffff8153a372 ffff8800677130e0 [ 5059.436390] ffff88007e203b10 ffffffff8108cafa ffff88005e960000 ffff88007b00cfc0 [ 5059.436554] ffffea00017a5800 ffffffff8141c490 0000000000000246 ffff88007e203b48 [ 5059.436718] Call Trace: [ 5059.436769] [] dump_stack+0x4d/0x66 [ 5059.436904] [] debug_check_no_locks_freed+0x15a/0x160 [ 5059.437037] [] ? __sk_free+0x110/0x230 [ 5059.437147] [] kmem_cache_free+0x6a/0x150 [ 5059.437260] [] __sk_free+0x110/0x230 [ 5059.437364] [] sk_free+0x19/0x20 [ 5059.437463] [] sock_edemux+0x25/0x40 [ 5059.437567] [] sock_queue_rcv_skb+0x81/0x280 [ 5059.437685] [] ? udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.437805] [] __udp_queue_rcv_skb+0x42/0x240 [ 5059.437925] [] ? _raw_spin_lock+0x65/0x70 [ 5059.438038] [] udp_queue_rcv_skb+0x26b/0x4b0 [ 5059.438155] [] __udp4_lib_rcv+0x152/0xb00 [ 5059.438269] [] udp_rcv+0x15/0x20 [ 5059.438367] [] ip_local_deliver_finish+0x10f/0x410 [ 5059.438492] [] ? ip_local_deliver_finish+0x3e/0x410 [ 5059.438621] [] ip_local_deliver+0x43/0x80 [ 5059.438733] [] ip_rcv_finish+0x140/0x5a0 [ 5059.438843] [] ip_rcv+0x296/0x3f0 [ 5059.438945] [] __netif_receive_skb_core+0x742/0x940 [ 5059.439074] [] ? __netif_receive_skb_core+0x11d/0x940 [ 5059.442231] [] ? trace_hardirqs_on+0xd/0x10 [ 5059.442231] [] __netif_receive_skb+0x13/0x60 [ 5059.442231] [] netif_receive_skb+0x1e/0x1f0 [ 5059.442231] [] napi_gro_receive+0x70/0xa0 [ 5059.442231] [] rtl8169_poll+0x166/0x700 [r8169] [ 5059.442231] [] net_rx_action+0x129/0x1e0 [ 5059.442231] [] __do_softirq+0xed/0x240 [ 5059.442231] [] irq_exit+0x125/0x140 [ 5059.442231] [] do_IRQ+0x51/0xc0 [ 5059.442231] [] common_interrupt+0x6f/0x6f We need to keep a reference on the socket, by using skb_steal_sock() at the right place. Note that another patch is needed to fix a race in udp_sk_rx_dst_set(), as we hold no lock protecting the dst. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44f6a20fa29..2e2aecbe22c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) - return sk; - else - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -1739,15 +1735,15 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (skb->sk) { + sk = skb_steal_sock(skb); + if (sk) { int ret; - sk = skb->sk; if (unlikely(sk->sk_rx_dst == NULL)) udp_sk_rx_dst_set(sk, skb); ret = udp_queue_rcv_skb(sk, skb); - + sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ -- cgit v1.2.3-70-g09d2 From 610438b74496b2986a9025f8e23c134cb638e338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 08:10:05 -0800 Subject: udp: ipv4: fix potential use after free in udp_v4_early_demux() pskb_may_pull() can reallocate skb->head, we need to move the initialization of iph and uh pointers after its call. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e2aecbe22c..16d246a51a0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,17 +1909,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, void udp_v4_early_demux(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *uh = udp_hdr(skb); + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; - struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, -- cgit v1.2.3-70-g09d2 From 975022310233fb0f0193873d79a7b8438070fa82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 14:46:51 -0800 Subject: udp: ipv4: must add synchronization in udp_sk_rx_dst_set() Unlike TCP, UDP input path does not hold the socket lock. Before messing with sk->sk_rx_dst, we must use a spinlock, otherwise multiple cpus could leak a refcount. This patch also takes care of renewing a stale dst entry. (When the sk->sk_rx_dst would not be used by IP early demux) Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 16d246a51a0..62c19fdd102 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1599,12 +1599,21 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } -static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use sk_dst_lock to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *old; - dst_hold(dst); - sk->sk_rx_dst = dst; + spin_lock(&sk->sk_dst_lock); + old = sk->sk_rx_dst; + if (likely(old != dst)) { + dst_hold(dst); + sk->sk_rx_dst = dst; + dst_release(old); + } + spin_unlock(&sk->sk_dst_lock); } /* @@ -1737,10 +1746,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = skb_steal_sock(skb); if (sk) { + struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst == NULL)) - udp_sk_rx_dst_set(sk, skb); + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.3-70-g09d2