From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:40 +0000
Subject: net: add limit for socket backlog

We got system OOM while running some UDP netperf testing on the loopback
device. The case is multiple senders sent stream UDP packets to a single
receiver via loopback on local host. Of course, the receiver is not able
to handle all the packets in time. But we surprisingly found that these
packets were not discarded due to the receiver's sk->sk_rcvbuf limit.
Instead, they are kept queuing to sk->sk_backlog and finally ate up all
the memory. We believe this is a secure hole that a none privileged user
can crash the system.

The root cause for this problem is, when the receiver is doing
__release_sock() (i.e. after userspace recv, kernel udp_recvmsg ->
skb_free_datagram_locked -> release_sock), it moves skbs from backlog to
sk_receive_queue with the softirq enabled. In the above case, multiple
busy senders will almost make it an endless loop. The skbs in the
backlog end up eat all the system memory.

The issue is not only for UDP. Any protocols using socket backlog is
potentially affected. The patch adds limit for socket backlog so that
the backlog size cannot be expanded endlessly.

Reported-by: Alex Shi <alex.shi@intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru
Cc: "Pekka Savola (ipv6)" <pekkas@netcore.fi>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Vlad Yasevich <vladislav.yasevich@hp.com>
Cc: Sridhar Samudrala <sri@us.ibm.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Allan Stephens <allan.stephens@windriver.com>
Cc: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'net/core/sock.c')

diff --git a/net/core/sock.c b/net/core/sock.c
index fcd397a762f..6e22dc973d2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else
-		sk_add_backlog(sk, skb);
+	} else if (sk_add_backlog_limited(sk, skb)) {
+		bh_unlock_sock(sk);
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
+
 	bh_unlock_sock(sk);
 out:
 	sock_put(sk);
@@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_backlog.len = 0;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
 		/*
@@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk)
 
 		bh_lock_sock(sk);
 	} while ((skb = sk->sk_backlog.head) != NULL);
+
+	/*
+	 * Doing the zeroing here guarantee we can not loop forever
+	 * while a wild producer attempts to flood us.
+	 */
+	sk->sk_backlog.len = 0;
 }
 
 /**
@@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
+	sk->sk_backlog.limit	=	sk->sk_rcvbuf << 1;
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
-- 
cgit v1.2.3-70-g09d2


From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:47 +0000
Subject: net: backlog functions rename

sk_add_backlog -> __sk_add_backlog
sk_add_backlog_limited -> sk_add_backlog

Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h       | 6 +++---
 net/core/sock.c          | 2 +-
 net/dccp/minisocks.c     | 2 +-
 net/ipv4/tcp_ipv4.c      | 2 +-
 net/ipv4/tcp_minisocks.c | 2 +-
 net/ipv4/udp.c           | 2 +-
 net/ipv6/tcp_ipv6.c      | 2 +-
 net/ipv6/udp.c           | 4 ++--
 net/llc/llc_c_ac.c       | 2 +-
 net/llc/llc_conn.c       | 2 +-
 net/sctp/input.c         | 4 ++--
 net/tipc/socket.c        | 2 +-
 net/x25/x25_dev.c        | 2 +-
 13 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'net/core/sock.c')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2516d76f043..170353dd957 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -592,7 +592,7 @@ static inline int sk_stream_memory_free(struct sock *sk)
 }
 
 /* OOB backlog add */
-static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	if (!sk->sk_backlog.tail) {
 		sk->sk_backlog.head = sk->sk_backlog.tail = skb;
@@ -604,12 +604,12 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 }
 
 /* The per-socket spinlock must be held here. */
-static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb)
+static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
 		return -ENOBUFS;
 
-	sk_add_backlog(sk, skb);
+	__sk_add_backlog(sk, skb);
 	sk->sk_backlog.len += skb->truesize;
 	return 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e22dc973d2..61a65a2e045 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog_limited(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index af226a06314..0d508c359fa 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
 		 * in main socket hash table and lock on listening
 		 * socket does not protect us more.
 		 */
-		sk_add_backlog(child, skb);
+		__sk_add_backlog(child, skb);
 	}
 
 	bh_unlock_sock(child);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4baf1943b1b..1915f7dc30e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1682,7 +1682,7 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog_limited(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		goto discard_and_relse;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f206ee5dda8..4199bc6915c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 		 * in main socket hash table and lock on listening
 		 * socket does not protect us more.
 		 */
-		sk_add_backlog(child, skb);
+		__sk_add_backlog(child, skb);
 	}
 
 	bh_unlock_sock(child);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e7eb47f338d..7af756d0f93 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		rc = __udp_queue_rcv_skb(sk, skb);
-	else if (sk_add_backlog_limited(sk, skb)) {
+	else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		goto drop;
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c4ea9d5cbfa..2c378b1bd5c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1740,7 +1740,7 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v6_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog_limited(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		goto discard_and_relse;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 64804912b09..3c0c9c755c9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
-			else if (sk_add_backlog_limited(sk, skb1)) {
+			else if (sk_add_backlog(sk, skb1)) {
 				kfree_skb(skb1);
 				bh_unlock_sock(sk);
 				goto drop;
@@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
-	else if (sk_add_backlog_limited(sk, skb)) {
+	else if (sk_add_backlog(sk, skb)) {
 		atomic_inc(&sk->sk_drops);
 		bh_unlock_sock(sk);
 		sock_put(sk);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 019c780512e..86d6985b9d4 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb)
 			llc_conn_state_process(sk, skb);
 		else {
 			llc_set_backlog_type(skb, LLC_EVENT);
-			sk_add_backlog(sk, skb);
+			__sk_add_backlog(sk, skb);
 		}
 	}
 }
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index c0539ffdb27..a12144da797 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
 	else {
 		dprintk("%s: adding to backlog...\n", __func__);
 		llc_set_backlog_type(skb, LLC_PACKET);
-		if (sk_add_backlog_limited(sk, skb))
+		if (sk_add_backlog(sk, skb))
 			goto drop_unlock;
 	}
 out:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index cbc063665e6..3d74b264ea2 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 		sctp_bh_lock_sock(sk);
 
 		if (sock_owned_by_user(sk)) {
-			if (sk_add_backlog_limited(sk, skb))
+			if (sk_add_backlog(sk, skb))
 				sctp_chunk_free(chunk);
 			else
 				backloged = 1;
@@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	struct sctp_ep_common *rcvr = chunk->rcvr;
 	int ret;
 
-	ret = sk_add_backlog_limited(sk, skb);
+	ret = sk_add_backlog(sk, skb);
 	if (!ret) {
 		/* Hold the assoc/ep while hanging on the backlog queue.
 		 * This way, we know structures we need will not disappear
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 22bfbc33a8a..4b235fc1c70 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
 	if (!sock_owned_by_user(sk)) {
 		res = filter_rcv(sk, buf);
 	} else {
-		if (sk_add_backlog_limited(sk, buf))
+		if (sk_add_backlog(sk, buf))
 			res = TIPC_ERR_OVERLOAD;
 		else
 			res = TIPC_OK;
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index a9da0dc26f4..52e30421224 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 		if (!sock_owned_by_user(sk)) {
 			queued = x25_process_rx_frame(sk, skb);
 		} else {
-			queued = !sk_add_backlog_limited(sk, skb);
+			queued = !sk_add_backlog(sk, skb);
 		}
 		bh_unlock_sock(sk);
 		sock_put(sk);
-- 
cgit v1.2.3-70-g09d2


From 72150e9b7fec217fbd646a29ea2f65a3d4d55ea9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 6 Mar 2010 01:04:45 +0000
Subject: sock.c: potential null dereference

We test that "prot->rsk_prot" is non-null right before we dereference it
on this line.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/core/sock.c')

diff --git a/net/core/sock.c b/net/core/sock.c
index 61a65a2e045..c5812bbc2cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2288,7 +2288,8 @@ out_free_request_sock_slab:
 		prot->rsk_prot->slab = NULL;
 	}
 out_free_request_sock_slab_name:
-	kfree(prot->rsk_prot->slab_name);
+	if (prot->rsk_prot)
+		kfree(prot->rsk_prot->slab_name);
 out_free_sock_slab:
 	kmem_cache_destroy(prot->slab);
 	prot->slab = NULL;
-- 
cgit v1.2.3-70-g09d2