1 files changed, 125 insertions, 8 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 12f6d9a2a52..ccd10fd6568 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			   
 			if (val > sysctl_wmem_max)
 				val = sysctl_wmem_max;
-
+set_sndbuf:
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 			if ((val * 2) < SOCK_MIN_SNDBUF)
 				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
@@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			sk->sk_write_space(sk);
 			break;
 
+		case SO_SNDBUFFORCE:
+			if (!capable(CAP_NET_ADMIN)) {
+				ret = -EPERM;
+				break;
+			}
+			goto set_sndbuf;
+
 		case SO_RCVBUF:
 			/* Don't error on this BSD doesn't and if you think
 			   about it this is right. Otherwise apps have to
@@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			  
 			if (val > sysctl_rmem_max)
 				val = sysctl_rmem_max;
-
+set_rcvbuf:
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 			/* FIXME: is this lower bound the right one? */
 			if ((val * 2) < SOCK_MIN_RCVBUF)
@@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 				sk->sk_rcvbuf = val * 2;
 			break;
 
+		case SO_RCVBUFFORCE:
+			if (!capable(CAP_NET_ADMIN)) {
+				ret = -EPERM;
+				break;
+			}
+			goto set_rcvbuf;
+
 		case SO_KEEPALIVE:
 #ifdef CONFIG_INET
 			if (sk->sk_protocol == IPPROTO_TCP)
@@ -686,6 +700,96 @@ void sk_free(struct sock *sk)
 	module_put(owner);
 }
 
+/**
+ *	sk_clone - create a copy of a socket
+ *	@sk: socket to copy
+ *	@priority: allocation priority handed to sk_alloc()
+ *
+ *	Allocates a new sock of the same family and protocol as @sk, copies
+ *	sk->sk_prot->obj_size bytes of @sk into it and then re-initialises
+ *	the per-socket state set up below (list node, locks, queues, memory
+ *	accounting, dst cache, owning socket).
+ *
+ *	Returns the new sock with its bh lock still held and sk_refcnt set
+ *	to 2, or NULL if the allocation or the xfrm policy clone failed.
+ */
+struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority)
+{
+	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+
+	if (newsk != NULL) {
+		struct sk_filter *filter;
+
+		memcpy(newsk, sk, sk->sk_prot->obj_size);
+
+		/* SANITY */
+		sk_node_init(&newsk->sk_node);
+		sock_lock_init(newsk);
+		bh_lock_sock(newsk);
+
+		atomic_set(&newsk->sk_rmem_alloc, 0);
+		atomic_set(&newsk->sk_wmem_alloc, 0);
+		atomic_set(&newsk->sk_omem_alloc, 0);
+		skb_queue_head_init(&newsk->sk_receive_queue);
+		skb_queue_head_init(&newsk->sk_write_queue);
+
+		rwlock_init(&newsk->sk_dst_lock);
+		rwlock_init(&newsk->sk_callback_lock);
+
+		newsk->sk_dst_cache	= NULL;
+		newsk->sk_wmem_queued	= 0;
+		newsk->sk_forward_alloc = 0;
+		newsk->sk_send_head	= NULL;
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+		sock_reset_flag(newsk, SOCK_DONE);
+		skb_queue_head_init(&newsk->sk_error_queue);
+
+		filter = newsk->sk_filter;
+		if (filter != NULL)
+			sk_filter_charge(newsk, filter);
+
+		if (unlikely(xfrm_sk_clone_policy(newsk))) {
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			/* Drop the lock taken above before the memory
+			 * that holds it is freed. */
+			bh_unlock_sock(newsk);
+			sk_free(newsk);
+			newsk = NULL;
+			goto out;
+		}
+
+		newsk->sk_err	   = 0;
+		newsk->sk_priority = 0;
+		atomic_set(&newsk->sk_refcnt, 2);
+
+		/*
+		 * Increment the counter in the same struct proto as the master
+		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+		 * is the same as sk->sk_prot->socks, as this field was copied
+		 * with memcpy).
+		 *
+		 * This _changes_ the previous behaviour, where
+		 * tcp_create_openreq_child always was incrementing the
+		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
+		 * to be taken into account in all callers. -acme
+		 */
+		sk_refcnt_debug_inc(newsk);
+		newsk->sk_socket = NULL;
+		newsk->sk_sleep	 = NULL;
+
+		if (newsk->sk_prot->sockets_allocated)
+			atomic_inc(newsk->sk_prot->sockets_allocated);
+	}
+out:
+	return newsk;
+}
+
+EXPORT_SYMBOL_GPL(sk_clone);
+
 void __init sk_init(void)
 {
 	if (num_physpages <= 4096) {
@@ -1353,11 +1441,7 @@ void sk_common_release(struct sock *sk)
 
 	xfrm_sk_free_policy(sk);
 
-#ifdef INET_REFCNT_DEBUG
-	if (atomic_read(&sk->sk_refcnt) != 1)
-		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
-		       sk, atomic_read(&sk->sk_refcnt));
-#endif
+	sk_refcnt_debug_release(sk);
 	sock_put(sk);
 }
 
@@ -1368,7 +1452,8 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
-	char *request_sock_slab_name;
+	char *request_sock_slab_name = NULL;
+	char *timewait_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1399,6 +1484,23 @@ int proto_register(struct proto *prot, int alloc_slab)
 				goto out_free_request_sock_slab_name;
 			}
 		}
+
+		if (prot->twsk_obj_size) {
+			static const char mask[] = "tw_sock_%s";
+
+			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+
+			if (timewait_sock_slab_name == NULL)
+				goto out_free_request_sock_slab;
+
+			sprintf(timewait_sock_slab_name, mask, prot->name);
+			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+							    prot->twsk_obj_size,
+							    0, SLAB_HWCACHE_ALIGN,
+							    NULL, NULL);
+			if (prot->twsk_slab == NULL)
+				goto out_free_timewait_sock_slab_name;
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1407,6 +1509,13 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_timewait_sock_slab_name:
+	kfree(timewait_sock_slab_name);
+out_free_request_sock_slab:
+	if (prot->rsk_prot && prot->rsk_prot->slab) {
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		prot->rsk_prot->slab = NULL;
+	}
 out_free_request_sock_slab_name:
 	kfree(request_sock_slab_name);
 out_free_sock_slab:
@@ -1434,6 +1543,14 @@ void proto_unregister(struct proto *prot)
 		prot->rsk_prot->slab = NULL;
 	}
 
+	if (prot->twsk_slab != NULL) {
+		const char *name = kmem_cache_name(prot->twsk_slab);
+
+		kmem_cache_destroy(prot->twsk_slab);
+		kfree(name);
+		prot->twsk_slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }