From 421748ecde8e69a6364e5ae66eb3bf87e1f995c0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 2 Aug 2008 01:04:36 -0400
Subject: [PATCH] assorted path_lookup() -> kern_path() conversions

more nameidata eviction

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/unix/af_unix.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c647aab8d41..dc504d308ec 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -711,28 +711,30 @@ static struct sock *unix_find_other(struct net *net,
 				    int type, unsigned hash, int *error)
 {
 	struct sock *u;
-	struct nameidata nd;
+	struct path path;
 	int err = 0;
 
 	if (sunname->sun_path[0]) {
-		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+		struct inode *inode;
+		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 		if (err)
 			goto fail;
-		err = vfs_permission(&nd, MAY_WRITE);
+		inode = path.dentry->d_inode;
+		err = inode_permission(inode, MAY_WRITE);
 		if (err)
 			goto put_fail;
 
 		err = -ECONNREFUSED;
-		if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
+		if (!S_ISSOCK(inode->i_mode))
 			goto put_fail;
-		u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
+		u = unix_find_socket_byinode(net, inode);
 		if (!u)
 			goto put_fail;
 
 		if (u->sk_type == type)
-			touch_atime(nd.path.mnt, nd.path.dentry);
+			touch_atime(path.mnt, path.dentry);
 
-		path_put(&nd.path);
+		path_put(&path);
 
 		err=-EPROTOTYPE;
 		if (u->sk_type != type) {
@@ -753,7 +755,7 @@ static struct sock *unix_find_other(struct net *net,
 	return u;
 
 put_fail:
-	path_put(&nd.path);
+	path_put(&path);
 fail:
 	*error=err;
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From 48dcc33e5e11de0f76b65b113988dbc930d17395 Mon Sep 17 00:00:00 2001
From: Jianjun Kong <jianjun@zeuux.org>
Date: Sat, 1 Nov 2008 21:37:27 -0700
Subject: af_unix: netns: fix problem of return value

fix problem of return value

net/unix/af_unix.c: unix_net_init()
when error appears, it should return 'error', not always return 0.

Signed-off-by: Jianjun Kong <jianjun@zeuux.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc504d308ec..4d3c6071b9a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2213,7 +2213,7 @@ static int unix_net_init(struct net *net)
 #endif
 	error = 0;
 out:
-	return 0;
+	return error;
 }
 
 static void unix_net_exit(struct net *net)
-- 
cgit v1.2.3-70-g09d2


From e27dfcea48372a4105d9fdf2e8450926737f6215 Mon Sep 17 00:00:00 2001
From: Jianjun Kong <jianjun@zeuux.org>
Date: Sat, 1 Nov 2008 21:38:31 -0700
Subject: af_unix: clean up net/unix/af_unix.c garbage.c sysctl_net_unix.c

clean up net/unix/af_unix.c garbage.c sysctl_net_unix.c

Signed-off-by: Jianjun Kong <jianjun@zeuux.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c         | 73 +++++++++++++++++++++++-----------------------
 net/unix/garbage.c         | 12 ++++----
 net/unix/sysctl_net_unix.c |  1 -
 3 files changed, 42 insertions(+), 44 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc504d308ec..0b80634b2b7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -211,7 +211,7 @@ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
 		 * we are guaranteed that it is a valid memory location in our
 		 * kernel address buffer.
 		 */
-		((char *)sunaddr)[len]=0;
+		((char *)sunaddr)[len] = 0;
 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
 		return len;
 	}
@@ -392,9 +392,9 @@ static int unix_release_sock (struct sock *sk, int embrion)
 
 	wake_up_interruptible_all(&u->peer_wait);
 
-	skpair=unix_peer(sk);
+	skpair = unix_peer(sk);
 
-	if (skpair!=NULL) {
+	if (skpair != NULL) {
 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 			unix_state_lock(skpair);
 			/* No more writes */
@@ -414,7 +414,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
 	/* Try to flush out this socket. Throw out buffers at least */
 
 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-		if (state==TCP_LISTEN)
+		if (state == TCP_LISTEN)
 			unix_release_sock(skb->sk, 1);
 		/* passed fds are erased in the kfree_skb hook	      */
 		kfree_skb(skb);
@@ -630,7 +630,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol)
 		 *	nothing uses it.
 		 */
 	case SOCK_RAW:
-		sock->type=SOCK_DGRAM;
+		sock->type = SOCK_DGRAM;
 	case SOCK_DGRAM:
 		sock->ops = &unix_dgram_ops;
 		break;
@@ -736,14 +736,14 @@ static struct sock *unix_find_other(struct net *net,
 
 		path_put(&path);
 
-		err=-EPROTOTYPE;
+		err = -EPROTOTYPE;
 		if (u->sk_type != type) {
 			sock_put(u);
 			goto fail;
 		}
 	} else {
 		err = -ECONNREFUSED;
-		u=unix_find_socket_byname(net, sunname, len, type, hash);
+		u = unix_find_socket_byname(net, sunname, len, type, hash);
 		if (u) {
 			struct dentry *dentry;
 			dentry = unix_sk(u)->dentry;
@@ -757,7 +757,7 @@ static struct sock *unix_find_other(struct net *net,
 put_fail:
 	path_put(&path);
 fail:
-	*error=err;
+	*error = err;
 	return NULL;
 }
 
@@ -767,7 +767,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct unix_sock *u = unix_sk(sk);
-	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	struct dentry * dentry = NULL;
 	struct nameidata nd;
 	int err;
@@ -779,7 +779,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (sunaddr->sun_family != AF_UNIX)
 		goto out;
 
-	if (addr_len==sizeof(short)) {
+	if (addr_len == sizeof(short)) {
 		err = unix_autobind(sock);
 		goto out;
 	}
@@ -875,8 +875,8 @@ out_mknod_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_put(&nd.path);
 out_mknod_parent:
-	if (err==-EEXIST)
-		err=-EADDRINUSE;
+	if (err == -EEXIST)
+		err = -EADDRINUSE;
 	unix_release_addr(addr);
 	goto out_up;
 }
@@ -911,7 +911,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 {
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
-	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
+	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 	struct sock *other;
 	unsigned hash;
 	int err;
@@ -927,7 +927,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 			goto out;
 
 restart:
-		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
+		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 		if (!other)
 			goto out;
 
@@ -961,14 +961,14 @@ restart:
 	 */
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
-		unix_peer(sk)=other;
+		unix_peer(sk) = other;
 		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
 			unix_dgram_disconnected(sk, old_peer);
 		sock_put(old_peer);
 	} else {
-		unix_peer(sk)=other;
+		unix_peer(sk) = other;
 		unix_state_double_unlock(sk, other);
 	}
 	return 0;
@@ -1004,7 +1004,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 			       int addr_len, int flags)
 {
-	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
@@ -1179,13 +1179,13 @@ out:
 
 static int unix_socketpair(struct socket *socka, struct socket *sockb)
 {
-	struct sock *ska=socka->sk, *skb = sockb->sk;
+	struct sock *ska = socka->sk, *skb = sockb->sk;
 
 	/* Join our sockets back to back */
 	sock_hold(ska);
 	sock_hold(skb);
-	unix_peer(ska)=skb;
-	unix_peer(skb)=ska;
+	unix_peer(ska) = skb;
+	unix_peer(skb) = ska;
 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
@@ -1246,7 +1246,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
 {
 	struct sock *sk = sock->sk;
 	struct unix_sock *u;
-	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	int err = 0;
 
 	if (peer) {
@@ -1323,7 +1323,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct unix_sock *u = unix_sk(sk);
-	struct sockaddr_un *sunaddr=msg->msg_name;
+	struct sockaddr_un *sunaddr = msg->msg_name;
 	struct sock *other = NULL;
 	int namelen = 0; /* fake GCC */
 	int err;
@@ -1364,7 +1364,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
-	if (skb==NULL)
+	if (skb == NULL)
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
@@ -1387,7 +1387,7 @@ restart:
 
 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
 					hash, &err);
-		if (other==NULL)
+		if (other == NULL)
 			goto out_free;
 	}
 
@@ -1407,7 +1407,7 @@ restart:
 		err = 0;
 		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
-			unix_peer(sk)=NULL;
+			unix_peer(sk) = NULL;
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
@@ -1473,10 +1473,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct sock *other = NULL;
-	struct sockaddr_un *sunaddr=msg->msg_name;
+	struct sockaddr_un *sunaddr = msg->msg_name;
 	int err,size;
 	struct sk_buff *skb;
-	int sent=0;
+	int sent = 0;
 	struct scm_cookie tmp_scm;
 
 	if (NULL == siocb->scm)
@@ -1523,9 +1523,9 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 *	Grab a buffer
 		 */
 
-		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+		skb = sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
 
-		if (skb==NULL)
+		if (skb == NULL)
 			goto out_err;
 
 		/*
@@ -1555,7 +1555,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		skb_queue_tail(&other->sk_receive_queue, skb);
 		unix_state_unlock(other);
 		other->sk_data_ready(other, size);
-		sent+=size;
+		sent += size;
 	}
 
 	scm_destroy(siocb->scm);
@@ -1734,7 +1734,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct scm_cookie tmp_scm;
 	struct sock *sk = sock->sk;
 	struct unix_sock *u = unix_sk(sk);
-	struct sockaddr_un *sunaddr=msg->msg_name;
+	struct sockaddr_un *sunaddr = msg->msg_name;
 	int copied = 0;
 	int check_creds = 0;
 	int target;
@@ -1772,7 +1772,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		unix_state_lock(sk);
 		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (skb==NULL)
+		if (skb == NULL)
 		{
 			if (copied >= target)
 				goto unlock;
@@ -1884,7 +1884,7 @@ static int unix_shutdown(struct socket *sock, int mode)
 	if (mode) {
 		unix_state_lock(sk);
 		sk->sk_shutdown |= mode;
-		other=unix_peer(sk);
+		other = unix_peer(sk);
 		if (other)
 			sock_hold(other);
 		unix_state_unlock(sk);
@@ -1919,7 +1919,7 @@ static int unix_shutdown(struct socket *sock, int mode)
 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
-	long amount=0;
+	long amount = 0;
 	int err;
 
 	switch(cmd)
@@ -1945,7 +1945,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			} else {
 				skb = skb_peek(&sk->sk_receive_queue);
 				if (skb)
-					amount=skb->len;
+					amount = skb->len;
 			}
 			spin_unlock(&sk->sk_receive_queue.lock);
 			err = put_user(amount, (int __user *)arg);
@@ -2077,6 +2077,7 @@ struct unix_iter_state {
 	struct seq_net_private p;
 	int i;
 };
+
 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
 {
 	struct unix_iter_state *iter = seq->private;
@@ -2093,7 +2094,6 @@ static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
 	return NULL;
 }
 
-
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(unix_table_lock)
 {
@@ -2173,7 +2173,6 @@ static const struct seq_operations unix_seq_ops = {
 	.show   = unix_seq_show,
 };
 
-
 static int unix_seq_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &unix_seq_ops,
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84f740..00734e22ec1 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,8 +104,8 @@ static struct sock *unix_get_socket(struct file *filp)
 	 *	Socket ?
 	 */
 	if (S_ISSOCK(inode->i_mode)) {
-		struct socket * sock = SOCKET_I(inode);
-		struct sock * s = sock->sk;
+		struct socket *sock = SOCKET_I(inode);
+		struct sock *s = sock->sk;
 
 		/*
 		 *	PF_UNIX ?
@@ -124,7 +124,7 @@ static struct sock *unix_get_socket(struct file *filp)
 void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
-	if(s) {
+	if (s) {
 		struct unix_sock *u = unix_sk(s);
 		spin_lock(&unix_gc_lock);
 		if (atomic_long_inc_return(&u->inflight) == 1) {
@@ -141,7 +141,7 @@ void unix_inflight(struct file *fp)
 void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
-	if(s) {
+	if (s) {
 		struct unix_sock *u = unix_sk(s);
 		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
@@ -154,7 +154,7 @@ void unix_notinflight(struct file *fp)
 
 static inline struct sk_buff *sock_queue_head(struct sock *sk)
 {
-	return (struct sk_buff *) &sk->sk_receive_queue;
+	return (struct sk_buff *)&sk->sk_receive_queue;
 }
 
 #define receive_queue_for_each_skb(sk, next, skb) \
@@ -339,7 +339,7 @@ void unix_gc(void)
 	 */
 	skb_queue_head_init(&hitlist);
 	list_for_each_entry(u, &gc_candidates, link)
-		scan_children(&u->sk, inc_inflight, &hitlist);
+	scan_children(&u->sk, inc_inflight, &hitlist);
 
 	spin_unlock(&unix_gc_lock);
 
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 77513d7e35f..1f4040cdada 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -61,4 +61,3 @@ void unix_sysctl_unregister(struct net *net)
 	unregister_sysctl_table(net->unx.ctl);
 	kfree(table);
 }
-
-- 
cgit v1.2.3-70-g09d2


From 6209344f5a3795d34b7f2c0061f49802283b6bdd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Sun, 9 Nov 2008 15:23:57 +0100
Subject: net: unix: fix inflight counting bug in garbage collector

Previously I assumed that the receive queues of candidates don't
change during the GC.  This is only half true, nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.

This can result in inc_inflight_move_tail() erronously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called.  This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.

Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves.  Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket becoming a
candidate for GC while the skb that contains it is not yet queued.

Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/af_unix.h |  1 +
 net/unix/af_unix.c    | 31 ++++++++++++++++++++++++-------
 net/unix/garbage.c    | 49 +++++++++++++++++++++++++++++++++++++------------
 3 files changed, 62 insertions(+), 19 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 7dd29b7e461..c29ff1da8a1 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
         atomic_long_t           inflight;
         spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
+	unsigned int		gc_maybe_cycle : 1;
         wait_queue_head_t       peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4d3c6071b9a..eb90f77bb0e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
@@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84f740..6d4a9a8de5e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 				 */
 				struct sock *sk = unix_get_socket(*fp++);
 				if (sk) {
-					hit = true;
-					func(unix_sk(sk));
+					struct unix_sock *u = unix_sk(sk);
+
+					/*
+					 * Ignore non-candidates, they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
 				}
 			}
 			if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -267,6 +276,7 @@ void unix_gc(void)
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
@@ -282,10 +292,14 @@ void unix_gc(void)
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference.  This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,13 +340,23 @@ void unix_gc(void)
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
+	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
 	/*
 	 * Now gc_candidates contains only garbage.  Restore original
 	 * inflight counters for these as well, and remove the skbuffs
-- 
cgit v1.2.3-70-g09d2


From 19d65624d38d6296dddf725d1b03baa8a491a553 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:10 +1100
Subject: CRED: Wrap task credential accesses in the UNIX socket protocol

Wrap access to task credentials so that they can be separated more easily from
the task_struct during the introduction of COW creds.

Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id().

Change some task->e?[ug]id to task_e?[ug]id().  In some places it makes more
sense to use RCU directly rather than a convenient wrapper; these will be
addressed by later patches.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: netdev@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 net/unix/af_unix.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4d3c6071b9a..338c1aec708 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -467,8 +467,7 @@ static int unix_listen(struct socket *sock, int backlog)
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
 	sk->sk_peercred.pid	= task_tgid_vnr(current);
-	sk->sk_peercred.uid	= current->euid;
-	sk->sk_peercred.gid	= current->egid;
+	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
 	err = 0;
 
 out_unlock:
@@ -1126,8 +1125,7 @@ restart:
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
-	newsk->sk_peercred.uid	= current->euid;
-	newsk->sk_peercred.gid	= current->egid;
+	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
 	newu = unix_sk(newsk);
 	newsk->sk_sleep		= &newu->peer_wait;
 	otheru = unix_sk(other);
@@ -1187,8 +1185,9 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
 	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
-	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
-	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
+	ska->sk_peercred.uid = skb->sk_peercred.uid;
+	ska->sk_peercred.gid = skb->sk_peercred.gid;
 
 	if (ska->sk_type != SOCK_DGRAM) {
 		ska->sk_state = TCP_ESTABLISHED;
-- 
cgit v1.2.3-70-g09d2


From 6eba6a372b501aa3cdfb7df21a8364099125b9c4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 16 Nov 2008 22:58:44 -0800
Subject: net: Cleanup of af_unix

This is a pure cleanup of net/unix/af_unix.c to meet current code
style standards

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 117 +++++++++++++++++++++++++----------------------------
 1 file changed, 55 insertions(+), 62 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4a39771d037..58db2a2f115 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -164,7 +164,7 @@ static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 
 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 {
-	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
+	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 }
 
 static inline int unix_recvq_full(struct sock const *sk)
@@ -197,7 +197,7 @@ static inline void unix_release_addr(struct unix_address *addr)
  *		- if started by zero, it is abstract name.
  */
 
-static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
+static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 {
 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
 		return -EINVAL;
@@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
 		return len;
 	}
 
-	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
+	*hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0));
 	return len;
 }
 
@@ -295,8 +295,7 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 		if (!net_eq(sock_net(s), net))
 			continue;
 
-		if(dentry && dentry->d_inode == i)
-		{
+		if (dentry && dentry->d_inode == i) {
 			sock_hold(s);
 			goto found;
 		}
@@ -354,7 +353,7 @@ static void unix_sock_destructor(struct sock *sk)
 	WARN_ON(!sk_unhashed(sk));
 	WARN_ON(sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		printk("Attempt to release alive unix socket: %p\n", sk);
+		printk(KERN_DEBUG "Attempt to release alive unix socket: %p\n", sk);
 		return;
 	}
 
@@ -363,11 +362,12 @@ static void unix_sock_destructor(struct sock *sk)
 
 	atomic_dec(&unix_nr_socks);
 #ifdef UNIX_REFCNT_DEBUG
-	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
+	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
+		atomic_read(&unix_nr_socks));
 #endif
 }
 
-static int unix_release_sock (struct sock *sk, int embrion)
+static int unix_release_sock(struct sock *sk, int embrion)
 {
 	struct unix_sock *u = unix_sk(sk);
 	struct dentry *dentry;
@@ -453,11 +453,11 @@ static int unix_listen(struct socket *sock, int backlog)
 	struct unix_sock *u = unix_sk(sk);
 
 	err = -EOPNOTSUPP;
-	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
-		goto out;			/* Only stream/seqpacket sockets accept */
+	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
+		goto out;	/* Only stream/seqpacket sockets accept */
 	err = -EINVAL;
 	if (!u->addr)
-		goto out;			/* No listens on an unbound socket */
+		goto out;	/* No listens on an unbound socket */
 	unix_state_lock(sk);
 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 		goto out_unlock;
@@ -579,7 +579,7 @@ static struct proto unix_proto = {
  */
 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 
-static struct sock * unix_create1(struct net *net, struct socket *sock)
+static struct sock *unix_create1(struct net *net, struct socket *sock)
 {
 	struct sock *sk = NULL;
 	struct unix_sock *u;
@@ -592,7 +592,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
 	if (!sk)
 		goto out;
 
-	sock_init_data(sock,sk);
+	sock_init_data(sock, sk);
 	lockdep_set_class(&sk->sk_receive_queue.lock,
 				&af_unix_sk_receive_queue_lock_key);
 
@@ -653,7 +653,7 @@ static int unix_release(struct socket *sock)
 
 	sock->sk = NULL;
 
-	return unix_release_sock (sk, 0);
+	return unix_release_sock(sk, 0);
 }
 
 static int unix_autobind(struct socket *sock)
@@ -662,7 +662,7 @@ static int unix_autobind(struct socket *sock)
 	struct net *net = sock_net(sk);
 	struct unix_sock *u = unix_sk(sk);
 	static u32 ordernum = 1;
-	struct unix_address * addr;
+	struct unix_address *addr;
 	int err;
 
 	mutex_lock(&u->readlock);
@@ -681,7 +681,7 @@ static int unix_autobind(struct socket *sock)
 
 retry:
 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
-	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
+	addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0));
 
 	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
@@ -768,7 +768,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct net *net = sock_net(sk);
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
-	struct dentry * dentry = NULL;
+	struct dentry *dentry = NULL;
 	struct nameidata nd;
 	int err;
 	unsigned hash;
@@ -1207,7 +1207,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
 	int err;
 
 	err = -EOPNOTSUPP;
-	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
+	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 		goto out;
 
 	err = -EINVAL;
@@ -1286,7 +1286,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	skb->destructor = sock_wfree;
 	UNIXCB(skb).fp = NULL;
 
-	for (i=scm->fp->count-1; i>=0; i--)
+	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_notinflight(scm->fp->fp[i]);
 }
 
@@ -1315,7 +1315,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	if (!UNIXCB(skb).fp)
 		return -ENOMEM;
 
-	for (i=scm->fp->count-1; i>=0; i--)
+	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_inflight(scm->fp->fp[i]);
 	skb->destructor = unix_destruct_fds;
 	return 0;
@@ -1385,7 +1385,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
-	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 	if (err)
 		goto out_free;
 
@@ -1486,7 +1486,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct sock *other = NULL;
 	struct sockaddr_un *sunaddr = msg->msg_name;
-	int err,size;
+	int err, size;
 	struct sk_buff *skb;
 	int sent = 0;
 	struct scm_cookie tmp_scm;
@@ -1515,8 +1515,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (sk->sk_shutdown & SEND_SHUTDOWN)
 		goto pipe_err;
 
-	while(sent < len)
-	{
+	while (sent < len) {
 		/*
 		 *	Optimisation for the fact that under 0.01% of X
 		 *	messages typically need breaking up.
@@ -1535,7 +1534,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 *	Grab a buffer
 		 */
 
-		skb = sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
+					  &err);
 
 		if (skb == NULL)
 			goto out_err;
@@ -1558,7 +1558,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			}
 		}
 
-		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
+		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+		if (err) {
 			kfree_skb(skb);
 			goto out_err;
 		}
@@ -1584,8 +1585,8 @@ pipe_err_free:
 	unix_state_unlock(other);
 	kfree_skb(skb);
 pipe_err:
-	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
-		send_sig(SIGPIPE,current,0);
+	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
 	err = -EPIPE;
 out_err:
 	scm_destroy(siocb->scm);
@@ -1675,13 +1676,10 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 	siocb->scm->creds = *UNIXCREDS(skb);
 	unix_set_secdata(siocb->scm, skb);
 
-	if (!(flags & MSG_PEEK))
-	{
+	if (!(flags & MSG_PEEK)) {
 		if (UNIXCB(skb).fp)
 			unix_detach_fds(siocb->scm, skb);
-	}
-	else
-	{
+	} else {
 		/* It is questionable: on PEEK we could:
 		   - do not return fds - good, but too simple 8)
 		   - return fds, and do not return them on read (old strategy,
@@ -1702,7 +1700,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 	scm_recv(sock, msg, siocb->scm, flags);
 
 out_free:
-	skb_free_datagram(sk,skb);
+	skb_free_datagram(sk, skb);
 out_unlock:
 	mutex_unlock(&u->readlock);
 out:
@@ -1713,7 +1711,7 @@ out:
  *	Sleep until data has arrive. But check for races..
  */
 
-static long unix_stream_data_wait(struct sock * sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo)
 {
 	DEFINE_WAIT(wait);
 
@@ -1782,15 +1780,13 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	mutex_lock(&u->readlock);
 
-	do
-	{
+	do {
 		int chunk;
 		struct sk_buff *skb;
 
 		unix_state_lock(sk);
 		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (skb == NULL)
-		{
+		if (skb == NULL) {
 			if (copied >= target)
 				goto unlock;
 
@@ -1798,7 +1794,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			 *	POSIX 1003.1g mandates this order.
 			 */
 
-			if ((err = sock_error(sk)) != 0)
+			err = sock_error(sk);
+			if (err)
 				goto unlock;
 			if (sk->sk_shutdown & RCV_SHUTDOWN)
 				goto unlock;
@@ -1825,7 +1822,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
+			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
+				   sizeof(siocb->scm->creds)) != 0) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
 				break;
 			}
@@ -1836,8 +1834,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		}
 
 		/* Copy address just once */
-		if (sunaddr)
-		{
+		if (sunaddr) {
 			unix_copy_addr(msg, skb->sk);
 			sunaddr = NULL;
 		}
@@ -1853,16 +1850,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		size -= chunk;
 
 		/* Mark read part of skb as used */
-		if (!(flags & MSG_PEEK))
-		{
+		if (!(flags & MSG_PEEK)) {
 			skb_pull(skb, chunk);
 
 			if (UNIXCB(skb).fp)
 				unix_detach_fds(siocb->scm, skb);
 
 			/* put the skb back if we didn't use it up.. */
-			if (skb->len)
-			{
+			if (skb->len) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
 				break;
 			}
@@ -1871,9 +1866,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 			if (siocb->scm->fp)
 				break;
-		}
-		else
-		{
+		} else {
 			/* It is questionable, see note in unix_dgram_recvmsg.
 			 */
 			if (UNIXCB(skb).fp)
@@ -1939,13 +1932,12 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	long amount = 0;
 	int err;
 
-	switch(cmd)
-	{
-		case SIOCOUTQ:
-			amount = atomic_read(&sk->sk_wmem_alloc);
-			err = put_user(amount, (int __user *)arg);
-			break;
-		case SIOCINQ:
+	switch (cmd) {
+	case SIOCOUTQ:
+		amount = atomic_read(&sk->sk_wmem_alloc);
+		err = put_user(amount, (int __user *)arg);
+		break;
+	case SIOCINQ:
 		{
 			struct sk_buff *skb;
 
@@ -1969,14 +1961,14 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			break;
 		}
 
-		default:
-			err = -ENOIOCTLCMD;
-			break;
+	default:
+		err = -ENOIOCTLCMD;
+		break;
 	}
 	return err;
 }
 
-static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
+static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask;
@@ -1998,7 +1990,8 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
 		mask |= POLLIN | POLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
-	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
+	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+	    sk->sk_state == TCP_CLOSE)
 		mask |= POLLHUP;
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 248969ae31e1b3276fc4399d67ce29a5d81e6fd9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 17 Nov 2008 00:00:30 -0800
Subject: net: af_unix can make unix_nr_socks visbile in /proc

Currently, /proc/net/protocols displays socket counts only for TCP/TCPv6
protocols

We can provide unix_nr_socks for free here, this counter being
already maintained in af_unix

Before patch :

# grep UNIX /proc/net/protocols
UNIX       428     -1      -1   NI       0   yes  kernel

After patch :

# grep UNIX /proc/net/protocols
UNIX       428     98      -1   NI       0   yes  kernel

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 58db2a2f115..a1eb5969712 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -566,9 +566,10 @@ static const struct proto_ops unix_seqpacket_ops = {
 };
 
 static struct proto unix_proto = {
-	.name	  = "UNIX",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct unix_sock),
+	.name			= "UNIX",
+	.owner			= THIS_MODULE,
+	.sockets_allocated	= &unix_nr_socks,
+	.obj_size		= sizeof(struct unix_sock),
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a8076d8db98de6da61394b2e942320e4612643ac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 17 Nov 2008 02:38:49 -0800
Subject: net: af_unix should update its inuse counter

This patch is a preparation to namespace conversion of /proc/net/protocols

In order to have relevant information for UNIX protocol, we should use
sock_prot_inuse_add() to update a (percpu and pernamespace) counter of
inuse sockets.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a1eb5969712..f2cf3f583f6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -361,6 +361,7 @@ static void unix_sock_destructor(struct sock *sk)
 		unix_release_addr(u->addr);
 
 	atomic_dec(&unix_nr_socks);
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 #ifdef UNIX_REFCNT_DEBUG
 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
 		atomic_read(&unix_nr_socks));
@@ -612,6 +613,9 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 out:
 	if (sk == NULL)
 		atomic_dec(&unix_nr_socks);
+	else
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+
 	return sk;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 19 Nov 2008 15:44:53 -0800
Subject: include/net net/ - csum_partial - remove unnecessary casts

The first argument to csum_partial is const void *
casts to char/u8 * are not necessary

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 2 +-
 include/net/ip_vs.h    | 6 +++---
 net/core/netpoll.c     | 2 +-
 net/ipv4/inet_lro.c    | 4 ++--
 net/ipv4/tcp_ipv4.c    | 4 ++--
 net/ipv6/icmp.c        | 4 ++--
 net/ipv6/mcast.c       | 2 +-
 net/ipv6/ndisc.c       | 4 ++--
 net/ipv6/tcp_ipv6.c    | 6 +++---
 net/unix/af_unix.c     | 4 ++--
 10 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 07602b7fa21..ba55d8b8c87 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -98,7 +98,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
 {
 	__be32 diff[] = { ~from, to };
 
-	*sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum)));
+	*sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum)));
 }
 
 static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 8f6abf4883e..ab9b003ab67 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -913,7 +913,7 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
 {
 	__be32 diff[2] = { ~old, new };
 
-	return csum_partial((char *) diff, sizeof(diff), oldsum);
+	return csum_partial(diff, sizeof(diff), oldsum);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -923,7 +923,7 @@ static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
 	__be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
 			    new[3],  new[2],  new[1],  new[0] };
 
-	return csum_partial((char *) diff, sizeof(diff), oldsum);
+	return csum_partial(diff, sizeof(diff), oldsum);
 }
 #endif
 
@@ -931,7 +931,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 {
 	__be16 diff[2] = { ~old, new };
 
-	return csum_partial((char *) diff, sizeof(diff), oldsum);
+	return csum_partial(diff, sizeof(diff), oldsum);
 }
 
 #endif /* __KERNEL__ */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 34f5d072f16..fc4e28e23b8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -343,7 +343,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	udph->check = csum_tcpudp_magic(htonl(np->local_ip),
 					htonl(np->remote_ip),
 					udp_len, IPPROTO_UDP,
-					csum_partial((unsigned char *)udph, udp_len, 0));
+					csum_partial(udph, udp_len, 0));
 	if (udph->check == 0)
 		udph->check = CSUM_MANGLED_0;
 
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index cfd034a2b96..6a667dae315 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
 	iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
 
 	tcph->check = 0;
-	tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0);
+	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
 	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
 	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 					lro_desc->ip_tot_len -
@@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 	__wsum tcp_ps_hdr_csum;
 
 	tcp_csum = ~csum_unfold(tcph->check);
-	tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum);
+	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
 
 	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 					     len + TCP_HDR_LEN(tcph),
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b2e3ab2287b..5559fea61e8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -492,7 +492,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
-					 csum_partial((char *)th,
+					 csum_partial(th,
 						      th->doff << 2,
 						      skb->csum));
 	}
@@ -726,7 +726,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 		th->check = tcp_v4_check(skb->len,
 					 ireq->loc_addr,
 					 ireq->rmt_addr,
-					 csum_partial((char *)th, skb->len,
+					 csum_partial(th, skb->len,
 						      skb->csum));
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be351009fd0..a77b8d10380 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	icmp6h->icmp6_cksum = 0;
 
 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
-		skb->csum = csum_partial((char *)icmp6h,
+		skb->csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), skb->csum);
 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
 						      &fl->fl6_dst,
@@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 			tmp_csum = csum_add(tmp_csum, skb->csum);
 		}
 
-		tmp_csum = csum_partial((char *)icmp6h,
+		tmp_csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), tmp_csum);
 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
 						      &fl->fl6_dst,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a76199ecad2..870a1d64605 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len,
 					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr, len, 0));
+					   csum_partial(hdr, len, 0));
 
 	idev = in6_dev_get(skb->dev);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fbf451c0d77..af6705f03b5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -491,7 +491,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
 
 	hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
 					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr,
+					   csum_partial(hdr,
 							len, 0));
 
 	return skb;
@@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
 					     len, IPPROTO_ICMPV6,
-					     csum_partial((u8 *) icmph, len, 0));
+					     csum_partial(icmph, len, 0));
 
 	buff->dst = dst;
 	idev = in6_dev_get(dst->dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b3578705631..a5d750acd79 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 
 		th->check = tcp_v6_check(th, skb->len,
 					 &treq->loc_addr, &treq->rmt_addr,
-					 csum_partial((char *)th, skb->len, skb->csum));
+					 csum_partial(th, skb->len, skb->csum));
 
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt, 0);
@@ -915,7 +915,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
-					    csum_partial((char *)th, th->doff<<2,
+					    csum_partial(th, th->doff<<2,
 							 skb->csum));
 	}
 }
@@ -997,7 +997,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	}
 #endif
 
-	buff->csum = csum_partial((char *)t1, tot_len, 0);
+	buff->csum = csum_partial(t1, tot_len, 0);
 
 	memset(&fl, 0, sizeof(fl));
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f2cf3f583f6..ebc4a9a4b1f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 		return len;
 	}
 
-	*hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0));
+	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 	return len;
 }
 
@@ -686,7 +686,7 @@ static int unix_autobind(struct socket *sock)
 
 retry:
 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
-	addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0));
+	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 
 	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
-- 
cgit v1.2.3-70-g09d2


From 6b41e7dd90c6a628ab5fb8d781302d60a243b2ce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 19 Nov 2008 15:48:09 -0800
Subject: net: af_unix should use KERN_INFO instead of KERN_DEBUG

As spotted by Joe Perches, we should use KERN_INFO in unix_sock_destructor()

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ebc4a9a4b1f..e1ca8f744ca 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -353,7 +353,7 @@ static void unix_sock_destructor(struct sock *sk)
 	WARN_ON(!sk_unhashed(sk));
 	WARN_ON(sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		printk(KERN_DEBUG "Attempt to release alive unix socket: %p\n", sk);
+		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
 		return;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 6f756a8c36bf54d0afb1d457082b3e3033d951a7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 23 Nov 2008 17:34:03 -0800
Subject: net: Make sure BHs are disabled in sock_prot_inuse_add()

The rule of calling sock_prot_inuse_add() is that BHs must
be disabled.  Some new calls were added where this was not
true and this tiggers warnings as reported by Ilpo.

Fix this by adding explicit BH disabling around those call sites.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 3 +++
 net/sctp/socket.c        | 4 ++++
 net/unix/af_unix.c       | 2 ++
 3 files changed, 9 insertions(+)

(limited to 'net/unix/af_unix.c')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a2071dcfe9e..c7d76579bf3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -452,7 +452,10 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
 	if (err < 0)
 		goto out_module;
 
+	local_bh_disable();
 	sock_prot_inuse_add(net, &netlink_proto, 1);
+	local_bh_enable();
+
 	nlk = nlk_sk(sock->sk);
 	nlk->module = module;
 out:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0c70eff0b75..f03af84edf6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3614,7 +3614,11 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 
 	SCTP_DBG_OBJCNT_INC(sock);
 	atomic_inc(&sctp_sockets_allocated);
+
+	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	local_bh_enable();
+
 	return 0;
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e1ca8f744ca..a45a9f7369e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -361,7 +361,9 @@ static void unix_sock_destructor(struct sock *sk)
 		unix_release_addr(u->addr);
 
 	atomic_dec(&unix_nr_socks);
+	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	local_bh_enable();
 #ifdef UNIX_REFCNT_DEBUG
 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
 		atomic_read(&unix_nr_socks));
-- 
cgit v1.2.3-70-g09d2


From 920de804bca61f88643bc9171bcd06f1a56c6258 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 24 Nov 2008 00:09:29 -0800
Subject: net: Make sure BHs are disabled in sock_prot_inuse_add()

The rule of calling sock_prot_inuse_add() is that BHs must
be disabled.  Some new calls were added where this was not
true and this tiggers warnings as reported by Ilpo.

Fix this by adding explicit BH disabling around those call sites,
or moving sock_prot_inuse_add() call inside an existing BH disabled
section.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 2 +-
 net/packet/af_packet.c     | 4 ++--
 net/unix/af_unix.c         | 6 ++++--
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 11fcb87a1fd..6a1045da48d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -402,9 +402,9 @@ void inet_unhash(struct sock *sk)
 
 	spin_lock_bh(lock);
 	done =__sk_nulls_del_node_init_rcu(sk);
-	spin_unlock_bh(lock);
 	if (done)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	spin_unlock_bh(lock);
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b4870a34c43..5f94db2f3e9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -872,6 +872,7 @@ static int packet_release(struct socket *sock)
 
 	write_lock_bh(&net->packet.sklist_lock);
 	sk_del_node_init(sk);
+	sock_prot_inuse_add(net, sk->sk_prot, -1);
 	write_unlock_bh(&net->packet.sklist_lock);
 
 	/*
@@ -910,7 +911,6 @@ static int packet_release(struct socket *sock)
 	skb_queue_purge(&sk->sk_receive_queue);
 	sk_refcnt_debug_release(sk);
 
-	sock_prot_inuse_add(net, sk->sk_prot, -1);
 	sock_put(sk);
 	return 0;
 }
@@ -1085,8 +1085,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
 
 	write_lock_bh(&net->packet.sklist_lock);
 	sk_add_node(sk, &net->packet.sklist);
-	write_unlock_bh(&net->packet.sklist_lock);
 	sock_prot_inuse_add(net, &packet_proto, 1);
+	write_unlock_bh(&net->packet.sklist_lock);
 	return(0);
 out:
 	return err;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a45a9f7369e..3a35a6e8bf9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -615,9 +615,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 out:
 	if (sk == NULL)
 		atomic_dec(&unix_nr_socks);
-	else
+	else {
+		local_bh_disable();
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-
+		local_bh_enable();
+	}
 	return sk;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 25 Nov 2008 21:16:35 -0800
Subject: net: Use a percpu_counter for sockets_allocated

Instead of using one atomic_t per protocol, use a percpu_counter
for "sockets_allocated", to reduce cache line contention on
heavy duty network servers.

Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9
net: af_unix can make unix_nr_socks visbile in /proc),
since it is not anymore used after sock_prot_inuse_add() addition

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |  1 +
 include/net/sock.h      |  2 +-
 include/net/tcp.h       |  2 +-
 net/core/sock.c         | 10 +++++++---
 net/ipv4/proc.c         |  3 ++-
 net/ipv4/tcp.c          |  8 ++++++--
 net/ipv4/tcp_ipv4.c     |  4 ++--
 net/ipv6/tcp_ipv6.c     |  2 +-
 net/sctp/protocol.c     |  6 +++++-
 net/sctp/socket.c       |  6 +++---
 net/unix/af_unix.c      |  1 -
 11 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 23797506f59..bbb7742195b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk);
 unsigned int sctp_poll(struct file *file, struct socket *sock,
 		poll_table *wait);
 void sctp_sock_rfree(struct sk_buff *skb);
+extern struct percpu_counter sctp_sockets_allocated;
 
 /*
  * sctp/primitive.c
diff --git a/include/net/sock.h b/include/net/sock.h
index 00cd486d362..a2a3890959c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -649,7 +649,7 @@ struct proto {
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
-	atomic_t		*sockets_allocated;	/* Current number of sockets. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
 	/*
 	 * Pressure flag: try to collapse.
 	 * Technical note: it is used by multiple contexts non atomically.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e8ae90a8c35..cbca3b8a133 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
 
 extern atomic_t tcp_memory_allocated;
-extern atomic_t tcp_sockets_allocated;
+extern struct percpu_counter tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
 /*
diff --git a/net/core/sock.c b/net/core/sock.c
index a4e840e5a05..7a081b647bf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		newsk->sk_sleep	 = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
-			atomic_inc(newsk->sk_prot->sockets_allocated);
+			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
 	}
 out:
 	return newsk;
@@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	}
 
 	if (prot->memory_pressure) {
-		if (!*prot->memory_pressure ||
-		    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+		int alloc;
+
+		if (!*prot->memory_pressure)
+			return 1;
+		alloc = percpu_counter_read_positive(prot->sockets_allocated);
+		if (prot->sysctl_mem[2] > alloc *
 		    sk_mem_pages(sk->sk_wmem_queued +
 				 atomic_read(&sk->sk_rmem_alloc) +
 				 sk->sk_forward_alloc))
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 731789bb499..4944b47ad62 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -55,7 +55,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   sock_prot_inuse_get(net, &tcp_prot),
 		   atomic_read(&tcp_orphan_count),
-		   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
+		   tcp_death_row.tw_count,
+		   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
 	seq_printf(seq, "UDP: inuse %d mem %d\n",
 		   sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 044224a341e..e6fade9ebf6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -290,9 +290,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
 atomic_t tcp_memory_allocated;	/* Current allocated memory. */
-atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
-
 EXPORT_SYMBOL(tcp_memory_allocated);
+
+/*
+ * Current number of TCP sockets.
+ */
+struct percpu_counter tcp_sockets_allocated;
 EXPORT_SYMBOL(tcp_sockets_allocated);
 
 /*
@@ -2685,6 +2688,7 @@ void __init tcp_init(void)
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
+	percpu_counter_init(&tcp_sockets_allocated, 0);
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cab2458f86f..26b9030747c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1797,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	percpu_counter_inc(&tcp_sockets_allocated);
 
 	return 0;
 }
@@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	atomic_dec(&tcp_sockets_allocated);
+	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f259c9671f3..8702b06cb60 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	percpu_counter_inc(&tcp_sockets_allocated);
 
 	return 0;
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a8ca743241e..d5ea232c912 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void)
 /* Set up the proc fs entry for the SCTP protocol. */
 static __init int sctp_proc_init(void)
 {
+	if (percpu_counter_init(&sctp_sockets_allocated, 0))
+		goto out_nomem;
 #ifdef CONFIG_PROC_FS
 	if (!proc_net_sctp) {
 		struct proc_dir_entry *ent;
@@ -110,7 +112,7 @@ static __init int sctp_proc_init(void)
 			ent->owner = THIS_MODULE;
 			proc_net_sctp = ent;
 		} else
-			goto out_nomem;
+			goto out_free_percpu;
 	}
 
 	if (sctp_snmp_proc_init())
@@ -135,6 +137,8 @@ out_snmp_proc_init:
 		proc_net_sctp = NULL;
 		remove_proc_entry("sctp", init_net.proc_net);
 	}
+out_free_percpu:
+	percpu_counter_destroy(&sctp_sockets_allocated);
 out_nomem:
 	return -ENOMEM;
 #else
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ba81fe3ccab..a2de585888d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3];
 
 static int sctp_memory_pressure;
 static atomic_t sctp_memory_allocated;
-static atomic_t sctp_sockets_allocated;
+struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
 {
@@ -3613,7 +3613,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->hmac = NULL;
 
 	SCTP_DBG_OBJCNT_INC(sock);
-	atomic_inc(&sctp_sockets_allocated);
+	percpu_counter_inc(&sctp_sockets_allocated);
 
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3632,7 +3632,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
 	/* Release our hold on the endpoint. */
 	ep = sctp_sk(sk)->ep;
 	sctp_endpoint_free(ep);
-	atomic_dec(&sctp_sockets_allocated);
+	percpu_counter_dec(&sctp_sockets_allocated);
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	local_bh_enable();
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3a35a6e8bf9..5aaf23e43f1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -571,7 +571,6 @@ static const struct proto_ops unix_seqpacket_ops = {
 static struct proto unix_proto = {
 	.name			= "UNIX",
 	.owner			= THIS_MODULE,
-	.sockets_allocated	= &unix_nr_socks,
 	.obj_size		= sizeof(struct unix_sock),
 };
 
-- 
cgit v1.2.3-70-g09d2


From 5f23b734963ec7eaa3ebcd9050da0c9b7d143dd3 Mon Sep 17 00:00:00 2001
From: dann frazier <dannf@hp.com>
Date: Wed, 26 Nov 2008 15:32:27 -0800
Subject: net: Fix soft lockups/OOM issues w/ unix garbage collector

This is an implementation of David Miller's suggested fix in:
  https://bugzilla.redhat.com/show_bug.cgi?id=470201

It has been updated to use wait_event() instead of
wait_event_interruptible().

Paraphrasing the description from the above report, it makes sendmsg()
block while UNIX garbage collection is in progress. This avoids a
situation where child processes continue to queue new FDs over a
AF_UNIX socket to a parent which is in the exit path and running
garbage collection on these FDs. This contention can result in soft
lockups and oom-killing of unrelated processes.

Signed-off-by: dann frazier <dannf@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h |  1 +
 net/unix/af_unix.c    |  2 ++
 net/unix/garbage.c    | 13 ++++++++++---
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'net/unix/af_unix.c')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index c29ff1da8a1..1614d78c60e 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -9,6 +9,7 @@
 extern void unix_inflight(struct file *fp);
 extern void unix_notinflight(struct file *fp);
 extern void unix_gc(void);
+extern void wait_for_unix_gc(void);
 
 #define UNIX_HASH_SIZE	256
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eb90f77bb0e..66d5ac4773a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1343,6 +1343,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
+	wait_for_unix_gc();
 	err = scm_send(sock, msg, siocb->scm);
 	if (err < 0)
 		return err;
@@ -1493,6 +1494,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
+	wait_for_unix_gc();
 	err = scm_send(sock, msg, siocb->scm);
 	if (err < 0)
 		return err;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 6d4a9a8de5e..abb3ab34cb1 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -80,6 +80,7 @@
 #include <linux/file.h>
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
+#include <linux/wait.h>
 
 #include <net/sock.h>
 #include <net/af_unix.h>
@@ -91,6 +92,7 @@
 static LIST_HEAD(gc_inflight_list);
 static LIST_HEAD(gc_candidates);
 static DEFINE_SPINLOCK(unix_gc_lock);
+static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
 
 unsigned int unix_tot_inflight;
 
@@ -266,12 +268,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
-/* The external entry point: unix_gc() */
+static bool gc_in_progress = false;
 
-void unix_gc(void)
+void wait_for_unix_gc(void)
 {
-	static bool gc_in_progress = false;
+	wait_event(unix_gc_wait, gc_in_progress == false);
+}
 
+/* The external entry point: unix_gc() */
+void unix_gc(void)
+{
 	struct unix_sock *u;
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
@@ -376,6 +382,7 @@ void unix_gc(void)
 	/* All candidates should have been detached by now. */
 	BUG_ON(!list_empty(&gc_candidates));
 	gc_in_progress = false;
+	wake_up(&unix_gc_wait);
 
  out:
 	spin_unlock(&unix_gc_lock);
-- 
cgit v1.2.3-70-g09d2