From 0884d7aa24e15e72b3c07f7da910a13bb7df3592 Mon Sep 17 00:00:00 2001 From: Alexey Moiseytsev Date: Mon, 21 Nov 2011 13:35:25 +0000 Subject: AF_UNIX: Fix poll blocking problem when reading from a stream socket poll() call may be blocked by concurrent reading from the same stream socket. Signed-off-by: Alexey Moiseytsev Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 466fbcc5cf7..b595a3d8679 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1957,6 +1957,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } } else { @@ -1974,6 +1975,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, chunk = min_t(unsigned int, skb->len, size); if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); if (copied == 0) copied = -EFAULT; break; @@ -1991,6 +1993,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, /* put the skb back if we didn't use it up.. */ if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } @@ -2006,6 +2009,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } } while (size); -- cgit v1.2.3-70-g09d2 From fa7ff56f75add89bbedaf2dfcfa8f6661e8e8b3a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 15 Dec 2011 02:44:03 +0000 Subject: af_unix: Export stuff required for diag module Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 +++ net/unix/af_unix.c | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 91ab5b01678..63b17816e0b 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -11,10 +11,13 @@ extern void unix_notinflight(struct file *fp); extern void unix_gc(void); extern void wait_for_unix_gc(void); extern struct sock *unix_get_socket(struct file *filp); +extern struct sock *unix_peer_get(struct sock *); #define UNIX_HASH_SIZE 256 extern unsigned int unix_tot_inflight; +extern spinlock_t unix_table_lock; +extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; struct unix_address { atomic_t refcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b595a3d8679..e1b9358a211 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,8 +115,10 @@ #include #include -static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -static DEFINE_SPINLOCK(unix_table_lock); +struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +EXPORT_SYMBOL_GPL(unix_socket_table); +DEFINE_SPINLOCK(unix_table_lock); +EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -172,7 +174,7 @@ static inline int unix_recvq_full(struct sock const *sk) return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } -static struct sock *unix_peer_get(struct sock *s) +struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -183,6 +185,7 @@ static struct sock *unix_peer_get(struct sock *s) unix_state_unlock(s); return peer; } +EXPORT_SYMBOL_GPL(unix_peer_get); static inline void unix_release_addr(struct unix_address *addr) { -- cgit v1.2.3-70-g09d2 From 885ee74d5d3058e4a904671ed7929c9540c95fa5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 30 Dec 2011 00:54:11 +0000 Subject: af_unix: Move CINQ/COUTQ code to helpers Currently tcp diag reports rqlen and wqlen values similar to how the CINQ/COUTQ iotcls do. To make unix diag report these values in the same way move the respective code into helpers. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 +++ net/unix/af_unix.c | 59 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 23 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 63b17816e0b..5a4e29b168c 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -66,6 +66,9 @@ struct unix_sock { #define peer_wait peer_wq.wait +long unix_inq_len(struct sock *sk); +long unix_outq_len(struct sock *sk); + #ifdef CONFIG_SYSCTL extern int unix_sysctl_register(struct net *net); extern void unix_sysctl_unregister(struct net *net); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e1b9358a211..7cc3d7b23d1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2065,6 +2065,36 @@ static int unix_shutdown(struct socket *sock, int mode) return 0; } +long unix_inq_len(struct sock *sk) +{ + struct sk_buff *skb; + long amount = 0; + + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + spin_lock(&sk->sk_receive_queue.lock); + if (sk->sk_type == SOCK_STREAM || + sk->sk_type == SOCK_SEQPACKET) { + skb_queue_walk(&sk->sk_receive_queue, skb) + amount += skb->len; + } else { + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + amount = skb->len; + } + spin_unlock(&sk->sk_receive_queue.lock); + + return amount; +} +EXPORT_SYMBOL_GPL(unix_inq_len); + +long unix_outq_len(struct sock *sk) +{ + return sk_wmem_alloc_get(sk); +} +EXPORT_SYMBOL_GPL(unix_outq_len); + static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -2073,33 +2103,16 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: - amount = sk_wmem_alloc_get(sk); + amount = unix_outq_len(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: - { - struct sk_buff *skb; - - if (sk->sk_state == TCP_LISTEN) { - err = -EINVAL; - break; - } - - spin_lock(&sk->sk_receive_queue.lock); - if (sk->sk_type == SOCK_STREAM || - sk->sk_type == SOCK_SEQPACKET) { - skb_queue_walk(&sk->sk_receive_queue, skb) - amount += skb->len; - } else { - skb = skb_peek(&sk->sk_receive_queue); - if (skb) - amount = skb->len; - } - spin_unlock(&sk->sk_receive_queue.lock); + amount = unix_inq_len(sk); + if (amount < 0) + err = amount; + else err = put_user(amount, (int __user *)arg); - break; - } - + break; default: err = -ENOIOCTLCMD; break; -- cgit v1.2.3-70-g09d2 From 04fc66e789a896e684bfdca30208e57eb832dd96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Nov 2011 14:58:38 -0500 Subject: switch ->path_mknod() to umode_t Signed-off-by: Al Viro --- include/linux/security.h | 6 +++--- net/unix/af_unix.c | 2 +- security/apparmor/lsm.c | 2 +- security/capability.c | 2 +- security/security.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/linux/security.h b/include/linux/security.h index 24cd7cf4856..535721cc374 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1426,7 +1426,7 @@ struct security_operations { int (*path_unlink) (struct path *dir, struct dentry *dentry); int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode); int (*path_rmdir) (struct path *dir, struct dentry *dentry); - int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode, + int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int (*path_truncate) (struct path *path); int (*path_symlink) (struct path *dir, struct dentry *dentry, @@ -2857,7 +2857,7 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi int security_path_unlink(struct path *dir, struct dentry *dentry); int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(struct path *dir, struct dentry *dentry); -int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, +int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int security_path_truncate(struct path *path); int security_path_symlink(struct path *dir, struct dentry *dentry, @@ -2888,7 +2888,7 @@ static inline int security_path_rmdir(struct path *dir, struct dentry *dentry) } static inline int security_path_mknod(struct path *dir, struct dentry *dentry, - int mode, unsigned int dev) + umode_t mode, unsigned int dev) { return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b595a3d8679..412a99f4a3f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -847,7 +847,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - unsigned int mode; + umode_t mode; err = 0; /* * Get the parent directory, calculate the hash for last diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 3271bd38d86..c0a399ec1df 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -274,7 +274,7 @@ static int apparmor_path_rmdir(struct path *dir, struct dentry *dentry) } static int apparmor_path_mknod(struct path *dir, struct dentry *dentry, - int mode, unsigned int dev) + umode_t mode, unsigned int dev) { return common_perm_create(OP_MKNOD, dir, dentry, AA_MAY_CREATE, mode); } diff --git a/security/capability.c b/security/capability.c index 2e1fe45d148..156816d451b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -235,7 +235,7 @@ static void cap_inode_getsecid(const struct inode *inode, u32 *secid) } #ifdef CONFIG_SECURITY_PATH -static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode, +static int cap_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { return 0; diff --git a/security/security.c b/security/security.c index e9724e058b4..151152de1a0 100644 --- a/security/security.c +++ b/security/security.c @@ -388,7 +388,7 @@ int security_old_inode_init_security(struct inode *inode, struct inode *dir, EXPORT_SYMBOL(security_old_inode_init_security); #ifdef CONFIG_SECURITY_PATH -int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, +int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { if (unlikely(IS_PRIVATE(dir->dentry->d_inode))) diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 95e4a7db8b8..75c956a51e7 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -234,7 +234,7 @@ static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry, * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry, - int mode, unsigned int dev) + umode_t mode, unsigned int dev) { struct path path = { parent->mnt, dentry }; int type = TOMOYO_TYPE_CREATE; -- cgit v1.2.3-70-g09d2 From 6f01fd6e6f6809061b56e78f1e8d143099716d70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 28 Jan 2012 16:11:03 +0000 Subject: af_unix: fix EPOLLET regression for stream sockets Commit 0884d7aa24 (AF_UNIX: Fix poll blocking problem when reading from a stream socket) added a regression for epoll() in Edge Triggered mode (EPOLLET) Appropriate fix is to use skb_peek()/skb_unlink() instead of skb_dequeue(), and only call skb_unlink() when skb is fully consumed. This remove the need to requeue a partial skb into sk_receive_queue head and the extra sk->sk_data_ready() calls that added the regression. This is safe because once skb is given to sk_receive_queue, it is not modified by a writer, and readers are serialized by u->readlock mutex. This also reduce number of spinlock acquisition for small reads or MSG_PEEK users so should improve overall performance. Reported-by: Nick Mathewson Signed-off-by: Eric Dumazet Cc: Alexey Moiseytsev Signed-off-by: David S. Miller --- net/unix/af_unix.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aad8fb69998..85d3bb7490a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; unix_state_lock(sk); - skb = skb_dequeue(&sk->sk_receive_queue); + skb = skb_peek(&sk->sk_receive_queue); if (skb == NULL) { unix_sk(sk)->recursion_level = 0; if (copied >= target) @@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) { - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + (UNIXCB(skb).cred != siocb->scm->cred)) break; - } } else { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); @@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, chunk = min_t(unsigned int, skb->len, size); if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); if (copied == 0) copied = -EFAULT; break; @@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); - /* put the skb back if we didn't use it up.. */ - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + if (skb->len) break; - } + skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); if (siocb->scm->fp) @@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); - /* put message back and return */ - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); break; } } while (size); -- cgit v1.2.3-70-g09d2