summaryrefslogtreecommitdiffstats
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c187
1 files changed, 115 insertions, 72 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fc3ebb90691..3c95304a081 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
-static atomic_t unix_nr_socks = ATOMIC_INIT(0);
+static atomic_long_t unix_nr_socks;
#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
@@ -144,7 +144,7 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
/*
* SMP locking strategy:
* hash table is protected with spinlock unix_table_lock
- * each socket state is protected by separate rwlock.
+ * each socket state is protected by separate spin lock.
*/
static inline unsigned unix_hash_fold(__wsum n)
@@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
return s;
}
-static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+static struct sock *unix_find_socket_byinode(struct inode *i)
{
struct sock *s;
struct hlist_node *node;
@@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
struct dentry *dentry = unix_sk(s)->dentry;
- if (!net_eq(sock_net(s), net))
- continue;
-
if (dentry && dentry->d_inode == i) {
sock_hold(s);
goto found;
@@ -313,13 +310,16 @@ static inline int unix_writable(struct sock *sk)
static void unix_write_space(struct sock *sk)
{
- read_lock(&sk->sk_callback_lock);
+ struct socket_wq *wq;
+
+ rcu_read_lock();
if (unix_writable(sk)) {
- if (sk_has_sleeper(sk))
- wake_up_interruptible_sync(sk->sk_sleep);
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
- read_unlock(&sk->sk_callback_lock);
+ rcu_read_unlock();
}
/* When dgram socket disconnects (or changes its peer), we clear its receive
@@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
if (u->addr)
unix_release_addr(u->addr);
- atomic_dec(&unix_nr_socks);
+ atomic_long_dec(&unix_nr_socks);
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
- printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
- atomic_read(&unix_nr_socks));
+ printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
+ atomic_long_read(&unix_nr_socks));
#endif
}
@@ -406,9 +406,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
skpair->sk_err = ECONNRESET;
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
- read_lock(&skpair->sk_callback_lock);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
- read_unlock(&skpair->sk_callback_lock);
}
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
@@ -449,11 +447,31 @@ static int unix_release_sock(struct sock *sk, int embrion)
return 0;
}
+static void init_peercred(struct sock *sk)
+{
+ put_pid(sk->sk_peer_pid);
+ if (sk->sk_peer_cred)
+ put_cred(sk->sk_peer_cred);
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+}
+
+static void copy_peercred(struct sock *sk, struct sock *peersk)
+{
+ put_pid(sk->sk_peer_pid);
+ if (sk->sk_peer_cred)
+ put_cred(sk->sk_peer_cred);
+ sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+ sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+}
+
static int unix_listen(struct socket *sock, int backlog)
{
int err;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
+ struct pid *old_pid = NULL;
+ const struct cred *old_cred = NULL;
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -469,12 +487,14 @@ static int unix_listen(struct socket *sock, int backlog)
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
/* set credentials so connect can copy them */
- sk->sk_peercred.pid = task_tgid_vnr(current);
- current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
+ init_peercred(sk);
err = 0;
out_unlock:
unix_state_unlock(sk);
+ put_pid(old_pid);
+ if (old_cred)
+ put_cred(old_cred);
out:
return err;
}
@@ -586,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
struct sock *sk = NULL;
struct unix_sock *u;
- atomic_inc(&unix_nr_socks);
- if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
+ atomic_long_inc(&unix_nr_socks);
+ if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
@@ -612,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
unix_insert_socket(unix_sockets_unbound, sk);
out:
if (sk == NULL)
- atomic_dec(&unix_nr_socks);
+ atomic_long_dec(&unix_nr_socks);
else {
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -621,7 +641,8 @@ out:
return sk;
}
-static int unix_create(struct net *net, struct socket *sock, int protocol)
+static int unix_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
{
if (protocol && protocol != PF_UNIX)
return -EPROTONOSUPPORT;
@@ -671,6 +692,7 @@ static int unix_autobind(struct socket *sock)
static u32 ordernum = 1;
struct unix_address *addr;
int err;
+ unsigned int retries = 0;
mutex_lock(&u->readlock);
@@ -696,9 +718,17 @@ retry:
if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
addr->hash)) {
spin_unlock(&unix_table_lock);
- /* Sanity yield. It is unusual case, but yet... */
- if (!(ordernum&0xFF))
- yield();
+ /*
+ * __unix_find_socket_byname() may take long time if many names
+ * are already in use.
+ */
+ cond_resched();
+ /* Give up if all names seems to be in use. */
+ if (retries++ == 0xFFFFF) {
+ err = -ENOSPC;
+ kfree(addr);
+ goto out;
+ }
goto retry;
}
addr->hash ^= sk->sk_type;
@@ -734,7 +764,7 @@ static struct sock *unix_find_other(struct net *net,
err = -ECONNREFUSED;
if (!S_ISSOCK(inode->i_mode))
goto put_fail;
- u = unix_find_socket_byinode(net, inode);
+ u = unix_find_socket_byinode(inode);
if (!u)
goto put_fail;
@@ -1032,8 +1062,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out;
addr_len = err;
- if (test_bit(SOCK_PASSCRED, &sock->flags)
- && !u->addr && (err = unix_autobind(sock)) != 0)
+ if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
+ (err = unix_autobind(sock)) != 0)
goto out;
timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
@@ -1074,6 +1104,8 @@ restart:
err = -ECONNREFUSED;
if (other->sk_state != TCP_LISTEN)
goto out_unlock;
+ if (other->sk_shutdown & RCV_SHUTDOWN)
+ goto out_unlock;
if (unix_recvq_full(other)) {
err = -EAGAIN;
@@ -1136,10 +1168,9 @@ restart:
unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
- newsk->sk_peercred.pid = task_tgid_vnr(current);
- current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
+ init_peercred(newsk);
newu = unix_sk(newsk);
- newsk->sk_sleep = &newu->peer_wait;
+ newsk->sk_wq = &newu->peer_wq;
otheru = unix_sk(other);
/* copy address information from listening to new sock*/
@@ -1153,7 +1184,7 @@ restart:
}
/* Set credentials */
- sk->sk_peercred = other->sk_peercred;
+ copy_peercred(sk, other);
sock->state = SS_CONNECTED;
sk->sk_state = TCP_ESTABLISHED;
@@ -1195,10 +1226,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
sock_hold(skb);
unix_peer(ska) = skb;
unix_peer(skb) = ska;
- ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
- current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
- ska->sk_peercred.uid = skb->sk_peercred.uid;
- ska->sk_peercred.gid = skb->sk_peercred.gid;
+ init_peercred(ska);
+ init_peercred(skb);
if (ska->sk_type != SOCK_DGRAM) {
ska->sk_state = TCP_ESTABLISHED;
@@ -1256,7 +1285,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
{
struct sock *sk = sock->sk;
struct unix_sock *u;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+ DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
int err = 0;
if (peer) {
@@ -1293,18 +1322,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
int i;
scm->fp = UNIXCB(skb).fp;
- skb->destructor = sock_wfree;
UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--)
unix_notinflight(scm->fp->fp[i]);
}
-static void unix_destruct_fds(struct sk_buff *skb)
+static void unix_destruct_scm(struct sk_buff *skb)
{
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
- unix_detach_fds(&scm, skb);
+ scm.pid = UNIXCB(skb).pid;
+ scm.cred = UNIXCB(skb).cred;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
/* Alas, it calls VFS */
/* So fscking what? fput() had been SMP-safe since the last Summer */
@@ -1327,10 +1358,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
for (i = scm->fp->count-1; i >= 0; i--)
unix_inflight(scm->fp->fp[i]);
- skb->destructor = unix_destruct_fds;
return 0;
}
+static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+{
+ int err = 0;
+ UNIXCB(skb).pid = get_pid(scm->pid);
+ UNIXCB(skb).cred = get_cred(scm->cred);
+ UNIXCB(skb).fp = NULL;
+ if (scm->fp && send_fds)
+ err = unix_attach_fds(scm, skb);
+
+ skb->destructor = unix_destruct_scm;
+ return err;
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1375,8 +1418,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
}
- if (test_bit(SOCK_PASSCRED, &sock->flags)
- && !u->addr && (err = unix_autobind(sock)) != 0)
+ if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
+ && (err = unix_autobind(sock)) != 0)
goto out;
err = -EMSGSIZE;
@@ -1387,12 +1430,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (skb == NULL)
goto out;
- memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp) {
- err = unix_attach_fds(siocb->scm, skb);
- if (err)
- goto out_free;
- }
+ err = unix_scm_to_skb(siocb->scm, skb, true);
+ if (err)
+ goto out_free;
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
@@ -1471,6 +1511,8 @@ restart:
goto restart;
}
+ if (sock_flag(other, SOCK_RCVTSTAMP))
+ __net_timestamp(skb);
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
other->sk_data_ready(other, len);
@@ -1501,6 +1543,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
int sent = 0;
struct scm_cookie tmp_scm;
+ bool fds_sent = false;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1561,14 +1604,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
*/
size = min_t(int, size, skb_tailroom(skb));
- memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp) {
- err = unix_attach_fds(siocb->scm, skb);
- if (err) {
- kfree_skb(skb);
- goto out_err;
- }
+
+ /* Only send the fds in the first buffer */
+ err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
}
+ fds_sent = true;
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
if (err) {
@@ -1681,11 +1724,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
if (err)
goto out_free;
+ if (sock_flag(sk, SOCK_RCVTSTAMP))
+ __sock_recv_timestamp(msg, sk, skb);
+
if (!siocb->scm) {
siocb->scm = &tmp_scm;
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- siocb->scm->creds = *UNIXCREDS(skb);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
unix_set_secdata(siocb->scm, skb);
if (!(flags & MSG_PEEK)) {
@@ -1730,7 +1776,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
unix_state_lock(sk);
for (;;) {
- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (!skb_queue_empty(&sk->sk_receive_queue) ||
sk->sk_err ||
@@ -1746,7 +1792,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
unix_state_unlock(sk);
return timeo;
}
@@ -1834,14 +1880,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if (check_creds) {
/* Never glue messages from different writers */
- if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
- sizeof(siocb->scm->creds)) != 0) {
+ if ((UNIXCB(skb).pid != siocb->scm->pid) ||
+ (UNIXCB(skb).cred != siocb->scm->cred)) {
skb_queue_head(&sk->sk_receive_queue, skb);
break;
}
} else {
/* Copy credentials */
- siocb->scm->creds = *UNIXCREDS(skb);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
check_creds = 1;
}
@@ -1874,7 +1920,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
break;
}
- kfree_skb(skb);
+ consume_skb(skb);
if (siocb->scm->fp)
break;
@@ -1925,12 +1971,10 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- read_lock(&other->sk_callback_lock);
if (peer_mode == SHUTDOWN_MASK)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
else if (peer_mode & RCV_SHUTDOWN)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
- read_unlock(&other->sk_callback_lock);
}
if (other)
sock_put(other);
@@ -1985,7 +2029,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
struct sock *sk = sock->sk;
unsigned int mask;
- sock_poll_wait(file, sk->sk_sleep, wait);
+ sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
/* exceptional events? */
@@ -1994,11 +2038,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= POLLRDHUP | POLLIN | POLLRDNORM;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
@@ -2022,7 +2065,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk, *other;
unsigned int mask, writable;
- sock_poll_wait(file, sk->sk_sleep, wait);
+ sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
/* exceptional events? */
@@ -2211,14 +2254,14 @@ static const struct file_operations unix_seq_fops = {
#endif
-static struct net_proto_family unix_family_ops = {
+static const struct net_proto_family unix_family_ops = {
.family = PF_UNIX,
.create = unix_create,
.owner = THIS_MODULE,
};
-static int unix_net_init(struct net *net)
+static int __net_init unix_net_init(struct net *net)
{
int error = -ENOMEM;
@@ -2237,7 +2280,7 @@ out:
return error;
}
-static void unix_net_exit(struct net *net)
+static void __net_exit unix_net_exit(struct net *net)
{
unix_sysctl_unregister(net);
proc_net_remove(net, "unix");