diff options
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 187 |
1 files changed, 115 insertions, 72 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fc3ebb90691..3c95304a081 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -117,7 +117,7 @@ static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; static DEFINE_SPINLOCK(unix_table_lock); -static atomic_t unix_nr_socks = ATOMIC_INIT(0); +static atomic_long_t unix_nr_socks; #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -144,7 +144,7 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock - * each socket state is protected by separate rwlock. + * each socket state is protected by separate spin lock. */ static inline unsigned unix_hash_fold(__wsum n) @@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net, return s; } -static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) +static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; struct hlist_node *node; @@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; - if (!net_eq(sock_net(s), net)) - continue; - if (dentry && dentry->d_inode == i) { sock_hold(s); goto found; @@ -313,13 +310,16 @@ static inline int unix_writable(struct sock *sk) static void unix_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + struct socket_wq *wq; + + rcu_read_lock(); if (unix_writable(sk)) { - if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk->sk_sleep); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } - read_unlock(&sk->sk_callback_lock); + rcu_read_unlock(); } /* When dgram socket disconnects (or changes its peer), we clear its receive @@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk) if (u->addr) unix_release_addr(u->addr); - atomic_dec(&unix_nr_socks); + atomic_long_dec(&unix_nr_socks); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, - atomic_read(&unix_nr_socks)); + printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, + atomic_long_read(&unix_nr_socks)); #endif } @@ -406,9 +406,7 @@ static int unix_release_sock(struct sock *sk, int embrion) skpair->sk_err = ECONNRESET; unix_state_unlock(skpair); skpair->sk_state_change(skpair); - read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); - read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; @@ -449,11 +447,31 @@ static int unix_release_sock(struct sock *sk, int embrion) return 0; } +static void init_peercred(struct sock *sk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); +} + +static void copy_peercred(struct sock *sk, struct sock *peersk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); +} + static int unix_listen(struct socket *sock, int backlog) { int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); + struct pid *old_pid = NULL; + const struct cred *old_cred = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) @@ -469,12 +487,14 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ - sk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid); + init_peercred(sk); err = 0; out_unlock: unix_state_unlock(sk); + put_pid(old_pid); + if (old_cred) + put_cred(old_cred); out: return err; } @@ -586,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) struct sock *sk = NULL; struct unix_sock *u; - atomic_inc(&unix_nr_socks); - if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) + atomic_long_inc(&unix_nr_socks); + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); @@ -612,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) unix_insert_socket(unix_sockets_unbound, sk); out: if (sk == NULL) - atomic_dec(&unix_nr_socks); + atomic_long_dec(&unix_nr_socks); else { local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -621,7 +641,8 @@ out: return sk; } -static int unix_create(struct net *net, struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol, + int kern) { if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -671,6 +692,7 @@ static int unix_autobind(struct socket *sock) static u32 ordernum = 1; struct unix_address *addr; int err; + unsigned int retries = 0; mutex_lock(&u->readlock); @@ -696,9 +718,17 @@ retry: if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); - /* Sanity yield. It is unusual case, but yet... */ - if (!(ordernum&0xFF)) - yield(); + /* + * __unix_find_socket_byname() may take long time if many names + * are already in use. + */ + cond_resched(); + /* Give up if all names seems to be in use. */ + if (retries++ == 0xFFFFF) { + err = -ENOSPC; + kfree(addr); + goto out; + } goto retry; } addr->hash ^= sk->sk_type; @@ -734,7 +764,7 @@ static struct sock *unix_find_other(struct net *net, err = -ECONNREFUSED; if (!S_ISSOCK(inode->i_mode)) goto put_fail; - u = unix_find_socket_byinode(net, inode); + u = unix_find_socket_byinode(inode); if (!u) goto put_fail; @@ -1032,8 +1062,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; addr_len = err; - if (test_bit(SOCK_PASSCRED, &sock->flags) - && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && + (err = unix_autobind(sock)) != 0) goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); @@ -1074,6 +1104,8 @@ restart: err = -ECONNREFUSED; if (other->sk_state != TCP_LISTEN) goto out_unlock; + if (other->sk_shutdown & RCV_SHUTDOWN) + goto out_unlock; if (unix_recvq_full(other)) { err = -EAGAIN; @@ -1136,10 +1168,9 @@ restart: unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; - newsk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); + init_peercred(newsk); newu = unix_sk(newsk); - newsk->sk_sleep = &newu->peer_wait; + newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); /* copy address information from listening to new sock*/ @@ -1153,7 +1184,7 @@ restart: } /* Set credentials */ - sk->sk_peercred = other->sk_peercred; + copy_peercred(sk, other); sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; @@ -1195,10 +1226,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) sock_hold(skb); unix_peer(ska) = skb; unix_peer(skb) = ska; - ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid); - ska->sk_peercred.uid = skb->sk_peercred.uid; - ska->sk_peercred.gid = skb->sk_peercred.gid; + init_peercred(ska); + init_peercred(skb); if (ska->sk_type != SOCK_DGRAM) { ska->sk_state = TCP_ESTABLISHED; @@ -1256,7 +1285,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ { struct sock *sk = sock->sk; struct unix_sock *u; - struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; if (peer) { @@ -1293,18 +1322,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) int i; scm->fp = UNIXCB(skb).fp; - skb->destructor = sock_wfree; UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->fp[i]); } -static void unix_destruct_fds(struct sk_buff *skb) +static void unix_destruct_scm(struct sk_buff *skb) { struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); - unix_detach_fds(&scm, skb); + scm.pid = UNIXCB(skb).pid; + scm.cred = UNIXCB(skb).cred; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ @@ -1327,10 +1358,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count-1; i >= 0; i--) unix_inflight(scm->fp->fp[i]); - skb->destructor = unix_destruct_fds; return 0; } +static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) +{ + int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); + UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).fp = NULL; + if (scm->fp && send_fds) + err = unix_attach_fds(scm, skb); + + skb->destructor = unix_destruct_scm; + return err; +} + /* * Send AF_UNIX data. */ @@ -1375,8 +1418,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (test_bit(SOCK_PASSCRED, &sock->flags) - && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr + && (err = unix_autobind(sock)) != 0) goto out; err = -EMSGSIZE; @@ -1387,12 +1430,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) { - err = unix_attach_fds(siocb->scm, skb); - if (err) - goto out_free; - } + err = unix_scm_to_skb(siocb->scm, skb, true); + if (err) + goto out_free; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1471,6 +1511,8 @@ restart: goto restart; } + if (sock_flag(other, SOCK_RCVTSTAMP)) + __net_timestamp(skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other, len); @@ -1501,6 +1543,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; int sent = 0; struct scm_cookie tmp_scm; + bool fds_sent = false; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1561,14 +1604,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, */ size = min_t(int, size, skb_tailroom(skb)); - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) { - err = unix_attach_fds(siocb->scm, skb); - if (err) { - kfree_skb(skb); - goto out_err; - } + + /* Only send the fds in the first buffer */ + err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + if (err) { + kfree_skb(skb); + goto out_err; } + fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err) { @@ -1681,11 +1724,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; + if (sock_flag(sk, SOCK_RCVTSTAMP)) + __sock_recv_timestamp(msg, sk, skb); + if (!siocb->scm) { siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1730,7 +1776,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) unix_state_lock(sk); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || @@ -1746,7 +1792,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } @@ -1834,14 +1880,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, - sizeof(siocb->scm->creds)) != 0) { + if ((UNIXCB(skb).pid != siocb->scm->pid) || + (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); break; } } else { /* Copy credentials */ - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; } @@ -1874,7 +1920,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, break; } - kfree_skb(skb); + consume_skb(skb); if (siocb->scm->fp) break; @@ -1925,12 +1971,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - read_unlock(&other->sk_callback_lock); } if (other) sock_put(other); @@ -1985,7 +2029,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ @@ -1994,11 +2038,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP; + mask |= POLLRDHUP | POLLIN | POLLRDNORM; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || - (sk->sk_shutdown & RCV_SHUTDOWN)) + if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2022,7 +2065,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ @@ -2211,14 +2254,14 @@ static const struct file_operations unix_seq_fops = { #endif -static struct net_proto_family unix_family_ops = { +static const struct net_proto_family unix_family_ops = { .family = PF_UNIX, .create = unix_create, .owner = THIS_MODULE, }; -static int unix_net_init(struct net *net) +static int __net_init unix_net_init(struct net *net) { int error = -ENOMEM; @@ -2237,7 +2280,7 @@ out: return error; } -static void unix_net_exit(struct net *net) +static void __net_exit unix_net_exit(struct net *net) { unix_sysctl_unregister(net); proc_net_remove(net, "unix"); |