diff options
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 437 |
1 files changed, 258 insertions, 179 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ec68e1c05b8..c5ee4ff6136 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,13 +115,24 @@ #include <net/checksum.h> #include <linux/security.h> -static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -static DEFINE_SPINLOCK(unix_table_lock); +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; +EXPORT_SYMBOL_GPL(unix_socket_table); +DEFINE_SPINLOCK(unix_table_lock); +EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -147,9 +158,10 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) * each socket state is protected by separate spin lock. */ -static inline unsigned unix_hash_fold(__wsum n) +static inline unsigned int unix_hash_fold(__wsum n) { - unsigned hash = (__force unsigned)n; + unsigned int hash = (__force unsigned int)n; + hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); @@ -172,7 +184,7 @@ static inline int unix_recvq_full(struct sock const *sk) return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } -static struct sock *unix_peer_get(struct sock *s) +struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -183,6 +195,7 @@ static struct sock *unix_peer_get(struct sock *s) unix_state_unlock(s); return peer; } +EXPORT_SYMBOL_GPL(unix_peer_get); static inline void unix_release_addr(struct unix_address *addr) { @@ -197,7 +210,7 @@ static inline void unix_release_addr(struct unix_address *addr) * - if started by zero, it is abstract name. */ -static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) +static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; @@ -247,7 +260,7 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, unsigned hash) + int len, int type, unsigned int hash) { struct sock *s; struct hlist_node *node; @@ -270,7 +283,7 @@ found: static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, int len, int type, - unsigned hash) + unsigned int hash) { struct sock *s; @@ -290,7 +303,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) spin_lock(&unix_table_lock); sk_for_each(s, node, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { - struct dentry *dentry = unix_sk(s)->dentry; + struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && dentry->d_inode == i) { sock_hold(s); @@ -374,8 +387,7 @@ static void unix_sock_destructor(struct sock *sk) static int unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); - struct dentry *dentry; - struct vfsmount *mnt; + struct path path; struct sock *skpair; struct sk_buff *skb; int state; @@ -386,10 +398,9 @@ static int unix_release_sock(struct sock *sk, int embrion) unix_state_lock(sk); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; - dentry = u->dentry; - u->dentry = NULL; - mnt = u->mnt; - u->mnt = NULL; + path = u->path; + u->path.dentry = NULL; + u->path.mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; unix_state_unlock(sk); @@ -422,10 +433,8 @@ static int unix_release_sock(struct sock *sk, int embrion) kfree_skb(skb); } - if (dentry) { - dput(dentry); - mntput(mnt); - } + if (path.dentry) + path_put(&path); sock_put(sk); @@ -527,6 +536,16 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); +static void unix_set_peek_off(struct sock *sk, int val) +{ + struct unix_sock *u = unix_sk(sk); + + mutex_lock(&u->readlock); + sk->sk_peek_off = val; + mutex_unlock(&u->readlock); +} + + static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, @@ -546,6 +565,7 @@ static const struct proto_ops unix_stream_ops = { .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = unix_set_peek_off, }; static const struct proto_ops unix_dgram_ops = { @@ -567,6 +587,7 @@ static const struct proto_ops unix_dgram_ops = { .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = unix_set_peek_off, }; static const struct proto_ops unix_seqpacket_ops = { @@ -588,6 +609,7 @@ static const struct proto_ops unix_seqpacket_ops = { .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = unix_set_peek_off, }; static struct proto unix_proto = { @@ -625,14 +647,14 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); - u->dentry = NULL; - u->mnt = NULL; + u->path.dentry = NULL; + u->path.mnt = NULL; spin_lock_init(&u->lock); atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -748,7 +770,7 @@ out: mutex_unlock(&u->readlock); static struct sock *unix_find_other(struct net *net, struct sockaddr_un *sunname, int len, - int type, unsigned hash, int *error) + int type, unsigned int hash, int *error) { struct sock *u; struct path path; @@ -772,7 +794,7 @@ static struct sock *unix_find_other(struct net *net, goto put_fail; if (u->sk_type == type) - touch_atime(path.mnt, path.dentry); + touch_atime(&path); path_put(&path); @@ -786,9 +808,9 @@ static struct sock *unix_find_other(struct net *net, u = unix_find_socket_byname(net, sunname, len, type, hash); if (u) { struct dentry *dentry; - dentry = unix_sk(u)->dentry; + dentry = unix_sk(u)->path.dentry; if (dentry) - touch_atime(unix_sk(u)->mnt, dentry); + touch_atime(&unix_sk(u)->path); } else goto fail; } @@ -801,6 +823,34 @@ fail: return NULL; } +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +{ + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; + + /* + * All right, let's create it. + */ + err = security_path_mknod(&path, dentry, mode, 0); + if (!err) { + err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); + if (!err) { + res->mnt = mntget(path.mnt); + res->dentry = dget(dentry); + } + } + done_path_create(&path, dentry); + return err; +} static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -809,10 +859,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - struct dentry *dentry = NULL; - struct path path; int err; - unsigned hash; + unsigned int hash; struct unix_address *addr; struct hlist_head *list; @@ -847,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - unsigned int mode; - err = 0; - /* - * Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_parent; - - /* - * All right, let's create it. - */ - mode = S_IFSOCK | + struct path path; + umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = mnt_want_write(path.mnt); - if (err) - goto out_mknod_dput; - err = security_path_mknod(&path, dentry, mode, 0); - if (err) - goto out_mknod_drop_write; - err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); -out_mknod_drop_write: - mnt_drop_write(path.mnt); - if (err) - goto out_mknod_dput; - mutex_unlock(&path.dentry->d_inode->i_mutex); - dput(path.dentry); - path.dentry = dentry; - + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + unix_release_addr(addr); + goto out_up; + } addr->hash = UNIX_HASH_SIZE; - } - - spin_lock(&unix_table_lock); - - if (!sun_path[0]) { + hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1); + spin_lock(&unix_table_lock); + u->path = path; + list = &unix_socket_table[hash]; + } else { + spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { @@ -892,10 +920,6 @@ out_mknod_drop_write: } list = &unix_socket_table[addr->hash]; - } else { - list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; - u->dentry = path.dentry; - u->mnt = path.mnt; } err = 0; @@ -909,16 +933,6 @@ out_up: mutex_unlock(&u->readlock); out: return err; - -out_mknod_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); -out_mknod_parent: - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) @@ -953,7 +967,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *other; - unsigned hash; + unsigned int hash; int err; if (addr->sa_family != AF_UNSPEC) { @@ -1051,7 +1065,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *newsk = NULL; struct sock *other = NULL; struct sk_buff *skb = NULL; - unsigned hash; + unsigned int hash; int st; int err; long timeo; @@ -1177,9 +1191,9 @@ restart: atomic_inc(&otheru->addr->refcnt); newu->addr = otheru->addr; } - if (otheru->dentry) { - newu->dentry = dget(otheru->dentry); - newu->mnt = mntget(otheru->mnt); + if (otheru->path.dentry) { + path_get(&otheru->path); + newu->path = otheru->path; } /* Set credentials */ @@ -1381,8 +1395,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); - UNIXCB(skb).cred = get_cred(scm->cred); + if (scm->cred) + UNIXCB(skb).cred = get_cred(scm->cred); UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1392,6 +1408,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen } /* + * Some apps rely on write() giving SCM_CREDENTIALS + * We include credentials if source or destination socket + * asserted SOCK_PASSCRED. + */ +static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, + const struct sock *other) +{ + if (UNIXCB(skb).cred) + return; + if (test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + UNIXCB(skb).pid = get_pid(task_tgid(current)); + UNIXCB(skb).cred = get_current_cred(); + } +} + +/* * Send AF_UNIX data. */ @@ -1406,16 +1440,17 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *other = NULL; int namelen = 0; /* fake GCC */ int err; - unsigned hash; + unsigned int hash; struct sk_buff *skb; long timeo; struct scm_cookie tmp_scm; int max_level; + int data_len = 0; if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1444,7 +1479,13 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; - skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); + if (len > SKB_MAX_ALLOC) + data_len = min_t(size_t, + len - SKB_MAX_ALLOC, + MAX_SKB_FRAGS * PAGE_SIZE); + + skb = sock_alloc_send_pskb(sk, len - data_len, data_len, + msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; @@ -1454,8 +1495,10 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, max_level = err + 1; unix_get_secdata(siocb->scm, skb); - skb_reset_transport_header(skb); - err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + skb_put(skb, len - data_len); + skb->data_len = data_len; + skb->len = len; + err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len); if (err) goto out_free; @@ -1538,6 +1581,7 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; @@ -1575,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1652,6 +1696,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; @@ -1731,6 +1776,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb; int err; + int peeked, skip; err = -EOPNOTSUPP; if (flags&MSG_OOB) @@ -1744,7 +1790,9 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + skip = sk_peek_offset(sk, flags); + + skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); if (!skb) { unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ @@ -1761,12 +1809,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) unix_copy_addr(msg, skb->sk); - if (size > skb->len) - size = skb->len; - else if (size < skb->len) + if (size > skb->len - skip) + size = skb->len - skip; + else if (size < skb->len - skip) msg->msg_flags |= MSG_TRUNC; - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size); + err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size); if (err) goto out_free; @@ -1783,6 +1831,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); + + sk_peek_offset_bwd(sk, skb->len); } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) @@ -1796,10 +1846,13 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, clearly however! */ + + sk_peek_offset_fwd(sk, size); + if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); } - err = size; + err = (flags & MSG_TRUNC) ? skb->len - skip : size; scm_recv(sock, msg, siocb->scm, flags); @@ -1859,6 +1912,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, int target; int err = 0; long timeo; + int skip; err = -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) @@ -1888,12 +1942,15 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } + skip = sk_peek_offset(sk, flags); + do { int chunk; struct sk_buff *skb; unix_state_lock(sk); - skb = skb_dequeue(&sk->sk_receive_queue); + skb = skb_peek(&sk->sk_receive_queue); +again: if (skb == NULL) { unix_sk(sk)->recursion_level = 0; if (copied >= target) @@ -1928,15 +1985,20 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_unlock(sk); break; } + + if (skip >= skb->len) { + skip -= skb->len; + skb = skb_peek_next(skb, &sk->sk_receive_queue); + goto again; + } + unix_state_unlock(sk); if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) { - skb_queue_head(&sk->sk_receive_queue, skb); + (UNIXCB(skb).cred != siocb->scm->cred)) break; - } } else { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); @@ -1949,9 +2011,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, sunaddr = NULL; } - chunk = min_t(unsigned int, skb->len, size); - if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { - skb_queue_head(&sk->sk_receive_queue, skb); + chunk = min_t(unsigned int, skb->len - skip, size); + if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) { if (copied == 0) copied = -EFAULT; break; @@ -1963,15 +2024,15 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { skb_pull(skb, chunk); + sk_peek_offset_bwd(sk, chunk); + if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); - /* put the skb back if we didn't use it up.. */ - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (skb->len) break; - } + skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); if (siocb->scm->fp) @@ -1982,8 +2043,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); - /* put message back and return */ - skb_queue_head(&sk->sk_receive_queue, skb); + sk_peek_offset_fwd(sk, chunk); + break; } } while (size); @@ -2036,6 +2097,36 @@ static int unix_shutdown(struct socket *sock, int mode) return 0; } +long unix_inq_len(struct sock *sk) +{ + struct sk_buff *skb; + long amount = 0; + + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + spin_lock(&sk->sk_receive_queue.lock); + if (sk->sk_type == SOCK_STREAM || + sk->sk_type == SOCK_SEQPACKET) { + skb_queue_walk(&sk->sk_receive_queue, skb) + amount += skb->len; + } else { + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + amount = skb->len; + } + spin_unlock(&sk->sk_receive_queue.lock); + + return amount; +} +EXPORT_SYMBOL_GPL(unix_inq_len); + +long unix_outq_len(struct sock *sk) +{ + return sk_wmem_alloc_get(sk); +} +EXPORT_SYMBOL_GPL(unix_outq_len); + static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -2044,33 +2135,16 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: - amount = sk_wmem_alloc_get(sk); + amount = unix_outq_len(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: - { - struct sk_buff *skb; - - if (sk->sk_state == TCP_LISTEN) { - err = -EINVAL; - break; - } - - spin_lock(&sk->sk_receive_queue.lock); - if (sk->sk_type == SOCK_STREAM || - sk->sk_type == SOCK_SEQPACKET) { - skb_queue_walk(&sk->sk_receive_queue, skb) - amount += skb->len; - } else { - skb = skb_peek(&sk->sk_receive_queue); - if (skb) - amount = skb->len; - } - spin_unlock(&sk->sk_receive_queue.lock); + amount = unix_inq_len(sk); + if (amount < 0) + err = amount; + else err = put_user(amount, (int __user *)arg); - break; - } - + break; default: err = -ENOIOCTLCMD; break; @@ -2144,7 +2218,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } /* No write status requested, avoid expensive OUT tests. */ - if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT))) + if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) return mask; writable = unix_writable(sk); @@ -2167,47 +2241,54 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) + +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; + + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) + continue; + if (++count == offset) + break; } - return NULL; + + return sk; } -static struct sock *next_unix_socket(int *i, struct sock *s) +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; } - return NULL; -} -struct unix_iter_state { - struct seq_net_private p; - int i; -}; + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) -{ - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) - return s; - ++off; - } return NULL; } @@ -2215,22 +2296,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -2293,7 +2372,7 @@ static const struct seq_operations unix_seq_ops = { static int unix_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &unix_seq_ops, - sizeof(struct unix_iter_state)); + sizeof(struct seq_net_private)); } static const struct file_operations unix_seq_fops = { |