diff options
author | Ingo Molnar <mingo@kernel.org> | 2012-08-21 11:27:00 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-08-21 11:27:00 +0200 |
commit | bcada3d4b8c96b8792c2306f363992ca5ab9da42 (patch) | |
tree | e420679a5db6ea4e1694eef57f9abb6acac8d4d3 /net/unix | |
parent | 26198c21d1b286a084fe5d514a30bc7e6c712a34 (diff) | |
parent | 000078bc3ee69efb1124b8478c7527389a826074 (diff) |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Fix include order for bison/flex-generated C files, from Ben Hutchings
* Build fixes and documentation corrections from David Ahern
* Group parsing support, from Jiri Olsa
* UI/gtk refactorings and improvements from Namhyung Kim
* NULL deref fix for perf script, from Namhyung Kim
* Assorted cleanups from Robert Richter
* Let O= makes handle relative paths, from Steven Rostedt
* perf script python fixes, from Feng Tang.
* Improve 'perf lock' error message when the needed tracepoints
are not present, from David Ahern.
* Initial bash completion support, from Frederic Weisbecker
* Allow building without libelf, from Namhyung Kim.
* Support DWARF CFI based unwind to have callchains when %bp
based unwinding is not possible, from Jiri Olsa.
* Symbol resolution fixes, while fixing support PPC64 files with an .opt ELF
section was the end goal, several fixes for code that handles all
architectures and cleanups are included, from Cody Schafer.
* Add a description for the JIT interface, from Andi Kleen.
* Assorted fixes for Documentation and build in 32 bit, from Robert Richter
* Add support for non-tracepoint events in perf script python, from Feng Tang
* Cache the libtraceevent event_format associated to each evsel early, so that we
avoid relookups, i.e. calling pevent_find_event repeatedly when processing
tracepoint events.
[ This is to reduce the surface contact with libtraceevents and make clear what
is that the perf tools needs from that lib: so far parsing the common and per
event fields. ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'net/unix')
-rw-r--r-- | net/unix/af_unix.c | 203 | ||||
-rw-r--r-- | net/unix/diag.c | 115 |
2 files changed, 160 insertions, 158 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f16..e4768c180da 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,15 +115,24 @@ #include <net/checksum.h> #include <linux/security.h> -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -645,7 +654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -814,6 +823,34 @@ fail: return NULL; } +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +{ + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; + + /* + * All right, let's create it. + */ + err = security_path_mknod(&path, dentry, mode, 0); + if (!err) { + err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); + if (!err) { + res->mnt = mntget(path.mnt); + res->dentry = dget(dentry); + } + } + done_path_create(&path, dentry); + return err; +} static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -822,8 +859,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - struct dentry *dentry = NULL; - struct path path; int err; unsigned int hash; struct unix_address *addr; @@ -860,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - umode_t mode; - err = 0; - /* - * Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_parent; - - /* - * All right, let's create it. - */ - mode = S_IFSOCK | + struct path path; + umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = mnt_want_write(path.mnt); - if (err) - goto out_mknod_dput; - err = security_path_mknod(&path, dentry, mode, 0); - if (err) - goto out_mknod_drop_write; - err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); -out_mknod_drop_write: - mnt_drop_write(path.mnt); - if (err) - goto out_mknod_dput; - mutex_unlock(&path.dentry->d_inode->i_mutex); - dput(path.dentry); - path.dentry = dentry; - + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + unix_release_addr(addr); + goto out_up; + } addr->hash = UNIX_HASH_SIZE; - } - - spin_lock(&unix_table_lock); - - if (!sun_path[0]) { + hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1); + spin_lock(&unix_table_lock); + u->path = path; + list = &unix_socket_table[hash]; + } else { + spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { @@ -905,9 +920,6 @@ out_mknod_drop_write: } list = &unix_socket_table[addr->hash]; - } else { - list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; - u->path = path; } err = 0; @@ -921,16 +933,6 @@ out_up: mutex_unlock(&u->readlock); out: return err; - -out_mknod_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); -out_mknod_parent: - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) @@ -2239,47 +2241,54 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) + +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; + + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) + continue; + if (++count == offset) + break; } - return NULL; + + return sk; } -static struct sock *next_unix_socket(int *i, struct sock *s) +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; } - return NULL; -} -struct unix_iter_state { - struct seq_net_private p; - int i; -}; + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) -{ - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) - return s; - ++off; - } return NULL; } @@ -2287,22 +2296,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -2365,7 +2372,7 @@ static const struct seq_operations unix_seq_ops = { static int unix_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &unix_seq_ops, - sizeof(struct unix_iter_state)); + sizeof(struct seq_net_private)); } static const struct file_operations unix_seq_fops = { diff --git a/net/unix/diag.c b/net/unix/diag.c index 47d3002737f..750b1340844 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -8,40 +8,31 @@ #include <net/af_unix.h> #include <net/tcp_states.h> -#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \ - RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) - static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { struct unix_address *addr = unix_sk(sk)->addr; - char *s; - - if (addr) { - s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short)); - memcpy(s, addr->name->sun_path, addr->len - sizeof(short)); - } - return 0; + if (!addr) + return 0; -rtattr_failure: - return -EMSGSIZE; + return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), + addr->name->sun_path); } static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb) { struct dentry *dentry = unix_sk(sk)->path.dentry; - struct unix_diag_vfs *uv; if (dentry) { - uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv)); - uv->udiag_vfs_ino = dentry->d_inode->i_ino; - uv->udiag_vfs_dev = dentry->d_sb->s_dev; + struct unix_diag_vfs uv = { + .udiag_vfs_ino = dentry->d_inode->i_ino, + .udiag_vfs_dev = dentry->d_sb->s_dev, + }; + + return nla_put(nlskb, UNIX_DIAG_VFS, sizeof(uv), &uv); } return 0; - -rtattr_failure: - return -EMSGSIZE; } static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) @@ -56,24 +47,28 @@ static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) unix_state_unlock(peer); sock_put(peer); - RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino); + return nla_put_u32(nlskb, UNIX_DIAG_PEER, ino); } return 0; -rtattr_failure: - return -EMSGSIZE; } static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) { struct sk_buff *skb; + struct nlattr *attr; u32 *buf; int i; if (sk->sk_state == TCP_LISTEN) { spin_lock(&sk->sk_receive_queue.lock); - buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS, - sk->sk_receive_queue.qlen * sizeof(u32)); + + attr = nla_reserve(nlskb, UNIX_DIAG_ICONS, + sk->sk_receive_queue.qlen * sizeof(u32)); + if (!attr) + goto errout; + + buf = nla_data(attr); i = 0; skb_queue_walk(&sk->sk_receive_queue, skb) { struct sock *req, *peer; @@ -94,43 +89,38 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) return 0; -rtattr_failure: +errout: spin_unlock(&sk->sk_receive_queue.lock); return -EMSGSIZE; } static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) { - struct unix_diag_rqlen *rql; - - rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql)); + struct unix_diag_rqlen rql; if (sk->sk_state == TCP_LISTEN) { - rql->udiag_rqueue = sk->sk_receive_queue.qlen; - rql->udiag_wqueue = sk->sk_max_ack_backlog; + rql.udiag_rqueue = sk->sk_receive_queue.qlen; + rql.udiag_wqueue = sk->sk_max_ack_backlog; } else { - rql->udiag_rqueue = (__u32)unix_inq_len(sk); - rql->udiag_wqueue = (__u32)unix_outq_len(sk); + rql.udiag_rqueue = (u32) unix_inq_len(sk); + rql.udiag_wqueue = (u32) unix_outq_len(sk); } - return 0; - -rtattr_failure: - return -EMSGSIZE; + return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, u32 pid, u32 seq, u32 flags, int sk_ino) { - unsigned char *b = skb_tail_pointer(skb); struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = NLMSG_PUT(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep)); - nlh->nlmsg_flags = flags; - - rep = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + rep = nlmsg_data(nlh); rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; @@ -139,33 +129,32 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if ((req->udiag_show & UDIAG_SHOW_NAME) && sk_diag_dump_name(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_VFS) && sk_diag_dump_vfs(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_PEER) && sk_diag_dump_peer(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_ICONS) && sk_diag_dump_icons(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_RQLEN) && sk_diag_show_rqlen(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_MEMINFO) && sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) - goto nlmsg_failure; + goto out_nlmsg_trim; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: - nlmsg_trim(skb, b); +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -188,19 +177,24 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct unix_diag_req *req; int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); - req = NLMSG_DATA(cb->nlh); + req = nlmsg_data(cb->nlh); s_slot = cb->args[0]; num = s_num = cb->args[1]; spin_lock(&unix_table_lock); - for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + for (slot = s_slot; + slot < ARRAY_SIZE(unix_socket_table); + s_num = 0, slot++) { struct sock *sk; struct hlist_node *node; num = 0; sk_for_each(sk, node, &unix_socket_table[slot]) { + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -228,7 +222,7 @@ static struct sock *unix_lookup_by_ino(int ino) struct sock *sk; spin_lock(&unix_table_lock); - for (i = 0; i <= UNIX_HASH_SIZE; i++) { + for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { struct hlist_node *node; sk_for_each(sk, node, &unix_socket_table[i]) @@ -252,6 +246,7 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, struct sock *sk; struct sk_buff *rep; unsigned int extra_len; + struct net *net = sock_net(in_skb->sk); if (req->udiag_ino == 0) goto out_nosk; @@ -268,22 +263,21 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, extra_len = 256; again: err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct unix_diag_msg) + extra_len)), - GFP_KERNEL); + rep = nlmsg_new(sizeof(struct unix_diag_msg) + extra_len, GFP_KERNEL); if (!rep) goto out; err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { - kfree_skb(rep); + nlmsg_free(rep); extra_len += 256; if (extra_len >= PAGE_SIZE) goto out; goto again; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -297,6 +291,7 @@ out_nosk: static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -305,9 +300,9 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = unix_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else - return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); + return unix_diag_get_exact(skb, h, nlmsg_data(h)); } static const struct sock_diag_handler unix_diag_handler = { |