From 7123aaa3a1416529ce461e98108e6b343b294643 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jun 2012 05:03:21 +0000 Subject: af_unix: speedup /proc/net/unix /proc/net/unix has quadratic behavior, and can hold unix_table_lock for a while if a high number of unix sockets are alive. (90 ms for 200k sockets...) We already have a hash table, so it's quite easy to use it. Problem is unbound sockets are still hashed in a single hash slot (unix_socket_table[UNIX_HASH_SIZE]). This patch also spreads unbound sockets to 256 hash slots, to speed up both /proc/net/unix and unix_diag. Time to read /proc/net/unix with 200k unix sockets : (time dd if=/proc/net/unix of=/dev/null bs=4k) before : 520 secs after : 2 secs Signed-off-by: Eric Dumazet Cc: Steven Whitehouse Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/af_unix.c | 110 +++++++++++++++++++++++++++++++---------------------- net/unix/diag.c | 6 ++- 2 files changed, 68 insertions(+), 48 deletions(-) (limited to 'net/unix') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f16..cf83f6b5ac9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,15 +115,24 @@ #include #include -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -645,7
+654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -2239,47 +2248,58 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) -{ - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} -static struct sock *next_unix_socket(int *i, struct sock *s) -{ - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) struct unix_iter_state { struct seq_net_private p; - int i; }; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) continue; - if (off == pos) - return s; - ++off; + if (++count == offset) + 
break; } + + return sk; +} + +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) +{ + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; + } + + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; + +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); + return NULL; } @@ -2287,22 +2307,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) diff --git a/net/unix/diag.c b/net/unix/diag.c index 47d3002737f..7e8a24bff34 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -195,7 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = s_num = cb->args[1]; spin_lock(&unix_table_lock); - for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + for (slot = s_slot; + slot < ARRAY_SIZE(unix_socket_table); + s_num = 0, slot++) { struct sock *sk; struct hlist_node *node; @@ -228,7 +230,7 @@ static struct sock *unix_lookup_by_ino(int ino) struct sock *sk; spin_lock(&unix_table_lock); - for (i = 0; i <= UNIX_HASH_SIZE; i++) 
{ + for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { struct hlist_node *node; sk_for_each(sk, node, &unix_socket_table[i]) -- cgit v1.2.3-70-g09d2 From 8b51b064a6da90c68af5385a874968829a2a0ed7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jun 2012 22:10:20 +0000 Subject: af_unix: remove unix_iter_state As pointed out by Michael Tokarev , struct unix_iter_state is no longer needed. Suggested-by: Michael Tokarev Signed-off-by: Eric Dumazet Cc: Steven Whitehouse Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/af_unix.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/unix') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index cf83f6b5ac9..79981d97bc9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2255,10 +2255,6 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) -struct unix_iter_state { - struct seq_net_private p; -}; - static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { unsigned long offset = get_offset(*pos); @@ -2383,7 +2379,7 @@ static const struct seq_operations unix_seq_ops = { static int unix_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &unix_seq_ops, - sizeof(struct unix_iter_state)); + sizeof(struct seq_net_private)); } static const struct file_operations unix_seq_fops = { -- cgit v1.2.3-70-g09d2 From b61bb01974730e2fd7d36ab4cc848ca6f44cffd4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 Jun 2012 21:41:00 -0700 Subject: unix_diag: Move away from NLMSG_PUT(). And use nlmsg_data() while we're here too and remove useless casts. Signed-off-by: David S. 
Miller --- net/unix/diag.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'net/unix') diff --git a/net/unix/diag.c b/net/unix/diag.c index 7e8a24bff34..977ca317550 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -126,10 +126,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = NLMSG_PUT(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep)); + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), 0); + if (!nlh) + goto out_nlmsg_trim; nlh->nlmsg_flags = flags; - rep = NLMSG_DATA(nlh); + rep = nlmsg_data(nlh); rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; @@ -139,32 +141,32 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if ((req->udiag_show & UDIAG_SHOW_NAME) && sk_diag_dump_name(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_VFS) && sk_diag_dump_vfs(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_PEER) && sk_diag_dump_peer(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_ICONS) && sk_diag_dump_icons(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_RQLEN) && sk_diag_show_rqlen(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_MEMINFO) && sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) - goto nlmsg_failure; + goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nlmsg_trim(skb, b); return -EMSGSIZE; } @@ -189,7 +191,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct unix_diag_req *req; int num, s_num, slot, s_slot; - req = NLMSG_DATA(cb->nlh); + req = nlmsg_data(cb->nlh); s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -309,7 +311,7 @@ static int 
unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) }; return netlink_dump_start(sock_diag_nlsk, skb, h, &c); } else - return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); + return unix_diag_get_exact(skb, h, nlmsg_data(h)); } static const struct sock_diag_handler unix_diag_handler = { -- cgit v1.2.3-70-g09d2 From 4245375db87767aacaad16f07040b5d89a9056c8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 26 Jun 2012 23:36:10 +0000 Subject: unix_diag: Do not use RTA_PUT() macros Also, no need to trim on nlmsg_put() failure, nothing has been added yet. We also want to use nlmsg_end(), nlmsg_new() and nlmsg_free(). Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/unix/diag.c | 80 ++++++++++++++++++++++++--------------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) (limited to 'net/unix') diff --git a/net/unix/diag.c b/net/unix/diag.c index 977ca317550..a74864eedfc 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -8,40 +8,31 @@ #include #include -#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \ - RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) - static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { struct unix_address *addr = unix_sk(sk)->addr; - char *s; - - if (addr) { - s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short)); - memcpy(s, addr->name->sun_path, addr->len - sizeof(short)); - } - return 0; + if (!addr) + return 0; -rtattr_failure: - return -EMSGSIZE; + return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), + addr->name->sun_path); } static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb) { struct dentry *dentry = unix_sk(sk)->path.dentry; - struct unix_diag_vfs *uv; if (dentry) { - uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv)); - uv->udiag_vfs_ino = dentry->d_inode->i_ino; - uv->udiag_vfs_dev = dentry->d_sb->s_dev; + struct unix_diag_vfs uv = { + .udiag_vfs_ino = dentry->d_inode->i_ino, + .udiag_vfs_dev = 
dentry->d_sb->s_dev, + }; + + return nla_put(nlskb, UNIX_DIAG_VFS, sizeof(uv), &uv); } return 0; - -rtattr_failure: - return -EMSGSIZE; } static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) @@ -56,24 +47,28 @@ static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) unix_state_unlock(peer); sock_put(peer); - RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino); + return nla_put_u32(nlskb, UNIX_DIAG_PEER, ino); } return 0; -rtattr_failure: - return -EMSGSIZE; } static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) { struct sk_buff *skb; + struct nlattr *attr; u32 *buf; int i; if (sk->sk_state == TCP_LISTEN) { spin_lock(&sk->sk_receive_queue.lock); - buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS, - sk->sk_receive_queue.qlen * sizeof(u32)); + + attr = nla_reserve(nlskb, UNIX_DIAG_ICONS, + sk->sk_receive_queue.qlen * sizeof(u32)); + if (!attr) + goto errout; + + buf = nla_data(attr); i = 0; skb_queue_walk(&sk->sk_receive_queue, skb) { struct sock *req, *peer; @@ -94,45 +89,38 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) return 0; -rtattr_failure: +errout: spin_unlock(&sk->sk_receive_queue.lock); return -EMSGSIZE; } static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) { - struct unix_diag_rqlen *rql; - - rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql)); + struct unix_diag_rqlen rql; if (sk->sk_state == TCP_LISTEN) { - rql->udiag_rqueue = sk->sk_receive_queue.qlen; - rql->udiag_wqueue = sk->sk_max_ack_backlog; + rql.udiag_rqueue = sk->sk_receive_queue.qlen; + rql.udiag_wqueue = sk->sk_max_ack_backlog; } else { - rql->udiag_rqueue = (__u32)unix_inq_len(sk); - rql->udiag_wqueue = (__u32)unix_outq_len(sk); + rql.udiag_rqueue = (u32) unix_inq_len(sk); + rql.udiag_wqueue = (u32) unix_outq_len(sk); } - return 0; - -rtattr_failure: - return -EMSGSIZE; + return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req 
*req, u32 pid, u32 seq, u32 flags, int sk_ino) { - unsigned char *b = skb_tail_pointer(skb); struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), 0); + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); if (!nlh) - goto out_nlmsg_trim; - nlh->nlmsg_flags = flags; + return -EMSGSIZE; rep = nlmsg_data(nlh); - rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; @@ -163,11 +151,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); out_nlmsg_trim: - nlmsg_trim(skb, b); + nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -272,15 +259,14 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, extra_len = 256; again: err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct unix_diag_msg) + extra_len)), - GFP_KERNEL); + rep = nlmsg_new(sizeof(struct unix_diag_msg) + extra_len, GFP_KERNEL); if (!rep) goto out; err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { - kfree_skb(rep); + nlmsg_free(rep); extra_len += 256; if (extra_len >= PAGE_SIZE) goto out; -- cgit v1.2.3-70-g09d2 From 51d7cccf07238f5236c5b9269231a30dd5f8e714 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 16 Jul 2012 04:28:49 +0000 Subject: net: make sock diag per-namespace Before this patch sock_diag works for init_net only and dumps information about sockets from all namespaces. This patch expands sock_diag for all namespaces. It creates a netlink kernel socket for each netns and filters data during dumping. v2: filter according to netns in all places; remove an unused variable. Cc: "David S.
Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pavel Emelyanov CC: Eric Dumazet Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 1 - include/net/net_namespace.h | 1 + net/core/sock_diag.c | 27 ++++++++++++++++++++------- net/ipv4/inet_diag.c | 21 ++++++++++++++++----- net/ipv4/udp_diag.c | 10 +++++++--- net/unix/diag.c | 9 +++++++-- 6 files changed, 51 insertions(+), 18 deletions(-) (limited to 'net/unix') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 6793fac5eab..e3e395acc2f 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -44,6 +44,5 @@ void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -extern struct sock *sock_diag_nlsk; #endif /* KERNEL */ #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ac9195e6a06..ae1cd6c9ba5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -101,6 +101,7 @@ struct net { struct netns_xfrm xfrm; #endif struct netns_ipvs *ipvs; + struct sock *diag_nlsk; }; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 07a29eb34a4..9d8755e4a7a 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -166,23 +166,36 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } -struct sock *sock_diag_nlsk; -EXPORT_SYMBOL_GPL(sock_diag_nlsk); - -static int __init sock_diag_init(void) +static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = sock_diag_rcv, }; - sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, + net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, THIS_MODULE, &cfg); - return sock_diag_nlsk == NULL ? -ENOMEM : 0; + return net->diag_nlsk == NULL ? 
-ENOMEM : 0; +} + +static void __net_exit diag_net_exit(struct net *net) +{ + netlink_kernel_release(net->diag_nlsk); + net->diag_nlsk = NULL; +} + +static struct pernet_operations diag_net_ops = { + .init = diag_net_init, + .exit = diag_net_exit, +}; + +static int __init sock_diag_init(void) +{ + return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { - netlink_kernel_release(sock_diag_nlsk); + unregister_pernet_subsys(&diag_net_ops); } module_init(sock_diag_init); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 38064a285cc..570e61f9611 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -272,16 +272,17 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s int err; struct sock *sk; struct sk_buff *rep; + struct net *net = sock_net(in_skb->sk); err = -EINVAL; if (req->sdiag_family == AF_INET) { - sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { - sk = inet6_lookup(&init_net, hashinfo, + sk = inet6_lookup(net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, @@ -317,7 +318,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s nlmsg_free(rep); goto out; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -724,6 +725,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, { int i, num; int s_i, s_num; + struct net *net = sock_net(skb->sk); s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -743,6 +745,9 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, sk_nulls_for_each(sk, node, &ilb->head) 
{ struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { num++; continue; @@ -813,6 +818,8 @@ skip_listen_ht: sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) @@ -839,6 +846,8 @@ next_normal: inet_twsk_for_each(tw, node, &head->twchain) { + if (!net_eq(twsk_net(tw), net)) + continue; if (num < s_num) goto next_dying; @@ -943,6 +952,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { int hdrlen = sizeof(struct inet_diag_req); + struct net *net = sock_net(skb->sk); if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) @@ -963,7 +973,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) struct netlink_dump_control c = { .dump = inet_diag_dump_compat, }; - return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c); + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } } @@ -973,6 +983,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -991,7 +1002,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = inet_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index a7f86a3cd50..16d0960062b 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -34,15 +34,16 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, int err = -EINVAL; struct sock *sk; struct 
sk_buff *rep; + struct net *net = sock_net(in_skb->sk); if (req->sdiag_family == AF_INET) - sk = __udp4_lib_lookup(&init_net, + sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_if, tbl); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) - sk = __udp6_lib_lookup(&init_net, + sk = __udp6_lib_lookup(net, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, @@ -75,7 +76,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, kfree_skb(rep); goto out; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -90,6 +91,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -106,6 +108,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin sk_nulls_for_each(sk, node, &hslot->head) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(r->idiag_states & (1 << sk->sk_state))) diff --git a/net/unix/diag.c b/net/unix/diag.c index a74864eedfc..750b1340844 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -177,6 +177,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct unix_diag_req *req; int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); @@ -192,6 +193,8 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; sk_for_each(sk, node, &unix_socket_table[slot]) { + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -243,6 
+246,7 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, struct sock *sk; struct sk_buff *rep; unsigned int extra_len; + struct net *net = sock_net(in_skb->sk); if (req->udiag_ino == 0) goto out_nosk; @@ -273,7 +277,7 @@ again: goto again; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -287,6 +291,7 @@ out_nosk: static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -295,7 +300,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = unix_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else return unix_diag_get_exact(skb, h, nlmsg_data(h)); } -- cgit v1.2.3-70-g09d2