diff options
Diffstat (limited to 'net/netlink')
-rw-r--r-- | net/netlink/af_netlink.c | 151 | ||||
-rw-r--r-- | net/netlink/af_netlink.h | 8 | ||||
-rw-r--r-- | net/netlink/genetlink.c | 56 |
3 files changed, 137 insertions, 78 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7a186e74b1b..84ea76ca3f1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,14 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); +/* nl_table locking explained: + * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock + * combined with an RCU read-side lock. Insertion and removal are protected + * with nl_sk_hash_lock while using RCU list modification primitives and may + * run in parallel to nl_table_lock protected lookups. Destruction of the + * Netlink socket may only occur *after* nl_table_lock has been acquired + * either during or after the socket has been removed from the list. + */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -106,14 +114,14 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); DEFINE_MUTEX(nl_sk_hash_lock); EXPORT_SYMBOL_GPL(nl_sk_hash_lock); -static int lockdep_nl_sk_hash_is_held(void) +#ifdef CONFIG_PROVE_LOCKING +static int lockdep_nl_sk_hash_is_held(void *parent) { -#ifdef CONFIG_LOCKDEP - return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1; -#else + if (debug_locks) + return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); return 1; -#endif } +#endif static ATOMIC_NOTIFIER_HEAD(netlink_chain); @@ -134,8 +142,7 @@ int netlink_add_tap(struct netlink_tap *nt) list_add_rcu(&nt->list, &netlink_tap_all); spin_unlock(&netlink_tap_lock); - if (nt->module) - __module_get(nt->module); + __module_get(nt->module); return 0; } @@ -518,14 +525,14 @@ out: return err; } -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) { #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 struct page *p_start, *p_end; /* First page is flushed through netlink_{get,set}_status */ p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); while (p_start <= p_end) { flush_dcache_page(p_start); p_start++; @@ -543,9 +550,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) static void netlink_set_status(struct nl_mmap_hdr *hdr, enum nl_mmap_status status) { + smp_mb(); hdr->nm_status = status; flush_dcache_page(pgvec_to_page(hdr)); - smp_wmb(); } static struct nl_mmap_hdr * @@ -707,24 +714,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, struct nl_mmap_hdr *hdr; struct sk_buff *skb; unsigned int maxlen; - bool excl = true; int err = 0, len = 0; - /* Netlink messages are validated by the receiver before processing. - * In order to avoid userspace changing the contents of the message - * after validation, the socket and the ring may only be used by a - * single process, otherwise we fall back to copying. - */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || - atomic_read(&nlk->mapped) > 1) - excl = false; - mutex_lock(&nlk->pg_vec_lock); ring = &nlk->tx_ring; maxlen = ring->frame_size - NL_MMAP_HDRLEN; do { + unsigned int nm_len; + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); if (hdr == NULL) { if (!(msg->msg_flags & MSG_DONTWAIT) && @@ -732,35 +731,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, schedule(); continue; } - if (hdr->nm_len > maxlen) { + + nm_len = ACCESS_ONCE(hdr->nm_len); + if (nm_len > maxlen) { err = -EINVAL; goto out; } - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, nm_len); - if (likely(dst_portid == 0 && dst_group == 0 && excl)) { - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - sock_hold(sk); - netlink_ring_setup_skb(skb, sk, ring, hdr); - NETLINK_CB(skb).flags |= NETLINK_SKB_TX; - __skb_put(skb, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - } else { - skb = alloc_skb(hdr->nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, hdr->nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + skb = alloc_skb(nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; } + __skb_put(skb, nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); netlink_increment_head(ring); @@ -806,7 +793,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) hdr->nm_pid = NETLINK_CB(skb).creds.pid; hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, hdr->nm_len); netlink_set_status(hdr, NL_MMAP_STATUS_VALID); NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; @@ -1028,11 +1015,13 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct netlink_table *table = &nl_table[protocol]; struct sock *sk; + read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); + read_unlock(&nl_table_lock); return sk; } @@ -1082,7 +1071,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL); + rhashtable_insert(&table->hash, &nlk_sk(sk)->node); err = 0; err: mutex_unlock(&nl_sk_hash_lock); @@ -1095,15 +1084,17 @@ static void netlink_remove(struct sock *sk) mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; - if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL)) { + if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); - if (nlk_sk(sk)->subscriptions) + if (nlk_sk(sk)->subscriptions) { __sk_del_bind_node(sk); + netlink_update_listeners(sk); + } netlink_table_ungrab(); } @@ -1150,8 +1141,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1237,8 +1228,8 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); - netlink_table_grab(); if (netlink_is_kernel(sk)) { + netlink_table_grab(); BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { struct listeners *old; @@ -1252,14 +1243,16 @@ static int netlink_release(struct socket *sock) nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } - } else if (nlk->subscriptions) { - netlink_update_listeners(sk); + netlink_table_ungrab(); } - netlink_table_ungrab(); - /* Wait for readers to complete */ - synchronize_net(); + if (nlk->netlink_unbind) { + int i; + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } kfree(nlk->groups); nlk->groups = NULL; @@ -1281,6 +1274,7 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); + netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1288,9 +1282,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); + netlink_table_ungrab(); goto retry; } rcu_read_unlock(); + netlink_table_ungrab(); err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) @@ -1421,17 +1417,18 @@ static int netlink_realloc_groups(struct sock *sk) return err; } -static void netlink_unbind(int group, long unsigned int groups, - struct netlink_sock *nlk) +static void netlink_undo_bind(int group, long unsigned int groups, + struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); int undo; if (!nlk->netlink_unbind) return; for (undo = 0; undo < group; undo++) - if (test_bit(group, &groups)) - nlk->netlink_unbind(undo); + if (test_bit(undo, &groups)) + nlk->netlink_unbind(sock_net(sk), undo); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1469,10 +1466,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(group); + err = nlk->netlink_bind(net, group); if (!err) continue; - netlink_unbind(group, groups, nlk); + netlink_undo_bind(group, groups, sk); return err; } } @@ -1482,7 +1479,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups - 1, groups, nlk); + netlink_undo_bind(nlk->ngroups, groups, sk); return err; } } @@ -2133,7 +2130,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!val || val - 1 >= nlk->ngroups) return -EINVAL; if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { - err = nlk->netlink_bind(val); + err = nlk->netlink_bind(sock_net(sk), val); if (err) return err; } @@ -2142,7 +2139,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) - nlk->netlink_unbind(val); + nlk->netlink_unbind(sock_net(sk), val); err = 0; break; @@ -2296,7 +2293,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, } if (netlink_tx_is_mmaped(sk) && - msg->msg_iov->iov_base == NULL) { + msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb); goto out; @@ -2316,7 +2313,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + if (memcpy_from_msg(skb_put(skb, len), msg, len)) { kfree_skb(skb); goto out; } @@ -2391,7 +2388,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } skb_reset_transport_header(data_skb); - err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ -2499,6 +2496,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; + nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; if (cfg->compare) nl_table[unit].compare = cfg->compare; @@ -2921,14 +2919,16 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(nl_table_lock) __acquires(RCU) { + read_lock(&nl_table_lock); rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct rhashtable *ht; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2943,19 +2943,19 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter = seq->private; nlk = v; - rht_for_each_entry_rcu(nlk, nlk->node.next, node) + i = iter->link; + ht = &nl_table[i].hash; + rht_for_each_entry(nlk, nlk->node.next, ht, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; - i = iter->link; j = iter->hash_idx + 1; do { - struct rhashtable *ht = &nl_table[i].hash; const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; @@ -2971,9 +2971,10 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(RCU) __releases(nl_table_lock) { rcu_read_unlock(); + read_unlock(&nl_table_lock); } @@ -3116,11 +3117,13 @@ static int __init netlink_proto_init(void) .head_offset = offsetof(struct netlink_sock, node), .key_offset = offsetof(struct netlink_sock, portid), .key_len = sizeof(u32), /* portid */ - .hashfn = arch_fast_hash, + .hashfn = jhash, .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, +#ifdef CONFIG_PROVE_LOCKING .mutex_is_held = lockdep_nl_sk_hash_is_held, +#endif }; if (err != 0) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b20a1731759..f123a88496f 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -39,8 +39,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - int (*netlink_bind)(int group); - void (*netlink_unbind)(int group); + int (*netlink_bind)(struct net *net, int group); + void (*netlink_unbind)(struct net *net, int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -65,8 +65,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 76393f2f4b2..2e11061ef88 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -983,11 +983,67 @@ static struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static int genl_bind(struct net *net, int group) +{ + int i, err = 0; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; + } + } + } + up_read(&cb_lock); + + return err; +} + +static void genl_unbind(struct net *net, int group) +{ + int i; + bool found = false; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + found = true; + break; + } + } + } + up_read(&cb_lock); + + WARN_ON(!found); +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, + .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ |