diff options
Diffstat (limited to 'net/socket.c')
-rw-r--r-- | net/socket.c | 210 |
1 files changed, 167 insertions, 43 deletions
diff --git a/net/socket.c b/net/socket.c index 088fb3fd45e..310d16b1b3c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; +static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; + struct socket_wq *wq; ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); - if (!ei->socket.wq) { + wq = kmalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) { kmem_cache_free(sock_inode_cachep, ei); return NULL; } - init_waitqueue_head(&ei->socket.wq->wait); - ei->socket.wq->fasync_list = NULL; + init_waitqueue_head(&wq->wait); + wq->fasync_list = NULL; + RCU_INIT_POINTER(ei->socket.wq, wq); ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; @@ -262,6 +264,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } + static void wq_free_rcu(struct rcu_head *head) { struct socket_wq *wq = container_of(head, struct socket_wq, rcu); @@ -272,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head) static void sock_destroy_inode(struct inode *inode) { struct socket_alloc *ei; + struct socket_wq *wq; ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + wq = rcu_dereference_protected(ei->socket.wq, 1); + call_rcu(&wq->rcu, wq_free_rcu); kmem_cache_free(sock_inode_cachep, ei); } @@ -305,20 +310,6 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static struct dentry *sockfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); -} - -static struct vfsmount *sock_mnt __read_mostly; - -static struct file_system_type sock_fs_type = { - .name = "sockfs", - .mount = sockfs_mount, - .kill_sb = kill_anon_super, -}; - /* * sockfs_dname() is called from d_path(). */ @@ -332,6 +323,21 @@ static const struct dentry_operations sockfs_dentry_operations = { .d_dname = sockfs_dname, }; +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "socket:", &sockfs_ops, + &sockfs_dentry_operations, SOCKFS_MAGIC); +} + +static struct vfsmount *sock_mnt __read_mostly; + +static struct file_system_type sock_fs_type = { + .name = "sockfs", + .mount = sockfs_mount, + .kill_sb = kill_anon_super, +}; + /* * Obtains the first available file descriptor and sets it up for use. * @@ -360,14 +366,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; @@ -523,7 +528,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->wq->fasync_list) + if (rcu_dereference_protected(sock->wq, 1)->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1107,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on) { struct socket *sock = filp->private_data; struct sock *sk = sock->sk; + struct socket_wq *wq; if (sk == NULL) return -EINVAL; lock_sock(sk); + wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + fasync_helper(fd, filp, on, &wq->fasync_list); - fasync_helper(fd, filp, on, &sock->wq->fasync_list); - - if (!sock->wq->fasync_list) + if (!wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); @@ -1215,7 +1221,7 @@ int __sock_create(struct net *net, int family, int type, int protocol, * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family] == NULL) + if (rcu_access_pointer(net_families[family]) == NULL) request_module("net-pf-%d", family); #endif @@ -2347,10 +2353,11 @@ int sock_register(const struct net_proto_family *ops) } spin_lock(&net_family_lock); - if (net_families[ops->family]) + if (rcu_dereference_protected(net_families[ops->family], + lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - net_families[ops->family] = ops; + rcu_assign_pointer(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); @@ -2378,7 +2385,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - net_families[family] = NULL; + rcu_assign_pointer(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); @@ -2389,6 +2396,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2405,8 +2414,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2419,7 +2435,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ @@ -2566,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { + struct compat_ethtool_rxnfc __user *compat_rxnfc; + bool convert_in = false, convert_out = false; + size_t buf_size = ALIGN(sizeof(struct ifreq), 8); + struct ethtool_rxnfc __user *rxnfc; struct ifreq __user *ifr; + u32 rule_cnt = 0, actual_rule_cnt; + u32 ethcmd; u32 data; - void __user *datap; + int ret; - ifr = compat_alloc_user_space(sizeof(*ifr)); + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + compat_rxnfc = compat_ptr(data); + + if (get_user(ethcmd, &compat_rxnfc->cmd)) return -EFAULT; - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + /* Most ethtool structures are defined without padding. + * Unfortunately struct ethtool_rxnfc is an exception. + */ + switch (ethcmd) { + default: + break; + case ETHTOOL_GRXCLSRLALL: + /* Buffer size is variable */ + if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) + return -EFAULT; + if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) + return -ENOMEM; + buf_size += rule_cnt * sizeof(u32); + /* fall through */ + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + convert_out = true; + /* fall through */ + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + buf_size += sizeof(struct ethtool_rxnfc); + convert_in = true; + break; + } + + ifr = compat_alloc_user_space(buf_size); + rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + if (put_user(convert_in ? rxnfc : compat_ptr(data), + &ifr->ifr_ifru.ifru_data)) return -EFAULT; - return dev_ioctl(net, SIOCETHTOOL, ifr); + if (convert_in) { + /* We expect there to be holes between fs.m_u and + * fs.ring_cookie and at the end of fs, but nowhere else. + */ + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + + sizeof(compat_rxnfc->fs.m_u) != + offsetof(struct ethtool_rxnfc, fs.m_u) + + sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON( + offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_in_user(rxnfc, compat_rxnfc, + (void *)(&rxnfc->fs.m_u + 1) - + (void *)rxnfc) || + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void *)(&rxnfc->fs.location + 1) - + (void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + + ret = dev_ioctl(net, SIOCETHTOOL, ifr); + if (ret) + return ret; + + if (convert_out) { + if (copy_in_user(compat_rxnfc, rxnfc, + (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)rxnfc) || + copy_in_user(&compat_rxnfc->fs.ring_cookie, + &rxnfc->fs.ring_cookie, + (const void *)(&rxnfc->fs.location + 1) - + (const void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + /* As an optimisation, we only copy the actual + * number of rules that the underlying + * function returned. Since Mallory might + * change the rule count in user memory, we + * check that it is less than the rule count + * originally given (as the user buffer size), + * which has been range-checked. + */ + if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + if (actual_rule_cnt < rule_cnt) + rule_cnt = actual_rule_cnt; + if (copy_in_user(&compat_rxnfc->rule_locs[0], + &rxnfc->rule_locs[0], + rule_cnt * sizeof(u32))) + return -EFAULT; + } + } + + return 0; } static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) @@ -2626,7 +2748,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, &kifr); + err = dev_ioctl(net, cmd, + (struct ifreq __user __force *) &kifr); set_fs(old_fs); return err; @@ -2735,7 +2858,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user *)&ifr); + err = dev_ioctl(net, cmd, (void __user __force *)&ifr); set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { @@ -2840,7 +2963,8 @@ static int routing_ioctl(struct net *net, struct socket *sock, ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = devname; devname[15] = 0; + r4.rt_dev = (char __user __force *)devname; + devname[15] = 0; } else r4.rt_dev = NULL; @@ -2862,7 +2986,7 @@ out: /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that - * use compatiable ioctls + * use compatible ioctls */ static int old_bridge_ioctl(compat_ulong_t __user *argp) { |