diff options
Diffstat (limited to 'net')
34 files changed, 655 insertions, 355 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8836575f9d7..511afe72af3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -140,7 +140,7 @@ static void vlan_rcu_free(struct rcu_head *rcu) vlan_group_free(container_of(rcu, struct vlan_group, rcu)); } -void unregister_vlan_dev(struct net_device *dev) +void unregister_vlan_dev(struct net_device *dev, struct list_head *head) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; @@ -159,12 +159,13 @@ void unregister_vlan_dev(struct net_device *dev) if (real_dev->features & NETIF_F_HW_VLAN_FILTER) ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); - vlan_group_set_device(grp, vlan_id, NULL); grp->nr_vlans--; - synchronize_net(); - - unregister_netdevice(dev); + if (!grp->killall) { + vlan_group_set_device(grp, vlan_id, NULL); + synchronize_net(); + } + unregister_netdevice_queue(dev, head); /* If the group is now empty, kill off the group. */ if (grp->nr_vlans == 0) { @@ -183,6 +184,34 @@ void unregister_vlan_dev(struct net_device *dev) dev_put(real_dev); } +void unregister_vlan_dev_alls(struct vlan_group *grp) +{ + LIST_HEAD(list); + int i; + struct net_device *vlandev; + struct vlan_group save; + + memcpy(&save, grp, sizeof(save)); + memset(&grp->vlan_devices_arrays, 0, sizeof(grp->vlan_devices_arrays)); + grp->killall = 1; + + synchronize_net(); + + /* Delete all VLANs for this dev. */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(&save, i); + if (!vlandev) + continue; + + unregister_vlan_dev(vlandev, &list); + if (grp->nr_vlans == 0) + break; + } + unregister_netdevice_many(&list); + for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) + kfree(save.vlan_devices_arrays[i]); +} + static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) { @@ -524,19 +553,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_UNREGISTER: - /* Delete all VLANs for this dev. */ - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - - /* unregistration of last vlan destroys group, abort - * afterwards */ - if (grp->nr_vlans == 1) - i = VLAN_GROUP_ARRAY_LEN; - - unregister_vlan_dev(vlandev); - } + unregister_vlan_dev_alls(grp); break; } @@ -642,7 +659,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - unregister_vlan_dev(dev); + unregister_vlan_dev(dev, NULL); err = 0; break; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 82570bc2a18..68f9290e683 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -82,14 +82,14 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); -void unregister_vlan_dev(struct net_device *dev); +void unregister_vlan_dev(struct net_device *dev, struct list_head *head); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) { struct vlan_dev_info *vip = vlan_dev_info(dev); - return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7]; + return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7]; } #ifdef CONFIG_VLAN_8021Q_GVRP diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4198ec5c8ab..790fd55ec31 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -393,7 +393,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, struct vlan_dev_info *vlan = vlan_dev_info(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; - u32 vlan_qos = (vlan_prio << 13) & 0xE000; + u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; /* See if a priority mapping exists.. */ mp = vlan->egress_priority_map[skb_prio & 0xF]; @@ -626,6 +626,17 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) rc = ops->ndo_fcoe_disable(real_dev); return rc; } + +static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; + int rc = -EINVAL; + + if (ops->ndo_fcoe_get_wwn) + rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); + return rc; +} #endif static void vlan_dev_change_rx_flags(struct net_device *dev, int change) @@ -791,6 +802,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, #endif }; @@ -813,6 +825,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = { .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, #endif }; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 7f939ce2980..2bc6f6a8de6 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -92,6 +92,8 @@ static void add_conn(struct work_struct *work) dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); + dev_set_drvdata(&conn->dev, conn); + if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); return; @@ -144,8 +146,6 @@ void hci_conn_init_sysfs(struct hci_conn *conn) conn->dev.class = bt_class; conn->dev.parent = &hdev->dev; - dev_set_drvdata(&conn->dev, conn); - device_initialize(&conn->dev); INIT_WORK(&conn->work_add, add_conn); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4b66bd579f4..d65101d92ee 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -555,12 +555,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->feat_mask = 0; - setup_timer(&conn->info_timer, l2cap_info_timeout, - (unsigned long) conn); - spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); + setup_timer(&conn->info_timer, l2cap_info_timeout, + (unsigned long) conn); + conn->disc_reason = 0x13; return conn; @@ -783,6 +783,9 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) /* Default config options */ pi->conf_len = 0; pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; + skb_queue_head_init(TX_QUEUE(sk)); + skb_queue_head_init(SREJ_QUEUE(sk)); + INIT_LIST_HEAD(SREJ_LIST(sk)); } static struct proto l2cap_proto = { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b1b3b0fbf41..2117e5ba24c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -154,7 +154,7 @@ static void del_nbp(struct net_bridge_port *p) } /* called with RTNL */ -static void del_br(struct net_bridge *br) +static void del_br(struct net_bridge *br, struct list_head *head) { struct net_bridge_port *p, *n; @@ -165,7 +165,7 @@ static void del_br(struct net_bridge *br) del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); - unregister_netdevice(br->dev); + unregister_netdevice_queue(br->dev, head); } static struct net_device *new_bridge_dev(struct net *net, const char *name) @@ -323,7 +323,7 @@ int br_del_bridge(struct net *net, const char *name) } else - del_br(netdev_priv(dev)); + del_br(netdev_priv(dev), NULL); rtnl_unlock(); return ret; @@ -462,15 +462,14 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) void br_net_exit(struct net *net) { struct net_device *dev; + LIST_HEAD(list); rtnl_lock(); -restart: - for_each_netdev(net, dev) { - if (dev->priv_flags & IFF_EBRIDGE) { - del_br(netdev_priv(dev)); - goto restart; - } - } + for_each_netdev(net, dev) + if (dev->priv_flags & IFF_EBRIDGE) + del_br(netdev_priv(dev), &list); + + unregister_netdevice_many(&list); rtnl_unlock(); } diff --git a/net/can/raw.c b/net/can/raw.c index 962fc9f1d0c..6e77db58b9e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -424,8 +424,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_RAW) return -EINVAL; - if (optlen < 0) - return -EINVAL; switch (optname) { diff --git a/net/compat.c b/net/compat.c index e13f5256fd2..6a2f75fb3f4 100644 --- a/net/compat.c +++ b/net/compat.c @@ -390,9 +390,6 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, int err; struct socket *sock; - if (optlen < 0) - return -EINVAL; - if ((sock = sockfd_lookup(fd, &err))!=NULL) { err = security_socket_setsockopt(sock,level,optname); diff --git a/net/core/dev.c b/net/core/dev.c index fa88dcd476d..68a1bb68b5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -193,18 +193,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); -#define NETDEV_HASHBITS 8 -#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) - static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_name_head[hash & (NETDEV_HASHENTRIES - 1)]; } static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; } /* Device list insertion */ @@ -217,12 +214,15 @@ static int list_netdevice(struct net_device *dev) write_lock_bh(&dev_base_lock); list_add_tail(&dev->dev_list, &net->dev_base_head); hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + hlist_add_head_rcu(&dev->index_hlist, + dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); return 0; } -/* Device list removal */ +/* Device list removal + * caller must respect a RCU grace period before freeing/reusing dev + */ static void unlist_netdevice(struct net_device *dev) { ASSERT_RTNL(); @@ -231,7 +231,7 @@ static void unlist_netdevice(struct net_device *dev) write_lock_bh(&dev_base_lock); list_del(&dev->dev_list); hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); + hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); } @@ -649,6 +649,31 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex) } EXPORT_SYMBOL(__dev_get_by_index); +/** + * dev_get_by_index_rcu - find a device by its ifindex + * @net: the applicable net namespace + * @ifindex: index of device + * + * Search for an interface by index. Returns %NULL if the device + * is not found or a pointer to the device. The device has not + * had its reference counter increased so the caller must be careful + * about locking. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_index_hash(net, ifindex); + + hlist_for_each_entry_rcu(dev, p, head, index_hlist) + if (dev->ifindex == ifindex) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_get_by_index_rcu); + /** * dev_get_by_index - find a device by its ifindex @@ -665,11 +690,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_index); @@ -2303,7 +2328,7 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); - if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; /* if we've gotten here through NAPI, check netpoll */ @@ -2942,15 +2967,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifr.ifr_ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); return -ENODEV; } strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; @@ -4640,59 +4665,76 @@ static void net_set_todo(struct net_device *dev) list_add_tail(&dev->todo_list, &net_todo_list); } -static void rollback_registered(struct net_device *dev) +static void rollback_registered_many(struct list_head *head) { + struct net_device *dev; + BUG_ON(dev_boot_phase); ASSERT_RTNL(); - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); + list_for_each_entry(dev, head, unreg_list) { + /* Some devices call without registering + * for initialization unwind. + */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + pr_debug("unregister_netdevice: device %s/%p never " + "was registered\n", dev->name, dev); - WARN_ON(1); - return; - } + WARN_ON(1); + return; + } - BUG_ON(dev->reg_state != NETREG_REGISTERED); + BUG_ON(dev->reg_state != NETREG_REGISTERED); - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close(dev); - /* And unlink it from device chain. */ - unlist_netdevice(dev); + /* And unlink it from device chain. */ + unlist_netdevice(dev); - dev->reg_state = NETREG_UNREGISTERING; + dev->reg_state = NETREG_UNREGISTERING; + } synchronize_net(); - /* Shutdown queueing discipline. */ - dev_shutdown(dev); + list_for_each_entry(dev, head, unreg_list) { + /* Shutdown queueing discipline. */ + dev_shutdown(dev); - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* - * Flush the unicast and multicast chains - */ - dev_unicast_flush(dev); - dev_addr_discard(dev); + /* + * Flush the unicast and multicast chains + */ + dev_unicast_flush(dev); + dev_addr_discard(dev); - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us from master device. */ + WARN_ON(dev->master); - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); + } synchronize_net(); - dev_put(dev); + list_for_each_entry(dev, head, unreg_list) + dev_put(dev); +} + +static void rollback_registered(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + rollback_registered_many(&single); } static void __netdev_init_queue_locks_one(struct net_device *dev, @@ -5248,25 +5290,48 @@ void synchronize_net(void) EXPORT_SYMBOL(synchronize_net); /** - * unregister_netdevice - remove device from the kernel + * unregister_netdevice_queue - remove device from the kernel * @dev: device - * + * @head: list + * This function shuts down a device interface and removes it * from the kernel tables. + * If head not NULL, device is queued to be unregistered later. * * Callers must hold the rtnl semaphore. You may want * unregister_netdev() instead of this. */ -void unregister_netdevice(struct net_device *dev) +void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) { ASSERT_RTNL(); - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); + if (head) { + list_add_tail(&dev->unreg_list, head); + } else { + rollback_registered(dev); + /* Finish processing unregister after unlock */ + net_set_todo(dev); + } +} +EXPORT_SYMBOL(unregister_netdevice_queue); + +/** + * unregister_netdevice_many - unregister many devices + * @head: list of devices + * + */ +void unregister_netdevice_many(struct list_head *head) +{ + struct net_device *dev; + + if (!list_empty(head)) { + rollback_registered_many(head); + list_for_each_entry(dev, head, unreg_list) + net_set_todo(dev); + } } -EXPORT_SYMBOL(unregister_netdevice); +EXPORT_SYMBOL(unregister_netdevice_many); /** * unregister_netdev - remove device from the kernel @@ -5593,7 +5658,7 @@ restart: /* Delete virtual devices */ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { - dev->rtnl_link_ops->dellink(dev); + dev->rtnl_link_ops->dellink(dev, NULL); goto restart; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 753c420060d..89de182353b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -139,7 +139,9 @@ static ssize_t show_speed(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && netdev->ethtool_ops->get_settings) { + if (netif_running(netdev) && + netdev->ethtool_ops && + netdev->ethtool_ops->get_settings) { struct ethtool_cmd cmd = { ETHTOOL_GSET }; if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) @@ -158,7 +160,9 @@ static ssize_t show_duplex(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && netdev->ethtool_ops->get_settings) { + if (netif_running(netdev) && + netdev->ethtool_ops && + netdev->ethtool_ops->get_settings) { struct ethtool_cmd cmd = { ETHTOOL_GSET }; if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1da0e038df7..5ce017bf4af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -335,6 +335,7 @@ struct pktgen_dev { __u32 cur_src_mac_offset; __be32 cur_saddr; __be32 cur_daddr; + __u16 ip_id; __u16 cur_udp_dst; __u16 cur_udp_src; __u16 cur_queue_map; @@ -2630,6 +2631,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->protocol = IPPROTO_UDP; /* UDP */ iph->saddr = pkt_dev->cur_saddr; iph->daddr = pkt_dev->cur_daddr; + iph->id = htons(pkt_dev->ip_id); + pkt_dev->ip_id++; iph->frag_off = 0; iplen = 20 + 8 + datalen; iph->tot_len = htons(iplen); @@ -2641,24 +2644,26 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) + if (pkt_dev->nfrags <= 0) { pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { + memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); + } else { int frags = pkt_dev->nfrags; - int i; + int i, len; pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); if (frags > MAX_SKB_FRAGS) frags = MAX_SKB_FRAGS; if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); + len = datalen - frags * PAGE_SIZE; + memset(skb_put(skb, len), 0, len); datalen = frags * PAGE_SIZE; } i = 0; while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); skb_shinfo(skb)->frags[i].page = page; skb_shinfo(skb)->frags[i].page_offset = 0; skb_shinfo(skb)->frags[i].size = diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ba13b0974a7..391a62cd9df 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -248,7 +248,7 @@ static LIST_HEAD(link_ops); int __rtnl_link_register(struct rtnl_link_ops *ops) { if (!ops->dellink) - ops->dellink = unregister_netdevice; + ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); return 0; @@ -277,13 +277,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register); static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) { struct net_device *dev; -restart: + LIST_HEAD(list_kill); + for_each_netdev(net, dev) { - if (dev->rtnl_link_ops == ops) { - ops->dellink(dev); - goto restart; - } + if (dev->rtnl_link_ops == ops) + ops->dellink(dev, &list_kill); } + unregister_netdevice_many(&list_kill); } void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) @@ -682,22 +682,33 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx; - int s_idx = cb->args[0]; + int h, s_h; + int idx = 0, s_idx; struct net_device *dev; - - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) - break; + struct hlist_head *head; + struct hlist_node *node; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI) <= 0) + goto out; cont: - idx++; + idx++; + } } - cb->args[0] = idx; +out: + cb->args[1] = idx; + cb->args[0] = h; return skb->len; } @@ -961,7 +972,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!ops) return -EOPNOTSUPP; - ops->dellink(dev); + ops->dellink(dev, NULL); return 0; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f6a0af75993..26fb50e9131 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -447,6 +447,28 @@ extern int sysctl_tcp_synack_retries; EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); +/* Decide when to expire the request and when to resend SYN-ACK */ +static inline void syn_ack_recalc(struct request_sock *req, const int thresh, + const int max_retries, + const u8 rskq_defer_accept, + int *expire, int *resend) +{ + if (!rskq_defer_accept) { + *expire = req->retrans >= thresh; + *resend = 1; + return; + } + *expire = req->retrans >= thresh && + (!inet_rsk(req)->acked || req->retrans >= max_retries); + /* + * Do not resend while waiting for data after ACK, + * start to resend on end of deferring period to give + * last chance for data or ACK to create established socket. + */ + *resend = !inet_rsk(req)->acked || + req->retrans >= rskq_defer_accept - 1; +} + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -502,9 +524,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if ((req->retrans < thresh || - (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(parent, req)) { + int expire = 0, resend = 0; + + syn_ack_recalc(req, thresh, max_retries, + queue->rskq_defer_accept, + &expire, &resend); + if (!expire && + (!resend || + !req->rsk_ops->rtx_syn_ack(parent, req) || + inet_rsk(req)->acked)) { unsigned long timeo; if (req->retrans++ == 0) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 89ff9d5b150..a77807d449e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -156,8 +156,13 @@ struct ipgre_net { #define tunnels_r tunnels[2] #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipgre_lock); -static DEFINE_RWLOCK(ipgre_lock); +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) /* Given src, dst and key, find appropriate for input tunnel. */ @@ -175,7 +180,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, ARPHRD_ETHER : ARPHRD_IPGRE; int score, cand_score = 4; - for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { if (local != t->parms.iph.saddr || remote != t->parms.iph.daddr || key != t->parms.i_key || @@ -200,7 +205,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, } } - for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { if (remote != t->parms.iph.daddr || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) @@ -224,7 +229,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, } } - for (t = ign->tunnels_l[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { if ((local != t->parms.iph.saddr && (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) || @@ -250,7 +255,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, } } - for (t = ign->tunnels_wc[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { if (t->parms.i_key != key || !(t->dev->flags & IFF_UP)) continue; @@ -276,8 +281,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, if (cand != NULL) return cand; - if (ign->fb_tunnel_dev->flags & IFF_UP) - return netdev_priv(ign->fb_tunnel_dev); + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); return NULL; } @@ -311,10 +317,10 @@ static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) { struct ip_tunnel **tp = ipgre_bucket(ign, t); + spin_lock_bh(&ipgre_lock); t->next = *tp; - write_lock_bh(&ipgre_lock); - *tp = t; - write_unlock_bh(&ipgre_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipgre_lock); } static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) @@ -323,9 +329,9 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipgre_lock); + spin_lock_bh(&ipgre_lock); *tp = t->next; - write_unlock_bh(&ipgre_lock); + spin_unlock_bh(&ipgre_lock); break; } } @@ -476,7 +482,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - read_lock(&ipgre_lock); + rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, @@ -494,7 +500,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipgre_lock); + rcu_read_unlock(); return; } @@ -573,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - read_lock(&ipgre_lock); + rcu_read_lock(); if ((tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, key, gre_proto))) { @@ -647,13 +653,13 @@ static int ipgre_rcv(struct sk_buff *skb) ipgre_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipgre_lock); + rcu_read_unlock(); return(0); } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: - read_unlock(&ipgre_lock); + rcu_read_unlock(); drop_nolock: kfree_skb(skb); return(0); @@ -1284,16 +1290,19 @@ static const struct net_protocol ipgre_protocol = { .netns_ok = 1, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign) +static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) { int prio; for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = ign->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = ign->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } @@ -1341,10 +1350,12 @@ err_alloc: static void ipgre_exit_net(struct net *net) { struct ipgre_net *ign; + LIST_HEAD(list); ign = net_generic(net, ipgre_net_id); rtnl_lock(); - ipgre_destroy_tunnels(ign); + ipgre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); rtnl_unlock(); kfree(ign); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2445fedec0b..cafad9baff0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -480,7 +480,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_OPTIONS: { struct ip_options *opt = NULL; - if (optlen > 40 || optlen < 0) + if (optlen > 40) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); @@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); - if (dev) { + if (dev) mreq.imr_ifindex = dev->ifindex; - dev_put(dev); - } } else - dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); + dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); err = -EADDRNOTAVAIL; if (!dev) break; + dev_put(dev); err = -EINVAL; if (sk->sk_bound_dev_if && diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 6a5539236ab..a2ca53da437 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -134,7 +134,13 @@ static void ipip_fb_tunnel_init(struct net_device *dev); static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); -static DEFINE_RWLOCK(ipip_lock); +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipip_lock); + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, __be32 remote, __be32 local) @@ -144,20 +150,21 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, struct ip_tunnel *t; struct ipip_net *ipn = net_generic(net, ipip_net_id); - for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) return t; - } - for (t = ipn->tunnels_r[h0]; t; t = t->next) { + + for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) return t; - } - for (t = ipn->tunnels_l[h1]; t; t = t->next) { + + for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) return t; - } - if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) + + t = rcu_dereference(ipn->tunnels_wc[0]); + if (t && (t->dev->flags&IFF_UP)) return t; return NULL; } @@ -193,9 +200,9 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipip_lock); + spin_lock_bh(&ipip_lock); *tp = t->next; - write_unlock_bh(&ipip_lock); + spin_unlock_bh(&ipip_lock); break; } } @@ -205,10 +212,10 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel **tp = ipip_bucket(ipn, t); + spin_lock_bh(&ipip_lock); t->next = *tp; - write_lock_bh(&ipip_lock); - *tp = t; - write_unlock_bh(&ipip_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipip_lock); } static struct ip_tunnel * ipip_tunnel_locate(struct net *net, @@ -267,9 +274,9 @@ static void ipip_tunnel_uninit(struct net_device *dev) struct ipip_net *ipn = net_generic(net, ipip_net_id); if (dev == ipn->fb_tunnel_dev) { - write_lock_bh(&ipip_lock); + spin_lock_bh(&ipip_lock); ipn->tunnels_wc[0] = NULL; - write_unlock_bh(&ipip_lock); + spin_unlock_bh(&ipip_lock); } else ipip_tunnel_unlink(ipn, netdev_priv(dev)); dev_put(dev); @@ -318,7 +325,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) err = -ENOENT; - read_lock(&ipip_lock); + rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL || t->parms.iph.daddr == 0) goto out; @@ -333,7 +340,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipip_lock); + rcu_read_unlock(); return err; } @@ -351,11 +358,11 @@ static int ipip_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - read_lock(&ipip_lock); + rcu_read_lock(); if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr)) != NULL) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - read_unlock(&ipip_lock); + rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -374,10 +381,10 @@ static int ipip_rcv(struct sk_buff *skb) nf_reset(skb); ipip_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipip_lock); + rcu_read_unlock(); return 0; } - read_unlock(&ipip_lock); + rcu_read_unlock(); return -1; } @@ -747,16 +754,19 @@ static struct xfrm_tunnel ipip_handler = { static const char banner[] __initconst = KERN_INFO "IPv4 over IPv4 tunneling driver\n"; -static void ipip_destroy_tunnels(struct ipip_net *ipn) +static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) { int prio; for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = ipn->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = ipn->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } @@ -809,11 +819,13 @@ err_alloc: static void ipip_exit_net(struct net *net) { struct ipip_net *ipn; + LIST_HEAD(list); ipn = net_generic(net, ipip_net_id); rtnl_lock(); - ipip_destroy_tunnels(ipn); - unregister_netdevice(ipn->fb_tunnel_dev); + ipip_destroy_tunnels(ipn, &list); + unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); + unregister_netdevice_many(&list); rtnl_unlock(); kfree(ipn); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 694974502ea..ef4ee45b928 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -275,7 +275,8 @@ failure: * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(struct net *net, int vifi, int notify) +static int vif_delete(struct net *net, int vifi, int notify, + struct list_head *head) { struct vif_device *v; struct net_device *dev; @@ -319,7 +320,7 @@ static int vif_delete(struct net *net, int vifi, int notify) } if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); dev_put(dev); return 0; @@ -870,14 +871,16 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) static void mroute_clean_tables(struct net *net) { int i; + LIST_HEAD(list); /* * Shut down all active vif entries */ for (i = 0; i < net->ipv4.maxvif; i++) { if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) - vif_delete(net, i, 0); + vif_delete(net, i, 0, &list); } + unregister_netdevice_many(&list); /* * Wipe the cache @@ -993,7 +996,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (optname == MRT_ADD_VIF) { ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); } else { - ret = vif_delete(net, vif.vifc_vifi, 0); + ret = vif_delete(net, vif.vifc_vifi, 0, NULL); } rtnl_unlock(); return ret; @@ -1156,6 +1159,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct net *net = dev_net(dev); struct vif_device *v; int ct; + LIST_HEAD(list); if (!net_eq(dev_net(dev), net)) return NOTIFY_DONE; @@ -1165,8 +1169,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v v = &net->ipv4.vif_table[0]; for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { if (v->dev == dev) - vif_delete(net, ct, 1); + vif_delete(net, ct, 1, &list); } + unregister_netdevice_many(&list); return NOTIFY_DONE; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5ec678ad70e..3146cc40174 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -276,13 +276,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -298,12 +291,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -352,6 +339,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 206a291dff0..e0cfa633680 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk) EXPORT_SYMBOL(tcp_enter_memory_pressure); +/* Convert seconds to retransmits based on initial and max timeout */ +static u8 secs_to_retrans(int seconds, int timeout, int rto_max) +{ + u8 res = 0; + + if (seconds > 0) { + int period = timeout; + + res = 1; + while (seconds > period && res < 255) { + res++; + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return res; +} + +/* Convert retransmits to seconds based on initial and max timeout */ +static int retrans_to_secs(u8 retrans, int timeout, int rto_max) +{ + int period = 0; + + if (retrans > 0) { + period = timeout; + while (--retrans) { + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return period; +} + /* * Wait for a TCP event. * @@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - WARN_ON(!(flags & MSG_PEEK)); + WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " + "copied %X seq %X\n", *seq, + TCP_SKB_CB(skb)->seq); } /* Well, if we have backlog, try to process it now yet. */ @@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - icsk->icsk_accept_queue.rskq_defer_accept = 0; - if (val > 0) { - /* Translate value in seconds to number of - * retransmits */ - while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && - val > ((TCP_TIMEOUT_INIT / HZ) << - icsk->icsk_accept_queue.rskq_defer_accept)) - icsk->icsk_accept_queue.rskq_defer_accept++; - icsk->icsk_accept_queue.rskq_defer_accept++; - } + /* Translate value in seconds to number of retransmits */ + icsk->icsk_accept_queue.rskq_defer_accept = + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: @@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : - ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); + val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, + TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d86784be7ab..ba0eab65fe8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) * they differ. Since neither occurs due to loss, TCP should really * ignore them. */ -static inline int tcp_dupack_heurestics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(struct tcp_sock *tp) { return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } @@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk) return 1; /* Not-A-Trick#2 : Classic rule... */ - if (tcp_dupack_heurestics(tp) > tp->reordering) + if (tcp_dupack_heuristics(tp) > tp->reordering) return 1; /* Trick#3 : when we use RFC2988 timer restart, fast @@ -3698,12 +3698,14 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab) + int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); + BUG_ON(!estab && !dst); + ptr = (unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; @@ -3737,7 +3739,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling) { + !estab && sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -3753,7 +3756,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps))) { + (!estab && sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3761,7 +3765,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack) { + !estab && sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } @@ -3820,7 +3825,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); return 1; } @@ -4075,8 +4080,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4105,13 +4112,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -5364,8 +5373,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int saved_clamp = tp->rx_opt.mss_clamp; + struct dst_entry *dst = __sk_dst_get(sk); - tcp_parse_options(skb, &tp->rx_opt, 0); + tcp_parse_options(skb, &tp->rx_opt, 0, dst); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a4a3390a528..657ae334f12 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1257,11 +1257,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + + dst = inet_csk_route_req(sk, req); + if(!dst) + goto drop_and_free; + tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = 536; tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1270,14 +1280,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; + goto drop_and_release; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1302,7 +1306,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e320afea07f..463d51b53d3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -100,9 +100,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_options_received tmp_opt; int paws_reject = 0; - tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, 0); + tmp_opt.tstamp_ok = 1; + tcp_parse_options(skb, &tmp_opt, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -500,10 +500,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int paws_reject = 0; struct tcp_options_received tmp_opt; struct sock *child; + struct dst_entry *dst = inet_csk_route_req(sk, req); tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -516,6 +517,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, } } + dst_release(dst); + /* Check for pure retransmitted SYN. */ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN && @@ -641,10 +644,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ + if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - inet_csk(sk)->icsk_accept_queue.rskq_defer_accept--; inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2e2eb74ac4c..616c686ca25 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -464,6 +464,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; + struct dst_entry *dst = __sk_dst_get(sk); #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); @@ -487,18 +488,22 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); size += TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && + *md5 == NULL)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling)) { + if (likely(sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; size += TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack)) { + if (likely(sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) size += TCPOLEN_SACKPERM_ALIGNED; @@ -2315,7 +2320,9 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + (sysctl_tcp_timestamps && + (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? + TCPOLEN_TSTAMP_ALIGNED : 0)); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2341,7 +2348,8 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + (sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c595bbe1ed9..6c1b5c98e81 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -88,8 +88,10 @@ struct ip6_tnl_net { struct ip6_tnl **tnls[2]; }; -/* lock for the tunnel lists */ -static DEFINE_RWLOCK(ip6_tnl_lock); +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ip6_tnl_lock); static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { @@ -130,6 +132,9 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) * else %NULL **/ +#define for_each_ip6_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + static struct ip6_tnl * ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) { @@ -138,13 +143,14 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) { + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } - if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP)) + t = rcu_dereference(ip6n->tnls_wc[0]); + if (t && (t->dev->flags & IFF_UP)) return t; return NULL; @@ -186,10 +192,10 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); + spin_lock_bh(&ip6_tnl_lock); t->next = *tp; - write_lock_bh(&ip6_tnl_lock); - *tp = t; - write_unlock_bh(&ip6_tnl_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ip6_tnl_lock); } /** @@ -204,9 +210,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ip6_tnl_lock); + spin_lock_bh(&ip6_tnl_lock); *tp = t->next; - write_unlock_bh(&ip6_tnl_lock); + spin_unlock_bh(&ip6_tnl_lock); break; } } @@ -313,9 +319,9 @@ ip6_tnl_dev_uninit(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) { - write_lock_bh(&ip6_tnl_lock); + spin_lock_bh(&ip6_tnl_lock); ip6n->tnls_wc[0] = NULL; - write_unlock_bh(&ip6_tnl_lock); + spin_unlock_bh(&ip6_tnl_lock); } else { ip6_tnl_unlink(ip6n, t); } @@ -409,7 +415,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, in trouble since we might need the source address for further processing of the error. */ - read_lock(&ip6_tnl_lock); + rcu_read_lock(); if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr)) == NULL) goto out; @@ -482,7 +488,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, *msg = rel_msg; out: - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); return err; } @@ -693,23 +699,23 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, struct ip6_tnl *t; struct ipv6hdr *ipv6h = ipv6_hdr(skb); - read_lock(&ip6_tnl_lock); + rcu_read_lock(); if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { if (t->parms.proto != ipproto && t->parms.proto != 0) { - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); goto discard; } if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); goto discard; } if (!ip6_tnl_rcv_ctl(t)) { t->dev->stats.rx_dropped++; - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); goto discard; } secpath_reset(skb); @@ -727,10 +733,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, t->dev->stats.rx_packets++; t->dev->stats.rx_bytes += skb->len; netif_rx(skb); - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); return 0; } - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); return 1; discard: @@ -1387,14 +1393,19 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) { int h; struct ip6_tnl *t; + LIST_HEAD(list); for (h = 0; h < HASH_SIZE; h++) { - while ((t = ip6n->tnls_r_l[h]) != NULL) - unregister_netdevice(t->dev); + t = ip6n->tnls_r_l[h]; + while (t != NULL) { + unregister_netdevice_queue(t->dev, &list); + t = t->next; + } } t = ip6n->tnls_wc[0]; - unregister_netdevice(t->dev); + unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_many(&list); } static int ip6_tnl_init_net(struct net *net) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 85849b4f5a3..52e0f74fdfe 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -477,7 +477,7 @@ failure: * Delete a VIF entry */ -static int mif6_delete(struct net *net, int vifi) +static int mif6_delete(struct net *net, int vifi, struct list_head *head) { struct mif_device *v; struct net_device *dev; @@ -519,7 +519,7 @@ static int mif6_delete(struct net *net, int vifi) in6_dev->cnf.mc_forwarding--; if (v->flags & MIFF_REGISTER) - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); dev_put(dev); return 0; @@ -976,6 +976,7 @@ static int ip6mr_device_event(struct notifier_block *this, struct net *net = dev_net(dev); struct mif_device *v; int ct; + LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; @@ -983,8 +984,10 @@ static int ip6mr_device_event(struct notifier_block *this, v = &net->ipv6.vif6_table[0]; for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(net, ct); + mif6_delete(net, ct, &list); } + unregister_netdevice_many(&list); + return NOTIFY_DONE; } @@ -1188,14 +1191,16 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) static void mroute_clean_tables(struct net *net) { int i; + LIST_HEAD(list); /* * Shut down all active vif entries */ for (i = 0; i < net->ipv6.maxvif; i++) { if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(net, i); + mif6_delete(net, i, &list); } + unregister_netdevice_many(&list); /* * Wipe the cache @@ -1325,7 +1330,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(net, mifi); + ret = mif6_delete(net, mifi, NULL); rtnl_unlock(); return ret; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 68566de4bcc..430454ee5ea 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -497,13 +497,17 @@ done: goto e_inval; if (val) { + struct net_device *dev; + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(net, val) == NULL) { + dev = dev_get_by_index(net, val); + if (!dev) { retv = -ENODEV; break; } + dev_put(dev); } np->mcast_oif = val; retv = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 510d31f3cb9..2362a3397e9 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -77,8 +77,17 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -static DEFINE_RWLOCK(ipip6_lock); +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipip6_lock); + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) +/* + * Must be invoked with rcu_read_lock + */ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, struct net_device *dev, __be32 remote, __be32 local) { @@ -87,26 +96,26 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); - for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - for (t = sitn->tunnels_r[h0]; t; t = t->next) { + for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - for (t = sitn->tunnels_l[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - t = sitn->tunnels_wc[0]; + t = rcu_dereference(sitn->tunnels_wc[0]); if ((t != NULL) && (t->dev->flags & IFF_UP)) return t; return NULL; @@ -143,9 +152,9 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipip6_lock); + spin_lock_bh(&ipip6_lock); *tp = t->next; - write_unlock_bh(&ipip6_lock); + spin_unlock_bh(&ipip6_lock); break; } } @@ -155,10 +164,10 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel **tp = ipip6_bucket(sitn, t); + spin_lock_bh(&ipip6_lock); t->next = *tp; - write_lock_bh(&ipip6_lock); - *tp = t; - write_unlock_bh(&ipip6_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipip6_lock); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) @@ -240,15 +249,22 @@ failed: return NULL; } +static DEFINE_SPINLOCK(ipip6_prl_lock); + +#define for_each_prl_rcu(start) \ + for (prl = rcu_dereference(start); \ + prl; \ + prl = rcu_dereference(prl->next)) + static struct ip_tunnel_prl_entry * __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) { - struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL; + struct ip_tunnel_prl_entry *prl; - for (p = t->prl; p; p = p->next) - if (p->addr == addr) + for_each_prl_rcu(t->prl) + if (prl->addr == addr) break; - return p; + return prl; } @@ -273,7 +289,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : NULL; - read_lock(&ipip6_lock); + rcu_read_lock(); ca = t->prl_count < cmax ? t->prl_count : cmax; @@ -291,7 +307,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, } c = 0; - for (prl = t->prl; prl; prl = prl->next) { + for_each_prl_rcu(t->prl) { if (c >= cmax) break; if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) @@ -303,7 +319,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, break; } out: - read_unlock(&ipip6_lock); + rcu_read_unlock(); len = sizeof(*kp) * c; ret = 0; @@ -324,12 +340,14 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - write_lock(&ipip6_lock); + spin_lock(&ipip6_prl_lock); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { - if (chg) - goto update; + if (chg) { + p->flags = a->flags; + goto out; + } err = -EEXIST; goto out; } @@ -346,46 +364,63 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) goto out; } + INIT_RCU_HEAD(&p->rcu_head); p->next = t->prl; - t->prl = p; - t->prl_count++; -update: p->addr = a->addr; p->flags = a->flags; + t->prl_count++; + rcu_assign_pointer(t->prl, p); out: - write_unlock(&ipip6_lock); + spin_unlock(&ipip6_prl_lock); return err; } +static void prl_entry_destroy_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head)); +} + +static void prl_list_destroy_rcu(struct rcu_head *head) +{ + struct ip_tunnel_prl_entry *p, *n; + + p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); + do { + n = p->next; + kfree(p); + p = n; + } while (p); +} + static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { struct ip_tunnel_prl_entry *x, **p; int err = 0; - write_lock(&ipip6_lock); + spin_lock(&ipip6_prl_lock); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { if ((*p)->addr == a->addr) { x = *p; *p = x->next; - kfree(x); + call_rcu(&x->rcu_head, prl_entry_destroy_rcu); t->prl_count--; goto out; } } err = -ENXIO; } else { - while (t->prl) { + if (t->prl) { + t->prl_count = 0; x = t->prl; - t->prl = t->prl->next; - kfree(x); - t->prl_count--; + call_rcu(&x->rcu_head, prl_list_destroy_rcu); + t->prl = NULL; } } out: - write_unlock(&ipip6_lock); + spin_unlock(&ipip6_prl_lock); return err; } @@ -395,7 +430,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) struct ip_tunnel_prl_entry *p; int ok = 1; - read_lock(&ipip6_lock); + rcu_read_lock(); p = __ipip6_tunnel_locate_prl(t, iph->saddr); if (p) { if (p->flags & PRL_DEFAULT) @@ -411,7 +446,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) else ok = 0; } - read_unlock(&ipip6_lock); + rcu_read_unlock(); return ok; } @@ -421,9 +456,9 @@ static void ipip6_tunnel_uninit(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { - write_lock_bh(&ipip6_lock); + spin_lock_bh(&ipip6_lock); sitn->tunnels_wc[0] = NULL; - write_unlock_bh(&ipip6_lock); + spin_unlock_bh(&ipip6_lock); dev_put(dev); } else { ipip6_tunnel_unlink(sitn, netdev_priv(dev)); @@ -476,7 +511,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - read_lock(&ipip6_lock); + rcu_read_lock(); t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, @@ -494,7 +529,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipip6_lock); + rcu_read_unlock(); return err; } @@ -514,7 +549,7 @@ static int ipip6_rcv(struct sk_buff *skb) iph = ip_hdr(skb); - read_lock(&ipip6_lock); + rcu_read_lock(); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -528,7 +563,7 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - read_unlock(&ipip6_lock); + rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -539,12 +574,12 @@ static int ipip6_rcv(struct sk_buff *skb) nf_reset(skb); ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipip6_lock); + rcu_read_unlock(); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - read_unlock(&ipip6_lock); + rcu_read_unlock(); out: kfree_skb(skb); return 0; @@ -1110,16 +1145,19 @@ static struct xfrm_tunnel sit_handler = { .priority = 1, }; -static void sit_destroy_tunnels(struct sit_net *sitn) +static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = sitn->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = sitn->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } @@ -1173,11 +1211,13 @@ err_alloc: static void sit_exit_net(struct net *net) { struct sit_net *sitn; + LIST_HEAD(list); sitn = net_generic(net, sit_net_id); rtnl_lock(); - sit_destroy_tunnels(sitn); - unregister_netdevice(sitn->fb_tunnel_dev); + sit_destroy_tunnels(sitn, &list); + unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); + unregister_netdevice_many(&list); rtnl_unlock(); kfree(sitn); } @@ -1192,6 +1232,7 @@ static void __exit sit_cleanup(void) xfrm4_tunnel_deregister(&sit_handler, AF_INET6); unregister_pernet_gen_device(sit_net_id, &sit_net_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } static int __init sit_init(void) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index c46da533888..612fc53e0bb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -184,13 +184,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (!req) @@ -224,12 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -265,6 +252,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, 0, dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c54ec3615de..34925f089e0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1167,6 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = __sk_dst_get(sk); #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1205,7 +1206,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0); + tcp_parse_options(skb, &tmp_opt, 0, dst); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 81a95c00e50..438831d3359 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -23,7 +23,7 @@ */ #include <linux/module.h> #include <linux/xfrm.h> -#include <linux/list.h> +#include <linux/rculist.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipv6.h> @@ -36,14 +36,15 @@ * per xfrm_address_t. */ struct xfrm6_tunnel_spi { - struct hlist_node list_byaddr; - struct hlist_node list_byspi; - xfrm_address_t addr; - u32 spi; - atomic_t refcnt; + struct hlist_node list_byaddr; + struct hlist_node list_byspi; + xfrm_address_t addr; + u32 spi; + atomic_t refcnt; + struct rcu_head rcu_head; }; -static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock); +static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); static u32 xfrm6_tunnel_spi; @@ -107,6 +108,7 @@ static void xfrm6_tunnel_spi_fini(void) if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i])) return; } + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); xfrm6_tunnel_spi_kmem = NULL; } @@ -116,7 +118,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos; - hlist_for_each_entry(x6spi, pos, + hlist_for_each_entry_rcu(x6spi, pos, &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) @@ -131,10 +133,10 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; u32 spi; - read_lock_bh(&xfrm6_tunnel_spi_lock); + rcu_read_lock_bh(); x6spi = __xfrm6_tunnel_spi_lookup(saddr); spi = x6spi ? x6spi->spi : 0; - read_unlock_bh(&xfrm6_tunnel_spi_lock); + rcu_read_unlock_bh(); return htonl(spi); } @@ -185,14 +187,15 @@ alloc_spi: if (!x6spi) goto out; + INIT_RCU_HEAD(&x6spi->rcu_head); memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr)); x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); - hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); + hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); index = xfrm6_tunnel_spi_hash_byaddr(saddr); - hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); + hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); out: return spi; } @@ -202,26 +205,32 @@ __be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; u32 spi; - write_lock_bh(&xfrm6_tunnel_spi_lock); + spin_lock_bh(&xfrm6_tunnel_spi_lock); x6spi = __xfrm6_tunnel_spi_lookup(saddr); if (x6spi) { atomic_inc(&x6spi->refcnt); spi = x6spi->spi; } else spi = __xfrm6_tunnel_alloc_spi(saddr); - write_unlock_bh(&xfrm6_tunnel_spi_lock); + spin_unlock_bh(&xfrm6_tunnel_spi_lock); return htonl(spi); } EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi); +static void x6spi_destroy_rcu(struct rcu_head *head) +{ + kmem_cache_free(xfrm6_tunnel_spi_kmem, + container_of(head, struct xfrm6_tunnel_spi, rcu_head)); +} + void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos, *n; - write_lock_bh(&xfrm6_tunnel_spi_lock); + spin_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, pos, n, &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], @@ -229,14 +238,14 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { if (atomic_dec_and_test(&x6spi->refcnt)) { - hlist_del(&x6spi->list_byaddr); - hlist_del(&x6spi->list_byspi); - kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi); + hlist_del_rcu(&x6spi->list_byaddr); + hlist_del_rcu(&x6spi->list_byspi); + call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu); break; } } } - write_unlock_bh(&xfrm6_tunnel_spi_lock); + spin_unlock_bh(&xfrm6_tunnel_spi_lock); } EXPORT_SYMBOL(xfrm6_tunnel_free_spi); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ff752c60641..33e68f20ec6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -79,6 +79,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/if_vlan.h> #ifdef CONFIG_INET #include <net/inet_common.h> @@ -766,7 +767,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = skb->vlan_tci; + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); hdrlen = sizeof(*h.h2); break; default: @@ -1493,7 +1494,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = skb->vlan_tci; + aux.tp_vlan_tci = vlan_tx_tag_get(skb); put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0f133c5a8d3..3291902f0b8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1074,6 +1074,8 @@ restart: err = -ECONNREFUSED; if (other->sk_state != TCP_LISTEN) goto out_unlock; + if (other->sk_shutdown & RCV_SHUTDOWN) + goto out_unlock; if (unix_recvq_full(other)) { err = -EAGAIN; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 7d7c3abf38b..96d92278354 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -114,7 +114,7 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp /* * Copy any Call User Data. */ - if (skb->len >= 0) { + if (skb->len > 0) { skb_copy_from_linear_data(skb, x25->calluserdata.cuddata, skb->len); diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 2c999ccf504..66961ea28c9 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -190,7 +190,7 @@ int x25_route_ioctl(unsigned int cmd, void __user *arg) goto out; rc = -EINVAL; - if (rt.sigdigits < 0 || rt.sigdigits > 15) + if (rt.sigdigits > 15) goto out; dev = x25_dev_get(rt.device); |