diff options
Diffstat (limited to 'net')
58 files changed, 1050 insertions, 777 deletions
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 5e86d6f0c0f..319a7ccf6ef 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -24,7 +24,6 @@ #include <linux/slab.h> #include "icmp_socket.h" #include "send.h" -#include "types.h" #include "hash.h" #include "originator.h" #include "hard-interface.h" diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 08b18595950..462b190fa10 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ #define _NET_BATMAN_ADV_ICMP_SOCKET_H_ -#include "types.h" - #define ICMP_SOCKET "socket" void bat_socket_init(void); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index dc9248d9ea5..06d956c91c2 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -30,7 +30,6 @@ #include "translation-table.h" #include "hard-interface.h" #include "gateway_client.h" -#include "types.h" #include "vis.h" #include "hash.h" diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 028f73967b0..827414067e4 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -28,7 +28,6 @@ #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "types.h" #include "ring_buffer.h" #include "vis.h" #include "aggregation.h" diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index ceeca6f6ad1..a09d16f0c3a 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -#include "types.h" - void slide_own_bcast_window(struct batman_if *batman_if); void receive_bat_packet(struct ethhdr *ethhdr, struct batman_packet *batman_packet, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 7cc620e8aa1..831427694fc 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -25,7 +25,6 @@ #include "translation-table.h" #include "soft-interface.h" #include "hard-interface.h" -#include "types.h" #include "vis.h" #include "aggregation.h" #include "gateway_common.h" diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index bc53adede58..b68c272cb84 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_SEND_H_ #define _NET_BATMAN_ADV_SEND_H_ -#include "types.h" - int send_skb_packet(struct sk_buff *skb, struct batman_if *batman_if, uint8_t *dst_addr); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 145e0f78292..bd088f877e3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -26,7 +26,6 @@ #include "send.h" #include "bat_debugfs.h" #include "translation-table.h" -#include "types.h" #include "hash.h" #include "gateway_common.h" #include "gateway_client.h" diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f6917dde42c..7fb6726ccbd 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -22,7 +22,6 @@ #include "main.h" #include "translation-table.h" #include "soft-interface.h" -#include "types.h" #include "hash.h" #include "originator.h" diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index a4f3a37fd6e..f19931ca145 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ -#include "types.h" - int hna_local_init(struct bat_priv *bat_priv); void hna_local_add(struct net_device *soft_iface, uint8_t *addr); void hna_local_remove(struct bat_priv *bat_priv, diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index bedf2942577..121b11d2a23 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, (struct unicast_frag_packet *)skb->data; struct sk_buff *tmp_skb; struct unicast_packet *unicast_packet; - int hdr_len = sizeof(struct unicast_packet), - uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; + int hdr_len = sizeof(struct unicast_packet); + int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; /* set skb to the first part and tmp_skb to the second part */ if (up->flags & UNI_FRAG_HEAD) { @@ -231,6 +231,7 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, int ucf_hdr_len = sizeof(struct unicast_frag_packet); int data_len = skb->len - uc_hdr_len; int large_tail = 0; + uint16_t seqno; if (!bat_priv->primary_if) goto dropped; @@ -266,10 +267,9 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag1->flags = UNI_FRAG_HEAD | large_tail; frag2->flags = large_tail; - frag1->seqno = htons((uint16_t)atomic_inc_return( - &batman_if->frag_seqno)); - frag2->seqno = htons((uint16_t)atomic_inc_return( - &batman_if->frag_seqno)); + seqno = atomic_add_return(2, &batman_if->frag_seqno); + frag1->seqno = htons(seqno - 1); + frag2->seqno = htons(seqno); send_skb_packet(skb, batman_if, dstaddr); send_skb_packet(frag_skb, batman_if, dstaddr); @@ -286,7 +286,7 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct unicast_packet *unicast_packet; - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; struct batman_if *batman_if; struct neigh_node *router; int data_len = skb->len; @@ -297,11 +297,6 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) orig_node = (struct orig_node *)gw_get_selected(bat_priv); - else - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, - choose_orig, - ethhdr->h_dest)); /* check for hna host */ if (!orig_node) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7550abb0c96..675614e38e1 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -859,6 +859,7 @@ static void __l2cap_sock_close(struct sock *sk, int reason) result = L2CAP_CR_SEC_BLOCK; else result = L2CAP_CR_BAD_PSM; + sk->sk_state = BT_DISCONN; rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 556443566e9..1461b19efd3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -297,6 +297,21 @@ void br_netpoll_disable(struct net_bridge_port *p) #endif +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) + +{ + struct net_bridge *br = netdev_priv(dev); + + return br_add_if(br, slave_dev); +} + +static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_del_if(br, slave_dev); +} + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -326,6 +341,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_netpoll_cleanup = br_netpoll_cleanup, .ndo_poll_controller = br_poll_controller, #endif + .ndo_add_slave = br_add_slave, + .ndo_del_slave = br_del_slave, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2a6801d8b72..dce8f0009a1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -148,6 +148,8 @@ static void del_nbp(struct net_bridge_port *p) netdev_rx_handler_unregister(dev); + netdev_set_master(dev, NULL); + br_multicast_del_port(p); kobject_uevent(&p->kobj, KOBJ_REMOVE); @@ -429,10 +431,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) goto err3; - err = netdev_rx_handler_register(dev, br_handle_frame, p); + err = netdev_set_master(dev, br->dev); if (err) goto err3; + err = netdev_rx_handler_register(dev, br_handle_frame, p); + if (err) + goto err4; + dev->priv_flags |= IFF_BRIDGE_PORT; dev_disable_lro(dev); @@ -455,6 +461,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); return 0; + +err4: + netdev_set_master(dev, NULL); err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6f6d8e1b776..88e4aa9cb1f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { - if ((mdst && !hlist_unhashed(&mdst->mglist)) || + if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; br_multicast_forward(mdst, skb, skb2); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f701a21acb3..09d5c098792 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -232,8 +232,7 @@ static void br_multicast_group_expired(unsigned long data) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - if (!hlist_unhashed(&mp->mglist)) - hlist_del_init(&mp->mglist); + mp->mglist = false; if (mp->ports) goto out; @@ -276,7 +275,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->query_timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && hlist_unhashed(&mp->mglist) && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -528,7 +527,7 @@ static void br_multicast_group_query_expired(unsigned long data) struct net_bridge *br = mp->br; spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) || + if (!netif_running(br->dev) || !mp->mglist || mp->queries_sent >= br->multicast_last_member_count) goto out; @@ -719,7 +718,7 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - hlist_add_head(&mp->mglist, &br->mglist); + mp->mglist = true; mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -1165,7 +1164,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay *= br->multicast_last_member_count; - if (!hlist_unhashed(&mp->mglist) && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1177,7 +1176,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) - mod_timer(&mp->timer, now + max_delay); + mod_timer(&p->timer, now + max_delay); } out: @@ -1236,7 +1235,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; max_delay *= br->multicast_last_member_count; - if (!hlist_unhashed(&mp->mglist) && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1248,7 +1247,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) - mod_timer(&mp->timer, now + max_delay); + mod_timer(&p->timer, now + max_delay); } out: @@ -1283,7 +1282,7 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (!hlist_unhashed(&mp->mglist) && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9f22898c535..f7afc364d77 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -84,13 +84,13 @@ struct net_bridge_port_group { struct net_bridge_mdb_entry { struct hlist_node hlist[2]; - struct hlist_node mglist; struct net_bridge *br; struct net_bridge_port_group __rcu *ports; struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; struct br_ip addr; + bool mglist; u32 queries_sent; }; @@ -238,7 +238,6 @@ struct net_bridge spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; struct hlist_head router_list; - struct hlist_head mglist; struct timer_list multicast_router_timer; struct timer_list multicast_querier_timer; diff --git a/net/core/dev.c b/net/core/dev.c index 54aaca69a02..578415c1ef7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1281,10 +1281,13 @@ static int __dev_close_many(struct list_head *head) static int __dev_close(struct net_device *dev) { + int retval; LIST_HEAD(single); list_add(&dev->unreg_list, &single); - return __dev_close_many(&single); + retval = __dev_close_many(&single); + list_del(&single); + return retval; } static int dev_close_many(struct list_head *head) @@ -1326,7 +1329,7 @@ int dev_close(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_close_many(&single); - + list_del(&single); return 0; } EXPORT_SYMBOL(dev_close); @@ -3107,7 +3110,8 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. */ -int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) +static int __skb_bond_should_drop(struct sk_buff *skb, + struct net_device *master) { struct net_device *dev = skb->dev; @@ -3141,14 +3145,12 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) } return 0; } -EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *master; struct net_device *null_or_orig; struct net_device *orig_or_bond; int ret = NET_RX_DROP; @@ -3175,15 +3177,19 @@ static int __netif_receive_skb(struct sk_buff *skb) */ null_or_orig = NULL; orig_dev = skb->dev; - master = ACCESS_ONCE(orig_dev->master); if (skb->deliver_no_wcard) null_or_orig = orig_dev; - else if (master) { - if (skb_bond_should_drop(skb, master)) { - skb->deliver_no_wcard = 1; - null_or_orig = orig_dev; /* deliver only exact match */ - } else - skb->dev = master; + else if (netif_is_bond_slave(orig_dev)) { + struct net_device *bond_master = ACCESS_ONCE(orig_dev->master); + + if (likely(bond_master)) { + if (__skb_bond_should_drop(skb, bond_master)) { + skb->deliver_no_wcard = 1; + /* deliver only exact match */ + null_or_orig = orig_dev; + } else + skb->dev = bond_master; + } } __this_cpu_inc(softnet_data.processed); @@ -4348,15 +4354,14 @@ static int __init dev_proc_init(void) /** - * netdev_set_master - set up master/slave pair + * netdev_set_master - set up master pointer * @slave: slave device * @master: new master device * * Changes the master device of the slave. Pass %NULL to break the * bonding. The caller must hold the RTNL semaphore. On a failure * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. + * are adjusted and the function returns zero. */ int netdev_set_master(struct net_device *slave, struct net_device *master) { @@ -4376,6 +4381,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) synchronize_net(); dev_put(old); } + return 0; +} +EXPORT_SYMBOL(netdev_set_master); + +/** + * netdev_set_bond_master - set up bonding master/slave pair + * @slave: slave device + * @master: new master device + * + * Changes the master device of the slave. Pass %NULL to break the + * bonding. The caller must hold the RTNL semaphore. On a failure + * a negative errno code is returned. On success %RTM_NEWLINK is sent + * to the routing socket and the function returns zero. + */ +int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +{ + int err; + + ASSERT_RTNL(); + + err = netdev_set_master(slave, master); + if (err) + return err; if (master) slave->flags |= IFF_SLAVE; else @@ -4384,7 +4412,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); return 0; } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_set_bond_master); static void dev_change_rx_flags(struct net_device *dev, int flags) { @@ -5219,6 +5247,7 @@ static void rollback_registered(struct net_device *dev) list_add(&dev->unreg_list, &single); rollback_registered_many(&single); + list_del(&single); } u32 netdev_fix_features(struct net_device *dev, u32 features) @@ -5250,6 +5279,12 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~NETIF_F_TSO; } + /* Software GSO depends on SG. */ + if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + features &= ~NETIF_F_GSO; + } + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ @@ -5272,6 +5307,37 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); +void netdev_update_features(struct net_device *dev) +{ + u32 features; + int err = 0; + + features = netdev_get_wanted_features(dev); + + if (dev->netdev_ops->ndo_fix_features) + features = dev->netdev_ops->ndo_fix_features(dev, features); + + /* driver might be less strict about feature dependencies */ + features = netdev_fix_features(dev, features); + + if (dev->features == features) + return; + + netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + dev->features, features); + + if (dev->netdev_ops->ndo_set_features) + err = dev->netdev_ops->ndo_set_features(dev, features); + + if (!err) + dev->features = features; + else if (err < 0) + netdev_err(dev, + "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", + err, features, dev->features); +} +EXPORT_SYMBOL(netdev_update_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from @@ -5406,11 +5472,18 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - dev->features = netdev_fix_features(dev, dev->features); + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). + */ + dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = (dev->features & dev->hw_features) + | NETIF_F_SOFT_FEATURES; + + /* Avoid warning from netdev_fix_features() for GSO without SG */ + if (!(dev->wanted_features & NETIF_F_SG)) + dev->wanted_features &= ~NETIF_F_GSO; - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; + netdev_update_features(dev); /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features @@ -6365,6 +6438,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); + list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/dst.c b/net/core/dst.c index c1674fde827..91104d35de7 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; -void *dst_alloc(struct dst_ops *ops) +void *dst_alloc(struct dst_ops *ops, int initial_ref) { struct dst_entry *dst; @@ -177,7 +177,7 @@ void *dst_alloc(struct dst_ops *ops) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, 0); + atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 5984ee0c713..66cdc76770c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -u32 ethtool_op_get_rx_csum(struct net_device *dev) -{ - return (dev->features & NETIF_F_ALL_CSUM) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_rx_csum); - u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; @@ -55,6 +49,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_csum); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) { @@ -171,6 +166,306 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +#define ETHTOOL_DEV_FEATURE_WORDS 1 + +static int ethtool_get_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_gfeatures cmd = { + .cmd = ETHTOOL_GFEATURES, + .size = ETHTOOL_DEV_FEATURE_WORDS, + }; + struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { + { + .available = dev->hw_features, + .requested = dev->wanted_features, + .active = dev->features, + .never_changed = NETIF_F_NEVER_CHANGE, + }, + }; + u32 __user *sizeaddr; + u32 copy_size; + + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); + if (get_user(copy_size, sizeaddr)) + return -EFAULT; + + if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) + copy_size = ETHTOOL_DEV_FEATURE_WORDS; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) + return -EFAULT; + + return 0; +} + +static int ethtool_set_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_sfeatures cmd; + struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; + int ret = 0; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + + if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) + return -EINVAL; + + if (copy_from_user(features, useraddr, sizeof(features))) + return -EFAULT; + + if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) + return -EINVAL; + + if (features[0].valid & ~dev->hw_features) { + features[0].valid &= dev->hw_features; + ret |= ETHTOOL_F_UNSUPPORTED; + } + + dev->wanted_features &= ~features[0].valid; + dev->wanted_features |= features[0].valid & features[0].requested; + netdev_update_features(dev); + + if ((dev->wanted_features ^ dev->features) & features[0].valid) + ret |= ETHTOOL_F_WISH; + + return ret; +} + +static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { + /* NETIF_F_SG */ "tx-scatter-gather", + /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", + /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", + /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", + /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6", + /* NETIF_F_HIGHDMA */ "highdma", + /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", + /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", + + /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", + /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", + /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", + /* NETIF_F_GSO */ "tx-generic-segmentation", + /* NETIF_F_LLTX */ "tx-lockless", + /* NETIF_F_NETNS_LOCAL */ "netns-local", + /* NETIF_F_GRO */ "rx-gro", + /* NETIF_F_LRO */ "rx-lro", + + /* NETIF_F_TSO */ "tx-tcp-segmentation", + /* NETIF_F_UFO */ "tx-udp-fragmentation", + /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", + /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", + /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", + /* NETIF_F_FSO */ "tx-fcoe-segmentation", + "", + "", + + /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", + /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", + /* NETIF_F_FCOE_MTU */ "fcoe-mtu", + /* NETIF_F_NTUPLE */ "rx-ntuple-filter", + /* NETIF_F_RXHASH */ "rx-hashing", + /* NETIF_F_RXCSUM */ "rx-checksum", + "", + "", +}; + +static int __ethtool_get_sset_count(struct net_device *dev, int sset) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (sset == ETH_SS_FEATURES) + return ARRAY_SIZE(netdev_features_strings); + + if (ops && ops->get_sset_count && ops->get_strings) + return ops->get_sset_count(dev, sset); + else + return -EOPNOTSUPP; +} + +static void __ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (stringset == ETH_SS_FEATURES) + memcpy(data, netdev_features_strings, + sizeof(netdev_features_strings)); + else + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); +} + +static u32 ethtool_get_feature_mask(u32 eth_cmd) +{ + /* feature masks of legacy discrete ethtool ops */ + + switch (eth_cmd) { + case ETHTOOL_GTXCSUM: + case ETHTOOL_STXCSUM: + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GRXCSUM: + case ETHTOOL_SRXCSUM: + return NETIF_F_RXCSUM; + case ETHTOOL_GSG: + case ETHTOOL_SSG: + return NETIF_F_SG; + case ETHTOOL_GTSO: + case ETHTOOL_STSO: + return NETIF_F_ALL_TSO; + case ETHTOOL_GUFO: + case ETHTOOL_SUFO: + return NETIF_F_UFO; + case ETHTOOL_GGSO: + case ETHTOOL_SGSO: + return NETIF_F_GSO; + case ETHTOOL_GGRO: + case ETHTOOL_SGRO: + return NETIF_F_GRO; + default: + BUG(); + } +} + +static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return NULL; + + switch (ethcmd) { + case ETHTOOL_GTXCSUM: + return ops->get_tx_csum; + case ETHTOOL_GRXCSUM: + return ops->get_rx_csum; + case ETHTOOL_SSG: + return ops->get_sg; + case ETHTOOL_STSO: + return ops->get_tso; + case ETHTOOL_SUFO: + return ops->get_ufo; + default: + return NULL; + } +} + +static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) +{ + return !!(dev->features & NETIF_F_ALL_CSUM); +} + +static int ethtool_get_one_feature(struct net_device *dev, + char __user *useraddr, u32 ethcmd) +{ + u32 mask = ethtool_get_feature_mask(ethcmd); + struct ethtool_value edata = { + .cmd = ethcmd, + .data = !!(dev->features & mask), + }; + + /* compatibility with discrete get_ ops */ + if (!(dev->hw_features & mask)) { + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + + /* bug compatibility with old get_rx_csum */ + if (ethcmd == ETHTOOL_GRXCSUM && !actor) + actor = __ethtool_get_rx_csum_oldbug; + + if (actor) + edata.data = actor(dev); + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_sg(struct net_device *dev, u32 data); +static int __ethtool_set_tso(struct net_device *dev, u32 data); +static int __ethtool_set_ufo(struct net_device *dev, u32 data); + +static int ethtool_set_one_feature(struct net_device *dev, + void __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata; + u32 mask; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + mask = ethtool_get_feature_mask(ethcmd); + mask &= dev->hw_features; + if (mask) { + if (edata.data) + dev->wanted_features |= mask; + else + dev->wanted_features &= ~mask; + + netdev_update_features(dev); + return 0; + } + + /* Driver is not converted to ndo_fix_features or does not + * support changing this offload. In the latter case it won't + * have corresponding ethtool_ops field set. + * + * Following part is to be removed after all drivers advertise + * their changeable features in netdev->hw_features and stop + * using discrete offload setting ops. + */ + + switch (ethcmd) { + case ETHTOOL_STXCSUM: + return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SRXCSUM: + return __ethtool_set_rx_csum(dev, edata.data); + case ETHTOOL_SSG: + return __ethtool_set_sg(dev, edata.data); + case ETHTOOL_STSO: + return __ethtool_set_tso(dev, edata.data); + case ETHTOOL_SUFO: + return __ethtool_set_ufo(dev, edata.data); + default: + return -EOPNOTSUPP; + } +} + +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + u32 changed; + + if (data & ~flags_dup_features) + return -EINVAL; + + /* legacy set_flags() op */ + if (dev->ethtool_ops->set_flags) { + if (unlikely(dev->hw_features & flags_dup_features)) + netdev_warn(dev, + "driver BUG: mixed hw_features and set_flags()\n"); + return dev->ethtool_ops->set_flags(dev, data); + } + + /* allow changing only bits set in hw_features */ + changed = (data ^ dev->wanted_features) & flags_dup_features; + if (changed & ~dev->hw_features) + return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; + + dev->wanted_features = + (dev->wanted_features & ~changed) | data; + + netdev_update_features(dev); + + return 0; +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -251,14 +546,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; u64 sset_mask; int i, idx = 0, n_bits = 0, ret, rc; u32 *info_buf = NULL; - if (!ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; @@ -285,7 +576,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, if (!(sset_mask & (1ULL << i))) continue; - rc = ops->get_sset_count(dev, i); + rc = __ethtool_get_sset_count(dev, i); if (rc >= 0) { info.sset_mask |= (1ULL << i); info_buf[idx++] = rc; @@ -1091,6 +1382,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + if (!data && dev->ethtool_ops->set_tso) { err = dev->ethtool_ops->set_tso(dev, 0); if (err) @@ -1105,145 +1399,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return dev->ethtool_ops->set_sg(dev, data); } -static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; int err; if (!dev->ethtool_ops->set_tx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) { + if (!data && dev->ethtool_ops->set_sg) { err = __ethtool_set_sg(dev, 0); if (err) return err; } - return dev->ethtool_ops->set_tx_csum(dev, edata.data); + return dev->ethtool_ops->set_tx_csum(dev, data); } -EXPORT_SYMBOL(ethtool_op_set_tx_csum); -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_rx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) + if (!data) dev->features &= ~NETIF_F_GRO; - return dev->ethtool_ops->set_rx_csum(dev, edata.data); + return dev->ethtool_ops->set_rx_csum(dev, data); } -static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tso(struct net_device *dev, u32 data) { - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && - !(dev->features & NETIF_F_ALL_CSUM)) - return -EINVAL; - - return __ethtool_set_sg(dev, edata.data); -} - -static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - if (!dev->ethtool_ops->set_tso) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - return dev->ethtool_ops->set_tso(dev, edata.data); + return dev->ethtool_ops->set_tso(dev, data); } -static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_ufo(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_ufo) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + if (data && !((dev->features & NETIF_F_GEN_CSUM) || (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; - return dev->ethtool_ops->set_ufo(dev, edata.data); -} - -static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGSO }; - - edata.data = dev->features & NETIF_F_GSO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data) - dev->features |= NETIF_F_GSO; - else - dev->features &= ~NETIF_F_GSO; - return 0; -} - -static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGRO }; - - edata.data = dev->features & NETIF_F_GRO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data) { - u32 rxcsum = dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum(dev) : - ethtool_op_get_rx_csum(dev); - - if (!rxcsum) - return -EINVAL; - dev->features |= NETIF_F_GRO; - } else - dev->features &= ~NETIF_F_GRO; - - return 0; + return dev->ethtool_ops->set_ufo(dev, data); } static int ethtool_self_test(struct net_device *dev, char __user *useraddr) @@ -1287,17 +1491,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; - if (!ops->get_strings || !ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - ret = ops->get_sset_count(dev, gstrings.string_set); + ret = __ethtool_get_sset_count(dev, gstrings.string_set); if (ret < 0) return ret; @@ -1307,7 +1507,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - ops->get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1317,7 +1517,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; ret = 0; - out: +out: kfree(data); return ret; } @@ -1500,6 +1700,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: + case ETHTOOL_GFEATURES: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1570,42 +1771,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SPAUSEPARAM: rc = ethtool_set_pauseparam(dev, useraddr); break; - case ETHTOOL_GRXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum : - ethtool_op_get_rx_csum)); - break; - case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); - break; - case ETHTOOL_GTXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tx_csum ? - dev->ethtool_ops->get_tx_csum : - ethtool_op_get_tx_csum)); - break; - case ETHTOOL_STXCSUM: - rc = ethtool_set_tx_csum(dev, useraddr); - break; - case ETHTOOL_GSG: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_sg ? - dev->ethtool_ops->get_sg : - ethtool_op_get_sg)); - break; - case ETHTOOL_SSG: - rc = ethtool_set_sg(dev, useraddr); - break; - case ETHTOOL_GTSO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tso ? - dev->ethtool_ops->get_tso : - ethtool_op_get_tso)); - break; - case ETHTOOL_STSO: - rc = ethtool_set_tso(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1621,21 +1786,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; - case ETHTOOL_GUFO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_ufo ? - dev->ethtool_ops->get_ufo : - ethtool_op_get_ufo)); - break; - case ETHTOOL_SUFO: - rc = ethtool_set_ufo(dev, useraddr); - break; - case ETHTOOL_GGSO: - rc = ethtool_get_gso(dev, useraddr); - break; - case ETHTOOL_SGSO: - rc = ethtool_set_gso(dev, useraddr); - break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, (dev->ethtool_ops->get_flags ? @@ -1643,8 +1793,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) ethtool_op_get_flags)); break; case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_flags); + rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); break; case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, @@ -1666,12 +1815,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCLSRLINS: rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; - case ETHTOOL_GGRO: - rc = ethtool_get_gro(dev, useraddr); - break; - case ETHTOOL_SGRO: - rc = ethtool_set_gro(dev, useraddr); - break; case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; @@ -1693,6 +1836,30 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GFEATURES: + rc = ethtool_get_features(dev, useraddr); + break; + case ETHTOOL_SFEATURES: + rc = ethtool_set_features(dev, useraddr); + break; + case ETHTOOL_GTXCSUM: + case ETHTOOL_GRXCSUM: + case ETHTOOL_GSG: + case ETHTOOL_GTSO: + case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + rc = ethtool_get_one_feature(dev, useraddr, ethcmd); + break; + case ETHTOOL_STXCSUM: + case ETHTOOL_SRXCSUM: + case ETHTOOL_SSG: + case ETHTOOL_STSO: + case ETHTOOL_SUFO: + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + rc = ethtool_set_one_feature(dev, useraddr, ethcmd); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2e4a393dfc3..5ceb257e860 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -330,7 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group), + __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index da0fe457c85..49f7ea5b4c7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1036,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, + [IFLA_MASTER] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1178,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) return err; } +static int do_set_master(struct net_device *dev, int ifindex) +{ + struct net_device *master_dev; + const struct net_device_ops *ops; + int err; + + if (dev->master) { + if (dev->master->ifindex == ifindex) + return 0; + ops = dev->master->netdev_ops; + if (ops->ndo_del_slave) { + err = ops->ndo_del_slave(dev->master, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + + if (ifindex) { + master_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!master_dev) + return -EINVAL; + ops = master_dev->netdev_ops; + if (ops->ndo_add_slave) { + err = ops->ndo_add_slave(master_dev, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + return 0; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -1301,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 6b03f561cae..d5074a56728 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -626,6 +626,9 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, dcb->cmd = DCB_CMD_GAPP; app_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_APP); + if (!app_nest) + goto out_cancel; + ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_IDTYPE, idtype); if (ret) goto out_cancel; @@ -1613,6 +1616,10 @@ EXPORT_SYMBOL(dcb_getapp); u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) { struct dcb_app_type *itr; + struct dcb_app_type event; + + memcpy(&event.name, dev->name, sizeof(event.name)); + memcpy(&event.app, new, sizeof(event.app)); spin_lock(&dcb_lock); /* Search for existing match and replace */ @@ -1644,7 +1651,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) } out: spin_unlock(&dcb_lock); - call_dcbevent_notifiers(DCB_APP_EVENT, new); + call_dcbevent_notifiers(DCB_APP_EVENT, &event); return 0; } EXPORT_SYMBOL(dcb_setapp); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 42c9c62d341..06c054d5ccb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1122,7 +1122,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; @@ -1383,7 +1383,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 748cb5b337b..90389281d97 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -51,6 +51,7 @@ #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/slab.h> +#include <linux/hash.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -92,6 +93,71 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; +/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE + * value. So if you change this define, make appropriate changes to + * inet_addr_hash as well. + */ +#define IN4_ADDR_HSIZE 256 +static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; +static DEFINE_SPINLOCK(inet_addr_hash_lock); + +static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) +{ + u32 val = (__force u32) addr ^ hash_ptr(net, 8); + + return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & + (IN4_ADDR_HSIZE - 1)); +} + +static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) +{ + unsigned int hash = inet_addr_hash(net, ifa->ifa_address); + + spin_lock(&inet_addr_hash_lock); + hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); + spin_unlock(&inet_addr_hash_lock); +} + +static void inet_hash_remove(struct in_ifaddr *ifa) +{ + spin_lock(&inet_addr_hash_lock); + hlist_del_init_rcu(&ifa->hash); + spin_unlock(&inet_addr_hash_lock); +} + +/** + * __ip_dev_find - find the first device with a given source address. + * @net: the net namespace + * @addr: the source address + * @devref: if true, take a reference on the found device + * + * If a caller uses devref=false, it should be protected by RCU, or RTNL + */ +struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) +{ + unsigned int hash = inet_addr_hash(net, addr); + struct net_device *result = NULL; + struct in_ifaddr *ifa; + struct hlist_node *node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + struct net_device *dev = ifa->ifa_dev->dev; + + if (!net_eq(dev_net(dev), net)) + continue; + if (ifa->ifa_address == addr) { + result = dev; + break; + } + } + if (result && devref) + dev_hold(result); + rcu_read_unlock(); + return result; +} +EXPORT_SYMBOL(__ip_dev_find); + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); @@ -265,6 +331,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } if (!do_promote) { + inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); @@ -281,6 +348,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, /* 2. Unlink it */ *ifap = ifa1->ifa_next; + inet_hash_remove(ifa1); /* 3. Announce address deletion */ @@ -368,6 +436,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ifa->ifa_next = *ifap; *ifap = ifa; + inet_hash_insert(dev_net(in_dev->dev), ifa); + /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -521,6 +591,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_ADDRESS] == NULL) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + INIT_HLIST_NODE(&ifa->hash); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); ifa->ifa_flags = ifm->ifa_flags; @@ -728,6 +799,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { ret = -ENOBUFS; ifa = inet_alloc_ifa(); + INIT_HLIST_NODE(&ifa->hash); if (!ifa) break; if (colon) @@ -1030,6 +1102,21 @@ static inline bool inetdev_valid_mtu(unsigned mtu) return mtu >= 68; } +static void inetdev_send_gratuitous_arp(struct net_device *dev, + struct in_device *in_dev) + +{ + struct in_ifaddr *ifa = in_dev->ifa_list; + + if (!ifa) + return; + + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_address, dev, + ifa->ifa_address, NULL, + dev->dev_addr, NULL); +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1069,6 +1156,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct in_ifaddr *ifa = inet_alloc_ifa(); if (ifa) { + INIT_HLIST_NODE(&ifa->hash); ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; @@ -1082,18 +1170,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } ip_mc_up(in_dev); /* fall through */ - case NETDEV_NOTIFY_PEERS: case NETDEV_CHANGEADDR: + if (!IN_DEV_ARP_NOTIFY(in_dev)) + break; + /* fall through */ + case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ - if (IN_DEV_ARP_NOTIFY(in_dev)) { - struct in_ifaddr *ifa = in_dev->ifa_list; - - if (ifa) - arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_address, dev, - ifa->ifa_address, NULL, - dev->dev_addr, NULL); - } + inetdev_send_gratuitous_arp(dev, in_dev); break; case NETDEV_DOWN: ip_mc_down(in_dev); @@ -1710,6 +1793,11 @@ static struct rtnl_af_ops inet_af_ops = { void __init devinet_init(void) { + int i; + + for (i = 0; i < IN4_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet_addr_lst[i]); + register_pernet_subsys(&devinet_ops); register_gifconf(PF_INET, inet_gifconf); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2a49c061b34..ad0778a3fa5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -132,46 +132,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net, -1); } -/** - * __ip_dev_find - find the first device with a given source address. - * @net: the net namespace - * @addr: the source address - * @devref: if true, take a reference on the found device - * - * If a caller uses devref=false, it should be protected by RCU, or RTNL - */ -struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) -{ - struct flowi fl = { - .fl4_dst = addr, - }; - struct fib_result res = { 0 }; - struct net_device *dev = NULL; - struct fib_table *local_table; - -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - - rcu_read_lock(); - local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (!local_table || - fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { - rcu_read_unlock(); - return NULL; - } - if (res.type != RTN_LOCAL) - goto out; - dev = FIB_RES_DEV(res); - - if (dev && devref) - dev_hold(dev); -out: - rcu_read_unlock(); - return dev; -} -EXPORT_SYMBOL(__ip_dev_find); - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9cefe72029c..3018efbaea7 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,7 +47,7 @@ struct fib4_rule { }; #ifdef CONFIG_IP_ROUTE_CLASSID -u32 fib_rules_tclass(struct fib_result *res) +u32 fib_rules_tclass(const struct fib_result *res) { return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 146bd82ef60..562f34cd930 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1189,7 +1189,7 @@ void fib_select_default(struct fib_result *res) fib_result_assign(res, last_resort); tb->tb_default = last_idx; out: - rcu_read_unlock(); + return; } #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1eae90b054e..edf3b0997e0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn) return 1 << tn->bits; } -static inline t_key mask_pfx(t_key k, unsigned short l) +static inline t_key mask_pfx(t_key k, unsigned int l) { return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); } -static inline t_key tkey_extract_bits(t_key a, int offset, int bits) +static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) { if (offset < KEYLENGTH) return ((t_key)(a << offset)) >> (KEYLENGTH - bits); @@ -1378,11 +1378,11 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, int ret; struct rt_trie_node *n; struct tnode *pn; - int pos, bits; + unsigned int pos, bits; t_key key = ntohl(flp->fl4_dst); - int chopped_off; + unsigned int chopped_off; t_key cindex = 0; - int current_prefix_length = KEYLENGTH; + unsigned int current_prefix_length = KEYLENGTH; struct tnode *cn; t_key pref_mismatch; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c5af909cf70..3c8dfa16614 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -505,7 +505,9 @@ restart: } rcu_read_unlock(); + local_bh_disable(); inet_twsk_deschedule(tw, twdr); + local_bh_enable(); inet_twsk_put(tw); goto restart_rcu; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 709fbb4132d..48f8d4592cc 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a, int i, n = (a->family == AF_INET ? 1 : 4); for (i = 0; i < n; i++) { - if (a->a6[i] == b->a6[i]) + if (a->addr.a6[i] == b->addr.a6[i]) continue; - if (a->a6[i] < b->a6[i]) + if (a->addr.a6[i] < b->addr.a6[i]) return -1; return 1; } @@ -510,11 +510,13 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); + atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; + p->pmtu_expires = 0; + memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->unused); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index eb68a0e34e4..6613edfac28 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -775,6 +775,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev .fl4_dst = dst, .fl4_src = tiph->saddr, .fl4_tos = RT_TOS(tos), + .proto = IPPROTO_GRE, .fl_gre_key = tunnel->parms.o_key }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0455af85175..52b077d4520 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; -static struct delayed_work expires_work; -static unsigned long expires_ljiffies; - /* * Interface to generic destination cache. */ @@ -668,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->dst.expires; + (rth->peer && rth->peer->pmtu_expires); } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -679,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t if (atomic_read(&rth->dst.__refcnt)) goto out; - ret = 1; - if (rth->dst.expires && - time_after_eq(jiffies, rth->dst.expires)) - goto out; - age = jiffies - rth->dst.lastuse; - ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) goto out; @@ -829,97 +820,6 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } -static void rt_check_expire(void) -{ - static unsigned int rover; - unsigned int i = rover, goal; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long samples = 0; - unsigned long sum = 0, sum2 = 0; - unsigned long delta; - u64 mult; - - delta = jiffies - expires_ljiffies; - expires_ljiffies = jiffies; - mult = ((u64)delta) << rt_hash_log; - if (ip_rt_gc_timeout > 1) - do_div(mult, ip_rt_gc_timeout); - goal = (unsigned int)mult; - if (goal > rt_hash_mask) - goal = rt_hash_mask + 1; - for (; goal > 0; goal--) { - unsigned long tmo = ip_rt_gc_timeout; - unsigned long length; - - i = (i + 1) & rt_hash_mask; - rthp = &rt_hash_table[i].chain; - - if (need_resched()) - cond_resched(); - - samples++; - - if (rcu_dereference_raw(*rthp) == NULL) - continue; - length = 0; - spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { - prefetch(rth->dst.rt_next); - if (rt_is_expired(rth)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - if (rth->dst.expires) { - /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->dst.expires)) { -nofree: - tmo >>= 1; - rthp = &rth->dst.rt_next; - /* - * We only count entries on - * a chain with equal hash inputs once - * so that entries for different QOS - * levels, and other non-hash input - * attributes don't unfairly skew - * the length computation - */ - length += has_noalias(rt_hash_table[i].chain, rth); - continue; - } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) - goto nofree; - - /* Cleanup aged off entries. */ - *rthp = rth->dst.rt_next; - rt_free(rth); - } - spin_unlock_bh(rt_hash_lock_addr(i)); - sum += length; - sum2 += length*length; - } - if (samples) { - unsigned long avg = sum / samples; - unsigned long sd = int_sqrt(sum2 / samples - avg*avg); - rt_chain_length_max = max_t(unsigned long, - ip_rt_gc_elasticity, - (avg + 4*sd) >> FRACT_BITS); - } - rover = i; -} - -/* - * rt_worker_func() is run in process context. - * we call rt_check_expire() to scan part of the hash table - */ -static void rt_worker_func(struct work_struct *work) -{ - rt_check_expire(); - schedule_delayed_work(&expires_work, ip_rt_gc_interval); -} - /* * Pertubation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -1308,6 +1208,13 @@ skip_hashing: return 0; } +static atomic_t __rt_peer_genid = ATOMIC_INIT(0); + +static u32 rt_peer_genid(void) +{ + return atomic_read(&__rt_peer_genid); +} + void rt_bind_peer(struct rtable *rt, int create) { struct inet_peer *peer; @@ -1316,6 +1223,8 @@ void rt_bind_peer(struct rtable *rt, int create) if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); + else + rt->rt_peer_genid = rt_peer_genid(); } /* @@ -1385,13 +1294,8 @@ static void rt_del(unsigned hash, struct rtable *rt) void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { - int i, k; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rth; - struct rtable __rcu **rthp; - __be32 skeys[2] = { saddr, 0 }; - int ikeys[2] = { dev->ifindex, 0 }; - struct netevent_redirect netevent; + struct inet_peer *peer; struct net *net; if (!in_dev) @@ -1403,9 +1307,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ipv4_is_zeronet(new_gw)) goto reject_redirect; - if (!rt_caching(net)) - goto reject_redirect; - if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; @@ -1416,93 +1317,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - for (i = 0; i < 2; i++) { - for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], - rt_genid(net)); - - rthp = &rt_hash_table[hash].chain; - - while ((rth = rcu_dereference(*rthp)) != NULL) { - struct rtable *rt; - - if (rth->fl.fl4_dst != daddr || - rth->fl.fl4_src != skeys[i] || - rth->fl.oif != ikeys[k] || - rt_is_input_route(rth) || - rt_is_expired(rth) || - !net_eq(dev_net(rth->dst.dev), net)) { - rthp = &rth->dst.rt_next; - continue; - } - - if (rth->rt_dst != daddr || - rth->rt_src != saddr || - rth->dst.error || - rth->rt_gateway != old_gw || - rth->dst.dev != dev) - break; - - dst_hold(&rth->dst); - - rt = dst_alloc(&ipv4_dst_ops); - if (rt == NULL) { - ip_rt_put(rth); - return; - } - - /* Copy all the information. */ - *rt = *rth; - rt->dst.__use = 1; - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.child = NULL; - if (rt->dst.dev) - dev_hold(rt->dst.dev); - rt->dst.obsolete = -1; - rt->dst.lastuse = jiffies; - rt->dst.path = &rt->dst; - rt->dst.neighbour = NULL; - rt->dst.hh = NULL; -#ifdef CONFIG_XFRM - rt->dst.xfrm = NULL; -#endif - rt->rt_genid = rt_genid(net); - rt->rt_flags |= RTCF_REDIRECTED; - - /* Gateway is different ... */ - rt->rt_gateway = new_gw; - - /* Redirect received -> path was valid */ - dst_confirm(&rth->dst); - - if (rt->peer) - atomic_inc(&rt->peer->refcnt); - if (rt->fi) - atomic_inc(&rt->fi->fib_clntref); - - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & - NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); - ip_rt_put(rth); - rt_drop(rt); - goto do_next; - } + peer = inet_getpeer_v4(daddr, 1); + if (peer) { + peer->redirect_learned.a4 = new_gw; - netevent.old = &rth->dst; - netevent.new = &rt->dst; - call_netevent_notifiers(NETEVENT_REDIRECT, - &netevent); + inet_putpeer(peer); - rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) - ip_rt_put(rt); - goto do_next; - } - do_next: - ; - } + atomic_inc(&__rt_peer_genid); } return; @@ -1526,9 +1347,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->dst.expires && - time_after_eq(jiffies, rt->dst.expires))) { + } else if (rt->rt_flags & RTCF_REDIRECTED) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); @@ -1538,6 +1357,14 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) #endif rt_del(hash, rt); ret = NULL; + } else if (rt->peer && + rt->peer->pmtu_expires && + time_after_eq(jiffies, rt->peer->pmtu_expires)) { + unsigned long orig = rt->peer->pmtu_expires; + + if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) + dst_metric_set(dst, RTAX_MTU, + rt->peer->pmtu_orig); } } return ret; @@ -1688,87 +1515,130 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev) { - int i, k; unsigned short old_mtu = ntohs(iph->tot_len); - struct rtable *rth; - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { iph->saddr, 0, }; - __be32 daddr = iph->daddr; unsigned short est_mtu = 0; + struct inet_peer *peer; - for (k = 0; k < 2; k++) { - for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], - rt_genid(net)); - - rcu_read_lock(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->dst.rt_next)) { - unsigned short mtu = new_mtu; - - if (rth->fl.fl4_dst != daddr || - rth->fl.fl4_src != skeys[i] || - rth->rt_dst != daddr || - rth->rt_src != iph->saddr || - rth->fl.oif != ikeys[k] || - rt_is_input_route(rth) || - dst_metric_locked(&rth->dst, RTAX_MTU) || - !net_eq(dev_net(rth->dst.dev), net) || - rt_is_expired(rth)) - continue; - - if (new_mtu < 68 || new_mtu >= old_mtu) { + peer = inet_getpeer_v4(iph->daddr, 1); + if (peer) { + unsigned short mtu = new_mtu; - /* BSD 4.2 compatibility hack :-( */ - if (mtu == 0 && - old_mtu >= dst_mtu(&rth->dst) && - old_mtu >= 68 + (iph->ihl << 2)) - old_mtu -= iph->ihl << 2; + if (new_mtu < 68 || new_mtu >= old_mtu) { + /* BSD 4.2 derived systems incorrectly adjust + * tot_len by the IP header length, and report + * a zero MTU in the ICMP message. + */ + if (mtu == 0 && + old_mtu >= 68 + (iph->ihl << 2)) + old_mtu -= iph->ihl << 2; + mtu = guess_mtu(old_mtu); + } - mtu = guess_mtu(old_mtu); - } - if (mtu <= dst_mtu(&rth->dst)) { - if (mtu < dst_mtu(&rth->dst)) { - dst_confirm(&rth->dst); - if (mtu < ip_rt_min_pmtu) { - u32 lock = dst_metric(&rth->dst, - RTAX_LOCK); - mtu = ip_rt_min_pmtu; - lock |= (1 << RTAX_MTU); - dst_metric_set(&rth->dst, RTAX_LOCK, - lock); - } - dst_metric_set(&rth->dst, RTAX_MTU, mtu); - dst_set_expires(&rth->dst, - ip_rt_mtu_expires); - } - est_mtu = mtu; - } - } - rcu_read_unlock(); + if (mtu < ip_rt_min_pmtu) + mtu = ip_rt_min_pmtu; + if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + est_mtu = mtu; + peer->pmtu_learned = mtu; + peer->pmtu_expires = jiffies + ip_rt_mtu_expires; } + + inet_putpeer(peer); + + atomic_inc(&__rt_peer_genid); } return est_mtu ? : new_mtu; } +static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) +{ + unsigned long expires = peer->pmtu_expires; + + if (time_before(expires, jiffies)) { + u32 orig_dst_mtu = dst_mtu(dst); + if (peer->pmtu_learned < orig_dst_mtu) { + if (!peer->pmtu_orig) + peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); + dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); + } + } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) + dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); +} + static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst_mtu(dst) > mtu && mtu >= 68 && - !(dst_metric_locked(dst, RTAX_MTU))) { - if (mtu < ip_rt_min_pmtu) { - u32 lock = dst_metric(dst, RTAX_LOCK); + struct rtable *rt = (struct rtable *) dst; + struct inet_peer *peer; + + dst_confirm(dst); + + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (peer) { + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); + if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + peer->pmtu_learned = mtu; + peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + + atomic_inc(&__rt_peer_genid); + rt->rt_peer_genid = rt_peer_genid(); + + check_peer_pmtu(dst, peer); } - dst_metric_set(dst, RTAX_MTU, mtu); - dst_set_expires(dst, ip_rt_mtu_expires); + inet_putpeer(peer); } } +static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +{ + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; + + dst_confirm(&rt->dst); + + neigh_release(rt->dst.neighbour); + rt->dst.neighbour = NULL; + + rt->rt_gateway = peer->redirect_learned.a4; + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); + rt->rt_gateway = orig_gw; + return -EAGAIN; + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, + rt->dst.neighbour); + } + return 0; +} + static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - if (rt_is_expired((struct rtable *)dst)) + struct rtable *rt = (struct rtable *) dst; + + if (rt_is_expired(rt)) return NULL; + if (rt->rt_peer_genid != rt_peer_genid()) { + struct inet_peer *peer; + + if (!rt->peer) + rt_bind_peer(rt, 0); + + peer = rt->peer; + if (peer && peer->pmtu_expires) + check_peer_pmtu(dst, peer); + + if (peer && peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } + + rt->rt_peer_genid = rt_peer_genid(); + } return dst; } @@ -1795,8 +1665,14 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt) - dst_set_expires(&rt->dst, 0); + if (rt && + rt->peer && + rt->peer->pmtu_expires) { + unsigned long orig = rt->peer->pmtu_expires; + + if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); + } } static int ip_rt_bug(struct sk_buff *skb) @@ -1894,6 +1770,14 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) memcpy(peer->metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); + + if (peer->pmtu_expires) + check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + rt->rt_gateway = peer->redirect_learned.a4; + rt->rt_flags |= RTCF_REDIRECTED; + } } else { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; @@ -1903,10 +1787,10 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) } } -static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) +static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res, + struct fib_info *fi, u16 type, u32 itag) { struct dst_entry *dst = &rt->dst; - struct fib_info *fi = res->fi; if (fi) { if (FIB_RES_GW(*res) && @@ -1929,7 +1813,20 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) #endif set_class_tag(rt, itag); #endif - rt->rt_type = res->type; + rt->rt_type = type; +} + +static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) +{ + struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); + if (rt) { + rt->dst.obsolete = -1; + + rt->dst.flags = DST_HOST | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? DST_NOXFRM : 0); + } + return rt; } /* called in rcu_read_lock() section */ @@ -1962,17 +1859,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (err < 0) goto e_err; } - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; rth->dst.output = ip_rt_bug; - rth->dst.obsolete = -1; - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2047,7 +1939,7 @@ static void ip_handle_martian_source(struct net_device *dev, /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, - struct fib_result *res, + const struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) @@ -2101,19 +1993,13 @@ static int __mkroute_input(struct sk_buff *skb, } } - - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; } - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2128,12 +2014,11 @@ static int __mkroute_input(struct sk_buff *skb, rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; - rth->dst.obsolete = -1; rth->dst.input = ip_forward; rth->dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); - rt_set_nexthop(rth, res, itag); + rt_set_nexthop(rth, res, res->fi, res->type, itag); rth->rt_flags = flags; @@ -2278,18 +2163,13 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; rth->dst.output= ip_rt_bug; - rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2439,38 +2319,39 @@ skip_cache: EXPORT_SYMBOL(ip_route_input_common); /* called with rcu_read_lock() */ -static int __mkroute_output(struct rtable **result, - struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) +static struct rtable *__mkroute_output(const struct fib_result *res, + const struct flowi *fl, + const struct flowi *oldflp, + struct net_device *dev_out, + unsigned int flags) { - struct rtable *rth; - struct in_device *in_dev; + struct fib_info *fi = res->fi; u32 tos = RT_FL_TOS(oldflp); + struct in_device *in_dev; + u16 type = res->type; + struct rtable *rth; if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) - return -EINVAL; + return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl->fl4_dst)) - res->type = RTN_BROADCAST; + type = RTN_BROADCAST; else if (ipv4_is_multicast(fl->fl4_dst)) - res->type = RTN_MULTICAST; + type = RTN_MULTICAST; else if (ipv4_is_zeronet(fl->fl4_dst)) - return -EINVAL; + return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) - return -EINVAL; + return ERR_PTR(-EINVAL); - if (res->type == RTN_BROADCAST) { + if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; - res->fi = NULL; - } else if (res->type == RTN_MULTICAST) { + fi = NULL; + } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, oldflp->proto)) @@ -2479,21 +2360,14 @@ static int __mkroute_output(struct rtable **result, * default one, but do not gateway in this case. * Yes, it is hack. */ - if (res->fi && res->prefixlen < 4) - res->fi = NULL; + if (fi && res->prefixlen < 4) + fi = NULL; } - - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) - return -ENOBUFS; - - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->dst.flags |= DST_NOXFRM; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; + return ERR_PTR(-ENOBUFS); rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; @@ -2511,7 +2385,6 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->dst.output=ip_output; - rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); @@ -2528,7 +2401,7 @@ static int __mkroute_output(struct rtable **result, RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE - if (res->type == RTN_MULTICAST) { + if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(oldflp->fl4_dst)) { rth->dst.input = ip_mr_input; @@ -2538,31 +2411,10 @@ static int __mkroute_output(struct rtable **result, #endif } - rt_set_nexthop(rth, res, 0); + rt_set_nexthop(rth, res, fi, type, 0); rth->rt_flags = flags; - *result = rth; - return 0; -} - -/* called with rcu_read_lock() */ -static int ip_mkroute_output(struct rtable **rp, - struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ - struct rtable *rth = NULL; - int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); - unsigned hash; - if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, - rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); - } - - return err; + return rth; } /* @@ -2585,6 +2437,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; + struct rtable *rth; int err; @@ -2593,6 +2446,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, res.r = NULL; #endif + rcu_read_lock(); if (oldflp->fl4_src) { err = -EINVAL; if (ipv4_is_multicast(oldflp->fl4_src) || @@ -2743,17 +2597,27 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, make_route: - err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); + rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags); + if (IS_ERR(rth)) + err = PTR_ERR(rth); + else { + unsigned int hash; -out: return err; + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + rt_genid(dev_net(dev_out))); + err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); + } + +out: + rcu_read_unlock(); + return err; } int __ip_route_output_key(struct net *net, struct rtable **rp, const struct flowi *flp) { - unsigned int hash; - int res; struct rtable *rth; + unsigned int hash; if (!rt_caching(net)) goto slow_output; @@ -2783,10 +2647,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rcu_read_unlock_bh(); slow_output: - rcu_read_lock(); - res = ip_route_output_slow(net, rp, flp); - rcu_read_unlock(); - return res; + return ip_route_output_slow(net, rp, flp); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2810,6 +2671,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, .default_mtu = ipv4_blackhole_default_mtu, + .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, }; @@ -2818,12 +2680,11 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) - dst_alloc(&ipv4_dst_blackhole_ops); + dst_alloc(&ipv4_dst_blackhole_ops, 1); if (rt) { struct dst_entry *new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; @@ -2944,7 +2805,8 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); error = rt->dst.error; - expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; + expires = (rt->peer && rt->peer->pmtu_expires) ? + rt->peer->pmtu_expires - jiffies : 0; if (rt->peer) { inet_peer_refcheck(rt->peer); id = atomic_read(&rt->peer->ip_id_count) & 0xffff; @@ -3401,14 +3263,6 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - /* All the timers, started at system startup tend - to synchronize. Perturb it a bit. - */ - INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); - expires_ljiffies = jiffies; - schedule_delayed_work(&expires_work, - net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f9867d2dbef..a17a5a72b98 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -873,9 +873,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); - TCP_CHECK_TIMER(sk); res = do_tcp_sendpages(sk, &page, offset, size, flags); - TCP_CHECK_TIMER(sk); release_sock(sk); return res; } @@ -916,7 +914,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; lock_sock(sk); - TCP_CHECK_TIMER(sk); flags = msg->msg_flags; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1104,7 +1101,6 @@ wait_for_memory: out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle); - TCP_CHECK_TIMER(sk); release_sock(sk); return copied; @@ -1123,7 +1119,6 @@ do_error: goto out; out_err: err = sk_stream_error(sk, flags, err); - TCP_CHECK_TIMER(sk); release_sock(sk); return err; } @@ -1415,8 +1410,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - TCP_CHECK_TIMER(sk); - err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; @@ -1767,12 +1760,10 @@ skip_copy: /* Clean up data we have read: This will do ACK frames. */ tcp_cleanup_rbuf(sk, copied); - TCP_CHECK_TIMER(sk); release_sock(sk); return copied; out: - TCP_CHECK_TIMER(sk); release_sock(sk); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 02f583b3744..ef5a90beb9b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1341,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->daddr.a4 == saddr) { + peer->daddr.addr.a4 == saddr) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > @@ -1556,12 +1556,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb->rxhash); - TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } - TCP_CHECK_TIMER(sk); return 0; } @@ -1583,13 +1581,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb->rxhash); - - TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } - TCP_CHECK_TIMER(sk); return 0; reset: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74a6aa00365..ecd44b0c45f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data) tcp_send_ack(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } - TCP_CHECK_TIMER(sk); out: if (tcp_memory_pressure) @@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data) tcp_probe_timer(sk); break; } - TCP_CHECK_TIMER(sk); out: sk_mem_reclaim(sk); @@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp) - elapsed; } - TCP_CHECK_TIMER(sk); sk_mem_reclaim(sk); resched: diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 05027b75372..e6af8d72f26 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -410,7 +410,7 @@ fallback: if (p != NULL) { sb_add(m, "%02x", *p++); for (i = 1; i < len; i++) - sb_add(m, ":%02x", p[i]); + sb_add(m, ":%02x", *p++); } sb_add(m, " "); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12ec83d4880..f786aed02a6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -159,6 +159,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .default_mtu = ip6_blackhole_default_mtu, + .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, }; @@ -221,7 +222,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(ops); + return (struct rt6_info *)dst_alloc(ops, 0); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -240,6 +241,13 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } +static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); + +static u32 rt6_peer_genid(void) +{ + return atomic_read(&__rt6_peer_genid); +} + void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer *peer; @@ -247,6 +255,8 @@ void rt6_bind_peer(struct rt6_info *rt, int create) peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); + else + rt->rt6i_peer_genid = rt6_peer_genid(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -864,13 +874,12 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl { struct rt6_info *ort = (struct rt6_info *) *dstp; struct rt6_info *rt = (struct rt6_info *) - dst_alloc(&ip6_dst_blackhole_ops); + dst_alloc(&ip6_dst_blackhole_ops, 1); struct dst_entry *new = NULL; if (rt) { new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; @@ -912,9 +921,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { + if (rt->rt6i_peer_genid != rt6_peer_genid()) { + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 0); + rt->rt6i_peer_genid = rt6_peer_genid(); + } return dst; - + } return NULL; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 20aa95e3735..1d0ab557090 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1323,7 +1323,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet6_csk_route_req(sk, req)) != NULL && (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && - ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, + ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, &treq->rmt_addr)) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && @@ -1636,10 +1636,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; - TCP_CHECK_TIMER(sk); if (opt_skb) goto ipv6_pktoptions; return 0; @@ -1667,10 +1665,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; - TCP_CHECK_TIMER(sk); if (opt_skb) goto ipv6_pktoptions; return 0; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cf68700abff..d036597aabb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1210,7 +1210,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC; + mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); + mutex_unlock(&sdata->u.mgd.mtx); break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 1e00bf7d27c..899b71c0ff5 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -133,6 +133,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ +repeat: verdict = elem->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG @@ -145,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, #endif if (verdict != NF_REPEAT) return verdict; - *i = (*i)->prev; + goto repeat; } } return NF_ACCEPT; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index fb2d04ac5d4..b027ccc49f4 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1101,7 +1101,7 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); + spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2a2a8363ca1..d1b7298e589 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -392,7 +392,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!ipvs->sync_state & IP_VS_STATE_MASTER) + if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) return; if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) return; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1909311c392..2f454efa1a8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -959,8 +959,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: - if (tmpl) - nf_ct_put(tmpl); + if (tmpl) { + /* Special case: we have to repeat this hook, assign the + * template again to this packet. We assume that this packet + * has no conntrack assigned. This is used by nf_ct_tcp. */ + if (ret == NF_REPEAT) + skb->nfct = (struct nf_conntrack *)tmpl; + else + nf_ct_put(tmpl); + } return ret; } diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 4d87befb04c..474d621cbc2 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb) skb->destructor = NULL; if (sk) - nf_tproxy_put_sock(sk); + sock_put(sk); } /* consumes sk */ -int +void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_transparent : - inet_sk(sk)->transparent; - - if (transparent) { - skb_orphan(skb); - skb->sk = sk; - skb->destructor = nf_tproxy_destructor; - return 1; - } else - nf_tproxy_put_sock(sk); - - return 0; + /* assigning tw sockets complicates things; most + * skb->sk->X checks would have to test sk->sk_state first */ + if (sk->sk_state == TCP_TIME_WAIT) { + inet_twsk_put(inet_twsk(sk)); + return; + } + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; } EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 640678f47a2..dcfd57eb9d0 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -33,6 +33,20 @@ #include <net/netfilter/nf_tproxy_core.h> #include <linux/netfilter/xt_TPROXY.h> +static bool tproxy_sk_is_transparent(struct sock *sk) +{ + if (sk->sk_state != TCP_TIME_WAIT) { + if (inet_sk(sk)->transparent) + return true; + sock_put(sk); + } else { + if (inet_twsk(sk)->tw_transparent) + return true; + inet_twsk_put(inet_twsk(sk)); + } + return false; +} + static inline __be32 tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) { @@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, skb->dev, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; @@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", iph->protocol, &iph->daddr, ntohs(hp->dest), &laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } @@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) par->in, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; @@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", tproto, &iph->saddr, ntohs(hp->source), laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 00d6ae83830..9cc46356b57 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,6 +35,15 @@ #include <net/netfilter/nf_conntrack.h> #endif +static void +xt_socket_put_sk(struct sock *sk) +{ + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c60649ec119..5efef5b5879 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -465,7 +465,7 @@ retry: */ err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) goto out_unlock; if (!skb) { @@ -496,6 +496,19 @@ retry: goto retry; } + if (len > (dev->mtu + dev->hard_header_len)) { + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + struct ethhdr *ehdr; + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) { + err = -EMSGSIZE; + goto out_unlock; + } + } skb->protocol = proto; skb->dev = dev; @@ -1199,7 +1212,7 @@ static int packet_snd(struct socket *sock, } err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu+reserve)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) goto out_unlock; err = -ENOBUFS; @@ -1224,6 +1237,20 @@ static int packet_snd(struct socket *sock, if (err < 0) goto out_free; + if (!gso_type && (len > dev->mtu + reserve)) { + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + struct ethhdr *ehdr; + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) { + err = -EMSGSIZE; + goto out_free; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index d952e7eac18..5ee0c62046a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -803,7 +803,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose_insert_socket(sk); /* Finish the bind */ } -rose_try_next_neigh: rose->dest_addr = addr->srose_addr; rose->dest_call = addr->srose_call; rose->rand = ((long)rose & 0xFFFF) + rose->lci; @@ -865,12 +864,6 @@ rose_try_next_neigh: } if (sk->sk_state != TCP_ESTABLISHED) { - /* Try next neighbour */ - rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0); - if (rose->neighbour) - goto rose_try_next_neigh; - - /* No more neighbours */ sock->state = SS_UNCONNECTED; err = sock_error(sk); /* Always set at this point */ goto out_release; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index b4fdaac233f..88a77e90e7e 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -674,29 +674,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig * Find a neighbour or a route given a ROSE address. */ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, - unsigned char *diagnostic, int new) + unsigned char *diagnostic, int route_frame) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; - if (!new) spin_lock_bh(&rose_node_list_lock); + if (!route_frame) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { - if (new) { - if (node->neighbour[i]->restarted) { - res = node->neighbour[i]; - goto out; - } + if (node->neighbour[i]->restarted) { + res = node->neighbour[i]; + goto out; } - else { + } + } + } + if (!route_frame) { /* connect request */ + for (node = rose_node_list; node != NULL; node = node->next) { + if (rosecmpm(addr, &node->address, node->mask) == 0) { + for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; + failed = 0; goto out; - } else - failed = 1; + } + failed = 1; } } } @@ -711,8 +716,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, } out: - if (!new) spin_unlock_bh(&rose_node_list_lock); - + if (!route_frame) spin_unlock_bh(&rose_node_list_lock); return res; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2cc46f0962c..b23428f3c0d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), - param.v); + if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length)))) + sctp_addto_chunk_fixed(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); } else { /* If there is no memory for generating the ERROR * report as specified, an ABORT will be triggered diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 8e02550ff3e..b53b2ebbb19 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6102,15 +6102,16 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + wait_queue_head_t *wq = sk_sleep(sk); + + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ - if (sock->wq->fasync_list && - !(sk->sk_shutdown & SEND_SHUTDOWN)) + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 747d5412c46..f1e40cebc98 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -344,7 +344,7 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { - __u16 start, end; + __u16 start = 0, end = 0; sctp_tsnmap_iter_init(map, &iter); while (sctp_tsnmap_next_gap_ack(map, &iter, &start, diff --git a/net/socket.c b/net/socket.c index ac2219f90d5..9fa1e3b4366 100644 --- a/net/socket.c +++ b/net/socket.c @@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; + struct socket_wq *wq; ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); - if (!ei->socket.wq) { + wq = kmalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) { kmem_cache_free(sock_inode_cachep, ei); return NULL; } - init_waitqueue_head(&ei->socket.wq->wait); - ei->socket.wq->fasync_list = NULL; + init_waitqueue_head(&wq->wait); + wq->fasync_list = NULL; + RCU_INIT_POINTER(ei->socket.wq, wq); ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; @@ -273,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head) static void sock_destroy_inode(struct inode *inode) { struct socket_alloc *ei; + struct socket_wq *wq; ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + wq = rcu_dereference_protected(ei->socket.wq, 1); + call_rcu(&wq->rcu, wq_free_rcu); kmem_cache_free(sock_inode_cachep, ei); } @@ -524,7 +528,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->wq->fasync_list) + if (rcu_dereference_protected(sock->wq, 1)->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1108,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on) { struct socket *sock = filp->private_data; struct sock *sk = sock->sk; + struct socket_wq *wq; if (sk == NULL) return -EINVAL; lock_sock(sk); + wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + fasync_helper(fd, filp, on, &wq->fasync_list); - fasync_helper(fd, filp, on, &sock->wq->fasync_list); - - if (!sock->wq->fasync_list) + if (!wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7bd3bbba471..b7d435c3f19 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, static void svc_udp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", @@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count) static void svc_write_space(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", @@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { + if (wq && waitqueue_active(wq)) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk_sleep(sk)); + wake_up_interruptible(wq); } } @@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq; dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); @@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } /* @@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", sk, sk->sk_state, sk->sk_user_data); @@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } static void svc_tcp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->sk_user_data); @@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; + wait_queue_head_t *wq; dprintk("svc: svc_sock_detach(%p)\n", svsk); @@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -1609,9 +1617,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, */ static void svc_bc_sock_free(struct svc_xprt *xprt) { - if (xprt) { - kfree(xprt->xpt_bc_sid); + if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); - } } #endif /* CONFIG_NFS_V4_1 */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d8d98d5b508..217fb7f34d5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1171,7 +1171,7 @@ restart: newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); - newsk->sk_wq = &newu->peer_wq; + RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); /* copy address information from listening to new sock*/ diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 4cbc942f762..21306928d47 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -396,9 +396,12 @@ void __exit x25_link_free(void) write_lock_bh(&x25_neigh_list_lock); list_for_each_safe(entry, tmp, &x25_neigh_list) { + struct net_device *dev; + nb = list_entry(entry, struct x25_neigh, node); + dev = nb->dev; __x25_remove_neigh(nb); - dev_put(nb->dev); + dev_put(dev); } write_unlock_bh(&x25_neigh_list_lock); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8b3ef404c79..7a8e2c77d08 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1340,10 +1340,13 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); + xdst = dst_alloc(dst_ops, 0); xfrm_policy_put_afinfo(afinfo); - xdst->flo.ops = &xfrm_bundle_fc_ops; + if (likely(xdst)) + xdst->flo.ops = &xfrm_bundle_fc_ops; + else + xdst = ERR_PTR(-ENOBUFS); return xdst; } |