diff options
Diffstat (limited to 'net')
87 files changed, 873 insertions, 425 deletions
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 30912973228..c7e634af851 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ nla_total_size(2) + /* IFLA_VLAN_ID */ - sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ + nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c72d1bcdcf4..1356af660b5 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -65,6 +65,7 @@ static int __init batadv_init(void) batadv_recv_handler_init(); batadv_iv_init(); + batadv_nc_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -142,7 +143,7 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - ret = batadv_nc_init(bat_priv); + ret = batadv_nc_mesh_init(bat_priv); if (ret < 0) goto err; @@ -167,7 +168,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_vis_quit(bat_priv); batadv_gw_node_purge(bat_priv); - batadv_nc_free(bat_priv); + batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a487d46e0ae..4ecc0b6bf8a 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** + * batadv_nc_init - one-time initialization for network coding + */ +int __init batadv_nc_init(void) +{ + int ret; + + /* Register our packet type */ + ret = batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet); + + return ret; +} + +/** * batadv_nc_start_timer - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information */ @@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_nc_init - initialise coding hash table and start house keeping + * batadv_nc_mesh_init - initialise coding hash table and start house keeping * @bat_priv: the bat priv with all the soft interface information */ -int batadv_nc_init(struct batadv_priv *bat_priv) +int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { bat_priv->nc.timestamp_fwd_flush = jiffies; bat_priv->nc.timestamp_sniffed_purge = jiffies; @@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv) batadv_hash_set_lock_class(bat_priv->nc.coding_hash, &batadv_nc_decoding_hash_lock_class_key); - /* Register our packet type */ - if (batadv_recv_handler_register(BATADV_CODED, - batadv_nc_recv_coded_packet) < 0) - goto err; - INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); batadv_nc_start_timer(bat_priv); @@ -1721,12 +1730,11 @@ free_nc_packet: } /** - * batadv_nc_free - clean up network coding memory + * batadv_nc_mesh_free - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ -void batadv_nc_free(struct batadv_priv *bat_priv) +void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - batadv_recv_handler_unregister(BATADV_CODED); cancel_delayed_work_sync(&bat_priv->nc.work); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 85a4ec81ad5..ddfa618e80b 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -22,8 +22,9 @@ #ifdef CONFIG_BATMAN_ADV_NC -int batadv_nc_init(struct batadv_priv *bat_priv); -void batadv_nc_free(struct batadv_priv *bat_priv); +int batadv_nc_init(void); +int batadv_nc_mesh_init(struct batadv_priv *bat_priv); +void batadv_nc_mesh_free(struct batadv_priv *bat_priv); void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, @@ -46,12 +47,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_NC */ -static inline int batadv_nc_init(struct batadv_priv *bat_priv) +static inline int batadv_nc_init(void) { return 0; } -static inline void batadv_nc_free(struct batadv_priv *bat_priv) +static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { return; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163635d..e6b7fecb3af 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -64,7 +64,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood_deliver(br, skb, false); goto out; } - if (br_multicast_rcv(br, NULL, skb)) { + if (br_multicast_rcv(br, NULL, skb, vid)) { kfree_skb(skb); goto out; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ffd5874f259..33e8f23acdd 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -700,7 +700,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], vid = nla_get_u16(tb[NDA_VLAN]); - if (vid >= VLAN_N_VID) { + if (!vid || vid >= VLAN_VID_MASK) { pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", vid); return -EINVAL; @@ -794,7 +794,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], vid = nla_get_u16(tb[NDA_VLAN]); - if (vid >= VLAN_N_VID) { + if (!vid || vid >= VLAN_VID_MASK) { pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", vid); return -EINVAL; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a2fd37ec35f..7e73c32e205 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && - br_multicast_rcv(br, p, skb)) + br_multicast_rcv(br, p, skb, vid)) goto drop; if (p->state == BR_STATE_LEARNING) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 85a09bb5ca5..b7b1914dfa2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -453,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d1c57863067..686284ff3d6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -272,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -620,7 +620,6 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); @@ -660,6 +659,7 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -674,6 +674,7 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; + mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -681,7 +682,7 @@ static int br_multicast_add_group(struct net_bridge *br, (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) - goto out; + goto found; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -692,6 +693,8 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); +found: + mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -944,7 +947,8 @@ void br_multicast_disable_port(struct net_bridge_port *port) static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct igmpv3_report *ih; struct igmpv3_grec *grec; @@ -954,12 +958,10 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; - br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1002,7 +1004,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct icmp6hdr *icmp6h; struct mld2_grec *grec; @@ -1010,12 +1013,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; - br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -1138,7 +1139,8 @@ static void br_multicast_query_received(struct net_bridge *br, static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -1150,7 +1152,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1186,14 +1187,10 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; - mod_timer(&mp->timer, now + br->multicast_membership_interval); - mp->timer_armed = true; - max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1219,7 +1216,8 @@ out: #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; @@ -1231,7 +1229,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1265,14 +1262,10 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; - mod_timer(&mp->timer, now + br->multicast_membership_interval); - mp->timer_armed = true; - max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1358,7 +1351,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1370,12 +1363,30 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && mp->timer_armed && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } + + goto out; + } + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; } out: spin_unlock(&br->multicast_lock); @@ -1424,7 +1435,8 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2 = skb; const struct iphdr *iph; @@ -1432,7 +1444,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; - u16 vid = 0; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1493,7 +1504,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1504,10 +1514,10 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2); + err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2); + err = br_ip4_multicast_query(br, port, skb2, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); @@ -1525,7 +1535,8 @@ err_out: #if IS_ENABLED(CONFIG_IPV6) static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2; const struct ipv6hdr *ip6h; @@ -1535,7 +1546,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1625,7 +1635,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1642,10 +1651,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2); + err = br_ip6_multicast_mld2_report(br, port, skb2, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2); + err = br_ip6_multicast_query(br, port, skb2, vid); break; case ICMPV6_MGM_REDUCTION: { @@ -1666,7 +1675,7 @@ out: #endif int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; @@ -1676,10 +1685,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): - return br_multicast_ipv4_rcv(br, port, skb); + return br_multicast_ipv4_rcv(br, port, skb, vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_multicast_ipv6_rcv(br, port, skb); + return br_multicast_ipv6_rcv(br, port, skb, vid); #endif } @@ -1798,7 +1807,6 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e74ddc1c29a..f75d92e4f96 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -243,7 +243,7 @@ static int br_afspec(struct net_bridge *br, vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); - if (vinfo->vid >= VLAN_N_VID) + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) return -EINVAL; switch (cmd) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index efb57d91156..2e8244efb26 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -126,7 +126,6 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; - bool timer_armed; }; struct net_bridge_mdb_htable @@ -452,7 +451,8 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us extern unsigned int br_mdb_rehash_seq; extern int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb); + struct sk_buff *skb, + u16 vid); extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb, u16 vid); extern void br_multicast_add_port(struct net_bridge_port *port); @@ -523,7 +523,8 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { return 0; } @@ -643,9 +644,7 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) * vid wasn't set */ smp_rmb(); - return (v->pvid & VLAN_TAG_PRESENT) ? - (v->pvid & ~VLAN_TAG_PRESENT) : - VLAN_N_VID; + return v->pvid ?: VLAN_N_VID; } #else diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 108084a0467..656a6f3e40d 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br) if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); - else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY) + else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); if (r == 0) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9a9ffe7e401..53f0990eab5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -45,37 +45,34 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) return 0; } - if (vid) { - if (v->port_idx) { - p = v->parent.port; - br = p->br; - dev = p->dev; - } else { - br = v->parent.br; - dev = br->dev; - } - ops = dev->netdev_ops; - - if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { - /* Add VLAN to the device filter if it is supported. - * Stricly speaking, this is not necessary now, since - * devices are made promiscuous by the bridge, but if - * that ever changes this code will allow tagged - * traffic to enter the bridge. - */ - err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), - vid); - if (err) - return err; - } - - err = br_fdb_insert(br, p, dev->dev_addr, vid); - if (err) { - br_err(br, "failed insert local address into bridge " - "forwarding table\n"); - goto out_filt; - } + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + ops = dev->netdev_ops; + + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), + vid); + if (err) + return err; + } + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; } set_bit(vid, v->vlan_bitmap); @@ -98,7 +95,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) __vlan_delete_pvid(v, vid); clear_bit(vid, v->untagged_bitmap); - if (v->port_idx && vid) { + if (v->port_idx) { struct net_device *dev = v->parent.port->dev; const struct net_device_ops *ops = dev->netdev_ops; @@ -192,6 +189,8 @@ out: bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid) { + int err; + /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ @@ -204,20 +203,32 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) return false; - if (br_vlan_get_tag(skb, vid)) { + err = br_vlan_get_tag(skb, vid); + if (!*vid) { u16 pvid = br_get_pvid(v); - /* Frame did not have a tag. See if pvid is set - * on this port. That tells us which vlan untagged - * traffic belongs to. + /* Frame had a tag with VID 0 or did not have a tag. + * See if pvid is set on this port. That tells us which + * vlan untagged or priority-tagged traffic belongs to. */ if (pvid == VLAN_N_VID) return false; - /* PVID is set on this port. Any untagged ingress - * frame is considered to belong to this vlan. + /* PVID is set on this port. Any untagged or priority-tagged + * ingress frame is considered to belong to this vlan. */ - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + *vid = pvid; + if (likely(err)) + /* Untagged Frame. */ + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + else + /* Priority-tagged Frame. + * At this point, We know that skb->vlan_tci had + * VLAN_TAG_PRESENT bit and its VID field was 0x000. + * We update only VID field and preserve PCP field. + */ + skb->vlan_tci |= pvid; + return true; } @@ -248,7 +259,9 @@ bool br_allowed_egress(struct net_bridge *br, return false; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; @@ -278,7 +291,9 @@ out: return err; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int br_vlan_delete(struct net_bridge *br, u16 vid) { struct net_port_vlans *pv; @@ -289,14 +304,9 @@ int br_vlan_delete(struct net_bridge *br, u16 vid) if (!pv) return -EINVAL; - if (vid) { - /* If the VID !=0 remove fdb for this vid. VID 0 is special - * in that it's the default and is always there in the fdb. - */ - spin_lock_bh(&br->hash_lock); - fdb_delete_by_addr(br, br->dev->dev_addr, vid); - spin_unlock_bh(&br->hash_lock); - } + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); __vlan_del(pv, vid); return 0; @@ -329,7 +339,9 @@ unlock: return 0; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; @@ -363,7 +375,9 @@ clean_up: return err; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) { struct net_port_vlans *pv; @@ -374,14 +388,9 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) if (!pv) return -EINVAL; - if (vid) { - /* If the VID !=0 remove fdb for this vid. VID 0 is special - * in that it's the default and is always there in the fdb. - */ - spin_lock_bh(&port->br->hash_lock); - fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); - spin_unlock_bh(&port->br->hash_lock); - } + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); return __vlan_del(pv, vid); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 518093802d1..7c470c371e1 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -181,6 +181,7 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; @@ -193,8 +194,6 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, pm->hook = hooknr; if (uloginfo->prefix != NULL) strcpy(pm->prefix, uloginfo->prefix); - else - *(pm->prefix) = '\0'; if (in) { strcpy(pm->physindev, in->name); @@ -204,16 +203,14 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); - } else - pm->indev[0] = pm->physindev[0] = '\0'; + } if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); /* rcu_read_lock()ed by nf_hook_slow */ strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); - } else - pm->outdev[0] = pm->physoutdev[0] = '\0'; + } if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) BUG(); diff --git a/net/compat.c b/net/compat.c index f0a1ba6c808..89032580bd1 100644 --- a/net/compat.c +++ b/net/compat.c @@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/core/dev.c b/net/core/dev.c index 65f829cfd92..3430b1ed12e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map, return new_map; } -int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, + u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; struct xps_map *map, *new_map; diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29ff26..01b780856db 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); bpf_jit_free(fp); - kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); + fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); @@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + unsigned int sk_fsize = sk_filter_size(fprog->len); int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); + fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; if (copy_from_user(fp->insns, fprog->filter, fsize)) { - sock_kfree_s(sk, fp, fsize+sizeof(*fp)); + sock_kfree_s(sk, fp, sk_fsize); return -EFAULT; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 8d7d0dd72db..143b6fdb964 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -40,7 +40,7 @@ again: struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); - if (!iph) + if (!iph || iph->ihl < 5) return false; if (ip_is_fragment(iph)) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56b4e4..325dee863e4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1344,17 +1344,19 @@ int netdev_register_kobject(struct net_device *net) return error; } -int netdev_class_create_file(struct class_attribute *class_attr) +int netdev_class_create_file_ns(struct class_attribute *class_attr, + const void *ns) { - return class_create_file(&net_class, class_attr); + return class_create_file_ns(&net_class, class_attr, ns); } -EXPORT_SYMBOL(netdev_class_create_file); +EXPORT_SYMBOL(netdev_class_create_file_ns); -void netdev_class_remove_file(struct class_attribute *class_attr) +void netdev_class_remove_file_ns(struct class_attribute *class_attr, + const void *ns) { - class_remove_file(&net_class, class_attr); + class_remove_file_ns(&net_class, class_attr, ns); } -EXPORT_SYMBOL(netdev_class_remove_file); +EXPORT_SYMBOL(netdev_class_remove_file_ns); int netdev_kobject_init(void) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc75c9e461b..8f971990677 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address, we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb) int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { - int proto, len, ulen; - int hits = 0; + int proto, len, ulen, data_len; + int hits = 0, offset; const struct iphdr *iph; struct udphdr *uh; struct netpoll *np, *tmp; + uint16_t source; if (list_empty(&npinfo->rx_np)) goto out; @@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) len -= iph->ihl*4; uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != len) goto out; @@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } } else { @@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; uh = udp_hdr(skb); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != skb->len) goto out; if (udp6_csum_init(skb, uh, IPPROTO_UDP)) @@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } #endif @@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_hook) { + if (np->rx_skb_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; list_add_tail(&np->rx, &npinfo->rx_np); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 3f1ec1586ae..8d9d05edd2e 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,6 +10,7 @@ #include <net/secure_seq.h> +#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; @@ -29,6 +30,7 @@ static void net_secret_init(void) cmpxchg(&net_secret[--i], 0, tmp); } } +#endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) diff --git a/net/core/sock.c b/net/core/sock.c index 5b6beba494a..0b39e7ae438 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2319,6 +2319,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_ll_usec = sysctl_net_busy_read; #endif + sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index c85e71e0c7f..ff41b4d60d3 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) + return -EINVAL; lowpan_dev_info(dev)->real_dev = real_dev; lowpan_dev_info(dev)->fragment_tag = 0; @@ -1386,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, entry->ldev = dev; + /* Set the lowpan harware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); INIT_LIST_HEAD(&entry->list); list_add_tail(&entry->list, &lowpan_devices); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 7bd8983dbfc..96da9c77dec 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -287,7 +287,7 @@ begintw: if (unlikely(!INET_TW_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a04d872c54f..3982eabf61e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -772,15 +772,20 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + __skb_queue_tail(queue, skb); + } else if (skb_is_gso(skb)) { + goto append; } + skb->ip_summed = CHECKSUM_PARTIAL; + /* specify the length of each IP datagram fragment */ + skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + +append: return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index e805e7b3030..6e87f853d03 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -125,8 +125,17 @@ static int vti_rcv(struct sk_buff *skb) iph->saddr, iph->daddr, 0); if (tunnel != NULL) { struct pcpu_tstats *tstats; + u32 oldmark = skb->mark; + int ret; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + + /* temporarily mark the skb with the tunnel o_key, to + * only match policies with this mark. + */ + skb->mark = be32_to_cpu(tunnel->parms.o_key); + ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); + skb->mark = oldmark; + if (!ret) return -1; tstats = this_cpu_ptr(tunnel->dev->tstats); @@ -135,7 +144,6 @@ static int vti_rcv(struct sk_buff *skb) tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - skb->mark = 0; secpath_reset(skb); skb->dev = tunnel->dev; return 1; @@ -167,7 +175,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), + be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, dst, tiph->saddr, 0, 0); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 85a4f21aac1..59da7cde072 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d23118d95ff..718dfbd30cb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb, addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index cbc22158af4..9cb993cd224 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* We might not have a timestamp, get one */ if (skb->tstamp.tv64 == 0) @@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net, } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - else - *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 && skb->mac_header != skb->network_header && @@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net, if (in) strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - else - pm->indev_name[0] = '\0'; if (out) strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - else - pm->outdev_name[0] = '\0'; /* copy_len <= skb->len, so can't fail. */ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 727f4365bcd..6011615e810 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2072,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) RT_SCOPE_LINK); goto make_route; } - if (fl4->saddr) { + if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 25a89eaa669..068c8fb0d15 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1284,7 +1284,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; + TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + TCP_SKB_CB(prev)->end_seq++; + if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); @@ -2853,7 +2856,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) + if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + flag & FLAG_ACKED) seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; if (seq_rtt < 0) @@ -2868,14 +2872,19 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) +static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); s32 seq_rtt = -1; - if (tp->lsndtime && !tp->total_retrans) - seq_rtt = tcp_time_stamp - tp->lsndtime; - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (synack_stamp && !tp->total_retrans) + seq_rtt = tcp_time_stamp - synack_stamp; + + /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets + * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() + */ + if (!tp->srtt) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); } static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) @@ -2978,6 +2987,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); + bool rtt_update; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -3054,14 +3064,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) || - (flag & FLAG_ACKED)) - tcp_rearm_rto(sk); + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tcp_rearm_rto(sk); if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3100,6 +3109,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_ops->pkts_acked(sk, pkts_acked, rtt_us); } + } else if (skb && rtt_update && sack_rtt >= 0 && + sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + /* Do not re-arm RTO if the sack RTT is measured from data sent + * after when the head was last (re)transmitted. Otherwise the + * timeout may continue to extend in loss recovery. + */ + tcp_rearm_rto(sk); } #if FASTRETRANS_DEBUG > 0 @@ -3288,7 +3304,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_init_cwnd_reduction(sk, true); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_Open); + tcp_try_keep_open(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBERECOVERY); } @@ -5584,6 +5600,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct request_sock *req; int queued = 0; bool acceptable; + u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5666,9 +5683,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * so release it. */ if (req) { + synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { + synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5691,7 +5710,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, req); + tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -5709,6 +5728,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } else tcp_init_metrics(sk); + tcp_update_pacing_rate(sk); + /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 3a7525e6c08..533c58a5cfb 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -18,6 +18,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int sum_truesize = 0; struct tcphdr *th; unsigned int thlen; unsigned int seq; @@ -102,13 +103,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { skb->destructor = gso_skb->destructor; skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; + sum_truesize += skb->truesize; } skb = skb->next; th = tcp_hdr(skb); @@ -125,7 +120,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); + sum_truesize += skb->truesize; + atomic_add(sum_truesize - gso_skb->truesize, + &skb->sk->sk_wmem_alloc); } delta = htonl(oldlen + (skb_tail_pointer(skb) - diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e6bb8256e59..d46f2143305 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb unsigned int size = 0; unsigned int eff_sacks; + opts->options = 0; + #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { @@ -984,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - skb->ip_summed == CHECKSUM_NONE) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + + if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1065,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -2342,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9a459be24af..e1a63930a96 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -104,9 +104,14 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) const struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; if (!ip_is_fragment(iph)) { switch (iph->protocol) { @@ -235,7 +240,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 73784c3d464..82e1da3a40b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -618,8 +618,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d3618a78fca..e67e63f9858 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -436,8 +436,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 32b4a1675d8..066640e0ba8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -116,7 +116,7 @@ begintw: } if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 7bb5446b9d7..bf4a9a084de 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -976,6 +976,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) if (t->parms.o_flags&GRE_SEQ) addend += 4; } + t->hlen = addend; if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -1002,8 +1003,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) } ip6_rt_put(rt); } - - t->hlen = addend; } static int ip6gre_tnl_change(struct ip6_tnl *t, @@ -1173,9 +1172,8 @@ done: static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip6_tnl *tunnel = netdev_priv(dev); if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + new_mtu > 0xFFF8 - dev->hard_header_len) return -EINVAL; dev->mtu = new_mtu; return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a54c45ce4a4..91fb4e8212f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop((struct rt6_info *)dst); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -1008,6 +1008,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, { struct sk_buff *skb; + struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1015,8 +1016,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { - struct frag_hdr fhdr; - skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1036,20 +1035,24 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->transport_header = skb->network_header + fragheaderlen; skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); + } else if (skb_is_gso(skb)) { + goto append; } + skb->ip_summed = CHECKSUM_PARTIAL; + /* Specify the length of each IPv6 datagram fragment. + * It has to be a multiple of 8. + */ + skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - + sizeof(struct frag_hdr)) & ~7; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + ipv6_select_ident(&fhdr, rt); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append: return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a791552e042..583b77e2f69 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU) { - return -EINVAL; + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPIP) { + if (new_mtu < 68) + return -EINVAL; + } else { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; dev->mtu = new_mtu; return 0; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5636a912074..ce507d9e1c9 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,8 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG && + if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 44400c216dc..710238f58aa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd96d82..04e17b3309f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -476,6 +476,24 @@ out: } #ifdef CONFIG_IPV6_ROUTER_PREF +struct __rt6_probe_work { + struct work_struct work; + struct in6_addr target; + struct net_device *dev; +}; + +static void rt6_probe_deferred(struct work_struct *w) +{ + struct in6_addr mcaddr; + struct __rt6_probe_work *work = + container_of(w, struct __rt6_probe_work, work); + + addrconf_addr_solict_mult(&work->target, &mcaddr); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); + dev_put(work->dev); + kfree(w); +} + static void rt6_probe(struct rt6_info *rt) { struct neighbour *neigh; @@ -499,17 +517,23 @@ static void rt6_probe(struct rt6_info *rt) if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { - struct in6_addr mcaddr; - struct in6_addr *target; + struct __rt6_probe_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (neigh) { + if (neigh && work) neigh->updated = jiffies; + + if (neigh) write_unlock(&neigh->lock); - } - target = (struct in6_addr *)&rt->rt6i_gateway; - addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); + if (work) { + INIT_WORK(&work->work, rt6_probe_deferred); + work->target = rt->rt6i_gateway; + dev_hold(rt->dst.dev); + work->dev = rt->dst.dev; + schedule_work(&work->work); + } } else { out: write_unlock(&neigh->lock); @@ -851,7 +875,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - rt->rt6i_gateway = *daddr; } rt->rt6i_flags |= RTF_CACHE; @@ -1064,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1338,6 +1364,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; atomic_set(&rt->dst.__refcnt, 1); + rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; @@ -1873,7 +1900,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; + if (ort->rt6i_flags & RTF_GATEWAY) + rt->rt6i_gateway = ort->rt6i_gateway; + else + rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) @@ -2160,6 +2190,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, else rt->rt6i_flags |= RTF_LOCAL; + rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 72b7eaaf3ca..18786098fd4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1225,9 +1225,6 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; - if (dontfrag < 0) - dontfrag = np->dontfrag; - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1246,6 +1243,8 @@ back_from_confirm: up->pending = AF_INET6; do_append_data: + if (dontfrag < 0) + dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 23ed03d786c..5f8e128c512 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -135,9 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; @@ -284,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0578d4fa00a..0f676908d15 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2563,9 +2563,8 @@ bed: jiffies + msecs_to_jiffies(val)); /* Wait for IR-LMP to call us back */ - __wait_event_interruptible(self->query_wait, - (self->cachedaddr != 0 || self->errno == -ETIME), - err); + err = __wait_event_interruptible(self->query_wait, + (self->cachedaddr != 0 || self->errno == -ETIME)); /* If watchdog is still activated, kill it! */ del_timer(&(self->watchdog)); diff --git a/net/key/af_key.c b/net/key/af_key.c index 9d585370c5b..911ef03bf8f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1098,7 +1098,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->id.proto = proto; x->id.spi = sa->sadb_sa_spi; - x->props.replay_window = sa->sadb_sa_replay; + x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay, + (sizeof(x->replay.bitmap) * 8)); if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN) x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index feae495a0a3..b076e8309bc 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -115,6 +115,11 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) +{ + return sk->sk_user_data; +} + static inline struct l2tp_net *l2tp_pernet(struct net *net) { BUG_ON(!net); @@ -504,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { + if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1128,7 +1133,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6) + if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(skb, NULL); else #endif @@ -1255,7 +1260,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) l2tp_xmit_ipv6_csum(sk, skb, udp_len); else #endif @@ -1304,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); */ static void l2tp_tunnel_destruct(struct sock *sk) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); struct l2tp_net *pn; - tunnel = sk->sk_user_data; if (tunnel == NULL) goto end; @@ -1675,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } /* Check if this socket has already been prepped */ - tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + tunnel = l2tp_tunnel(sk); if (tunnel != NULL) { /* This socket has already been prepped */ err = -EBUSY; @@ -1704,6 +1708,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (ipv6_addr_v4mapped(&np->saddr) && + ipv6_addr_v4mapped(&np->daddr)) { + struct inet_sock *inet = inet_sk(sk); + + tunnel->v4mapped = true; + inet->inet_saddr = np->saddr.s6_addr32[3]; + inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; + inet->inet_daddr = np->daddr.s6_addr32[3]; + } else { + tunnel->v4mapped = false; + } + } +#endif + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { @@ -1712,7 +1734,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) udpv6_encap_enable(); else #endif diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 66a559b104b..6f251cbc2ed 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -194,6 +194,9 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ +#if IS_ENABLED(CONFIG_IPV6) + bool v4mapped; +#endif struct work_struct del_work; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 5ebee2ded9e..8c46b271064 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh goto error_put_sess_tun; } + local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); + local_bh_enable(); sock_put(ps->tunnel_sock); sock_put(sk); @@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[0] = ppph[0]; skb->data[1] = ppph[1]; + local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); + local_bh_enable(); sock_put(sk_tun); sock_put(sk); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2e7855a1b10..629dee7ec9b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3518,7 +3518,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } band = chanctx_conf->def.chan->band; - sta = sta_info_get(sdata, peer); + sta = sta_info_get_bss(sdata, peer); if (sta) { qos = test_sta_flag(sta, WLAN_STA_WME); } else { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b6186517ec5..611abfcfb5e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -893,6 +893,8 @@ struct tpt_led_trigger { * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported * a scan complete for an aborted scan. + * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being + * cancelled. */ enum { SCAN_SW_SCANNING, @@ -900,6 +902,7 @@ enum { SCAN_ONCHANNEL_SCANNING, SCAN_COMPLETED, SCAN_ABORTED, + SCAN_HW_CANCELLED, }; /** diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index acd1f71adc0..0c2a29484c0 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -394,6 +394,8 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (started) ieee80211_start_next_roc(local); + else if (list_empty(&local->roc_list)) + ieee80211_run_deferred_scan(local); } out_unlock: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 54395d7583b..674eac1f996 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3056,6 +3056,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; + if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + return 0; if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 08afe74b98f..d2d17a44922 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -238,6 +238,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) enum ieee80211_band band; int i, ielen, n_chans; + if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) + return false; + do { if (local->hw_scan_band == IEEE80211_NUM_BANDS) return false; @@ -940,7 +943,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (!local->scan_req) goto out; + /* + * We have a scan running and the driver already reported completion, + * but the worker hasn't run yet or is stuck on the mutex - mark it as + * cancelled. + */ + if (test_bit(SCAN_HW_SCANNING, &local->scanning) && + test_bit(SCAN_COMPLETED, &local->scanning)) { + set_bit(SCAN_HW_CANCELLED, &local->scanning); + goto out; + } + if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { + /* + * Make sure that __ieee80211_scan_completed doesn't trigger a + * scan on another band. + */ + set_bit(SCAN_HW_CANCELLED, &local->scanning); if (local->ops->cancel_hw_scan) drv_cancel_hw_scan(local, rcu_dereference_protected(local->scan_sdata, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 368837fe3b8..78dc2e99027 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + sta->last_rx = jiffies; + if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; u8 *qc = ieee80211_get_qos_ctl(hdr); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3456c0486b4..70b5a05c0a4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1120,7 +1120,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; - } else if (info->flags & IEEE80211_TX_CTL_INJECTED || + } else if (info->flags & (IEEE80211_TX_CTL_INJECTED | + IEEE80211_TX_INTFL_NL80211_FRAME_TX) || tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e1b34a18b24..69e4ef5348a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2103,7 +2103,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, skip, shift; + int rate, shift; u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; @@ -2131,14 +2131,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, exrates + 2); *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = exrates; - skip = 0; for (i = 8; i < sband->n_bitrates; i++) { u8 basic = 0; if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; - if (skip++ < 8) - continue; if (need_basic && basic_rates & BIT(i)) basic = 0x80; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, @@ -2241,6 +2238,10 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, } rate = cfg80211_calculate_bitrate(&ri); + if (WARN_ONCE(!rate, + "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n", + status->flag, status->rate_idx, status->vht_nss)) + return 0; /* rewind from end of MPDU */ if (status->flag & RX_FLAG_MACTIME_END) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f4484719f3e..f63c2388f38 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1637,12 +1637,9 @@ static int sync_thread_master(void *data) continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { - int ret = 0; - - __wait_event_interruptible(*sk_sleep(sk), + int ret = __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || - kthread_should_stop(), - ret); + kthread_should_stop()); if (unlikely(kthread_should_stop())) goto done; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index bdebd03bc8c..70866d192ef 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src, flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { - if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, - sizeof(rt1->rt6i_gateway)) && + if (ipv6_addr_equal(rt6_nexthop(rt1), + rt6_nexthop(rt2)) && rt1->dst.dev == rt2->dst.dev) ret = 1; dst_release(&rt2->dst); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8b03028cca6..227aa11e840 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table, return NULL; } - table->private = newinfo; newinfo->initial_entries = private->initial_entries; + /* + * Ensure contents of newinfo are visible before assigning to + * private. + */ + smp_wmb(); + table->private = newinfo; /* * Even though table entries have now been swapped, other CPU's diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 1e2fae32f81..ed00fef5899 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 queue = info->queuenum; + int ret; if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { @@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = nfqueue_hash(skb, par); } - return NF_QUEUE_NR(queue); + ret = NF_QUEUE_NR(queue); + if (info->flags & NFQ_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + return ret; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index c3235675f35..5c2dab27610 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -65,8 +65,7 @@ void ovs_dp_notify_wq(struct work_struct *work) continue; netdev_vport = netdev_vport_priv(vport); - if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || - netdev_vport->dev->reg_state == NETREG_UNREGISTERING) + if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)) dp_detach_port_notify(vport); } } @@ -88,6 +87,10 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { + /* upper_dev_unlink and decrement promisc immediately */ + ovs_netdev_detach_dev(vport); + + /* schedule vport destroy, dev_put and genl notification */ ovs_net = net_generic(dev_net(dev), ovs_net_id); queue_work(system_wq, &ovs_net->dp_notify_work); } diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 09d93c13cfd..d21f77d875b 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -150,15 +150,25 @@ static void free_port_rcu(struct rcu_head *rcu) ovs_vport_free(vport_from_priv(netdev_vport)); } -static void netdev_destroy(struct vport *vport) +void ovs_netdev_detach_dev(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); - rtnl_lock(); + ASSERT_RTNL(); netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(netdev_vport->dev); - netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp)); + netdev_upper_dev_unlink(netdev_vport->dev, + netdev_master_upper_dev_get(netdev_vport->dev)); dev_set_promiscuity(netdev_vport->dev, -1); +} + +static void netdev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + rtnl_lock(); + if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH) + ovs_netdev_detach_dev(vport); rtnl_unlock(); call_rcu(&netdev_vport->rcu, free_port_rcu); diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index dd298b5c5cd..8df01c1127e 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,5 +39,6 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); +void ovs_netdev_detach_dev(struct vport *); #endif /* vport_netdev.h */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a2fef8b10b9..fdc041c5785 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -255,6 +255,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + f->time_next_packet = 0ULL; } return f; } @@ -472,20 +473,16 @@ begin: if (f->credit > 0 || !q->rate_enable) goto out; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { - rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + rate = q->flow_max_rate; + if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + rate = min(skb->sk->sk_pacing_rate, rate); - rate = min(rate, q->flow_max_rate); - } else { - rate = q->flow_max_rate; - if (rate == ~0U) - goto out; - } - if (rate) { + if (rate != ~0U) { u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; - do_div(len, rate); + if (likely(rate)) + do_div(len, rate); /* Since socket rate can change later, * clamp the delay to 125 ms. * TODO: maybe segment the too big skb, as in commit @@ -656,7 +653,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (tb[TCA_FQ_INITIAL_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); @@ -735,12 +732,14 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; + /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore, + * do not bother giving its value + */ if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || - nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a6d788d4521..b87e83d0747 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -358,6 +358,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche return PSCHED_NS2TICKS(ticks); } +static void tfifo_reset(struct Qdisc *sch) +{ + struct netem_sched_data *q = qdisc_priv(sch); + struct rb_node *p; + + while ((p = rb_first(&q->t_root))) { + struct sk_buff *skb = netem_rb_to_skb(p); + + rb_erase(p, &q->t_root); + skb->next = NULL; + skb->prev = NULL; + kfree_skb(skb); + } +} + static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -520,6 +535,7 @@ static unsigned int netem_drop(struct Qdisc *sch) skb->next = NULL; skb->prev = NULL; len = qdisc_pkt_len(skb); + sch->qstats.backlog -= len; kfree_skb(skb); } } @@ -609,6 +625,7 @@ static void netem_reset(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); qdisc_reset_queue(sch); + tfifo_reset(sch); if (q->qdisc) qdisc_reset(q->qdisc); qdisc_watchdog_cancel(&q->watchdog); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e7b2d4fe2b6..96a55910262 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -279,7 +279,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || laddr->state == SCTP_ADDR_DEL || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; /* Do not compare against v4 addrs */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 0ac3a65dacc..319137340d1 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -536,7 +536,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { + if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + (dst_xfrm(dst) != NULL) || packet->ipfragok) { __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); /* 3) Put the resultant value into the checksum field in the diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 666c6684279..1a6eef39ab2 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } diff --git a/net/socket.c b/net/socket.c index ebed4b68f76..c226aceee65 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1964,6 +1964,16 @@ struct used_address { unsigned int name_len; }; +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct msghdr __user *umsg) +{ + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; + return 0; +} + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) @@ -1982,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2191,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 084656671d6..97912b40c25 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -420,41 +420,53 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) memcpy(gss_msg->databuf, &uid, sizeof(uid)); gss_msg->msg.data = gss_msg->databuf; gss_msg->msg.len = sizeof(uid); - BUG_ON(sizeof(uid) > UPCALL_BUF_LEN); + + BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf)); } -static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, +static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, const char *service_name, const char *target_name) { struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; - int len = 0; - - gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ", - mech->gm_name, - from_kuid(&init_user_ns, gss_msg->uid)); - p += gss_msg->msg.len; + size_t buflen = sizeof(gss_msg->databuf); + int len; + + len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name, + from_kuid(&init_user_ns, gss_msg->uid)); + buflen -= len; + p += len; + gss_msg->msg.len = len; if (target_name) { - len = sprintf(p, "target=%s ", target_name); + len = scnprintf(p, buflen, "target=%s ", target_name); + buflen -= len; p += len; gss_msg->msg.len += len; } if (service_name != NULL) { - len = sprintf(p, "service=%s ", service_name); + len = scnprintf(p, buflen, "service=%s ", service_name); + buflen -= len; p += len; gss_msg->msg.len += len; } if (mech->gm_upcall_enctypes) { - len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes); + len = scnprintf(p, buflen, "enctypes=%s ", + mech->gm_upcall_enctypes); + buflen -= len; p += len; gss_msg->msg.len += len; } - len = sprintf(p, "\n"); + len = scnprintf(p, buflen, "\n"); + if (len == 0) + goto out_overflow; gss_msg->msg.len += len; gss_msg->msg.data = gss_msg->databuf; - BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN); + return 0; +out_overflow: + WARN_ON_ONCE(1); + return -ENOMEM; } static struct gss_upcall_msg * @@ -463,15 +475,15 @@ gss_alloc_msg(struct gss_auth *gss_auth, { struct gss_upcall_msg *gss_msg; int vers; + int err = -ENOMEM; gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg == NULL) - return ERR_PTR(-ENOMEM); + goto err; vers = get_pipe_version(gss_auth->net); - if (vers < 0) { - kfree(gss_msg); - return ERR_PTR(vers); - } + err = vers; + if (err < 0) + goto err_free_msg; gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -482,10 +494,17 @@ gss_alloc_msg(struct gss_auth *gss_auth, switch (vers) { case 0: gss_encode_v0_msg(gss_msg); + break; default: - gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); + err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); + if (err) + goto err_free_msg; }; return gss_msg; +err_free_msg: + kfree(gss_msg); +err: + return ERR_PTR(err); } static struct gss_upcall_msg * diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 77479606a97..dab09dac8fc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,12 +25,12 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <linux/utsname.h> #include <linux/workqueue.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/un.h> -#include <linux/rcupdate.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/addr.h> @@ -264,6 +264,26 @@ void rpc_clients_notifier_unregister(void) return rpc_pipefs_notifier_unregister(&rpc_clients_block); } +static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + const struct rpc_timeout *timeout) +{ + struct rpc_xprt *old; + + spin_lock(&clnt->cl_lock); + old = rcu_dereference_protected(clnt->cl_xprt, + lockdep_is_held(&clnt->cl_lock)); + + if (!xprt_bound(xprt)) + clnt->cl_autobind = 1; + + clnt->cl_timeout = timeout; + rcu_assign_pointer(clnt->cl_xprt, xprt); + spin_unlock(&clnt->cl_lock); + + return old; +} + static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) { clnt->cl_nodelen = strlen(nodename); @@ -272,12 +292,13 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); } -static int rpc_client_register(const struct rpc_create_args *args, - struct rpc_clnt *clnt) +static int rpc_client_register(struct rpc_clnt *clnt, + rpc_authflavor_t pseudoflavor, + const char *client_name) { struct rpc_auth_create_args auth_args = { - .pseudoflavor = args->authflavor, - .target_name = args->client_name, + .pseudoflavor = pseudoflavor, + .target_name = client_name, }; struct rpc_auth *auth; struct net *net = rpc_net_ns(clnt); @@ -298,7 +319,7 @@ static int rpc_client_register(const struct rpc_create_args *args, auth = rpcauth_create(&auth_args, clnt); if (IS_ERR(auth)) { dprintk("RPC: Couldn't create auth handle (flavor %u)\n", - args->authflavor); + pseudoflavor); err = PTR_ERR(auth); goto err_auth; } @@ -337,7 +358,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, { const struct rpc_program *program = args->program; const struct rpc_version *version; - struct rpc_clnt *clnt = NULL; + struct rpc_clnt *clnt = NULL; + const struct rpc_timeout *timeout; int err; /* sanity check the name before trying to print it */ @@ -365,7 +387,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (err) goto out_no_clid; - rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? : program->number; @@ -380,16 +401,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, INIT_LIST_HEAD(&clnt->cl_tasks); spin_lock_init(&clnt->cl_lock); - if (!xprt_bound(xprt)) - clnt->cl_autobind = 1; - - clnt->cl_timeout = xprt->timeout; + timeout = xprt->timeout; if (args->timeout != NULL) { memcpy(&clnt->cl_timeout_default, args->timeout, sizeof(clnt->cl_timeout_default)); - clnt->cl_timeout = &clnt->cl_timeout_default; + timeout = &clnt->cl_timeout_default; } + rpc_clnt_set_transport(clnt, xprt, timeout); + clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); @@ -398,7 +418,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); - err = rpc_client_register(args, clnt); + err = rpc_client_register(clnt, args->authflavor, args->client_name); if (err) goto out_no_path; if (parent) @@ -600,6 +620,80 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) } EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth); +/** + * rpc_switch_client_transport: switch the RPC transport on the fly + * @clnt: pointer to a struct rpc_clnt + * @args: pointer to the new transport arguments + * @timeout: pointer to the new timeout parameters + * + * This function allows the caller to switch the RPC transport for the + * rpc_clnt structure 'clnt' to allow it to connect to a mirrored NFS + * server, for instance. It assumes that the caller has ensured that + * there are no active RPC tasks by using some form of locking. + * + * Returns zero if "clnt" is now using the new xprt. Otherwise a + * negative errno is returned, and "clnt" continues to use the old + * xprt. + */ +int rpc_switch_client_transport(struct rpc_clnt *clnt, + struct xprt_create *args, + const struct rpc_timeout *timeout) +{ + const struct rpc_timeout *old_timeo; + rpc_authflavor_t pseudoflavor; + struct rpc_xprt *xprt, *old; + struct rpc_clnt *parent; + int err; + + xprt = xprt_create_transport(args); + if (IS_ERR(xprt)) { + dprintk("RPC: failed to create new xprt for clnt %p\n", + clnt); + return PTR_ERR(xprt); + } + + pseudoflavor = clnt->cl_auth->au_flavor; + + old_timeo = clnt->cl_timeout; + old = rpc_clnt_set_transport(clnt, xprt, timeout); + + rpc_unregister_client(clnt); + __rpc_clnt_remove_pipedir(clnt); + + /* + * A new transport was created. "clnt" therefore + * becomes the root of a new cl_parent tree. clnt's + * children, if it has any, still point to the old xprt. + */ + parent = clnt->cl_parent; + clnt->cl_parent = clnt; + + /* + * The old rpc_auth cache cannot be re-used. GSS + * contexts in particular are between a single + * client and server. + */ + err = rpc_client_register(clnt, pseudoflavor, NULL); + if (err) + goto out_revert; + + synchronize_rcu(); + if (parent != clnt) + rpc_release_client(parent); + xprt_put(old); + dprintk("RPC: replaced xprt for clnt %p\n", clnt); + return 0; + +out_revert: + rpc_clnt_set_transport(clnt, old, old_timeo); + clnt->cl_parent = parent; + rpc_client_register(clnt, pseudoflavor, NULL); + xprt_put(xprt); + dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); + return err; +} +EXPORT_SYMBOL_GPL(rpc_switch_client_transport); + /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -772,6 +866,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (clnt->cl_noretranstimeo) + task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; if (sk_memalloc_socks()) { struct rpc_xprt *xprt; @@ -1690,6 +1786,7 @@ call_connect_status(struct rpc_task *task) dprint_status(task); trace_rpc_connect_status(task, status); + task->tk_status = 0; switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: @@ -1698,12 +1795,14 @@ call_connect_status(struct rpc_task *task) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + /* retry with existing socket, after a delay */ + rpc_delay(task, 3*HZ); if (RPC_IS_SOFTCONN(task)) break; - /* retry with existing socket, after a delay */ - case 0: case -EAGAIN: - task->tk_status = 0; + task->tk_action = call_bind; + return; + case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; return; @@ -1717,13 +1816,14 @@ call_connect_status(struct rpc_task *task) static void call_transmit(struct rpc_task *task) { + int is_retrans = RPC_WAS_SENT(task); + dprint_status(task); task->tk_action = call_status; if (task->tk_status < 0) return; - task->tk_status = xprt_prepare_transmit(task); - if (task->tk_status != 0) + if (!xprt_prepare_transmit(task)) return; task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. */ @@ -1742,6 +1842,8 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); if (task->tk_status < 0) return; + if (is_retrans) + task->tk_client->cl_stats->rpcretrans++; /* * On success, ensure that we call xprt_end_transmit() before sleeping * in order to allow access to the socket to other RPC requests. @@ -1811,8 +1913,7 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - task->tk_status = xprt_prepare_transmit(task); - if (task->tk_status == -EAGAIN) { + if (!xprt_prepare_transmit(task)) { /* * Could not reserve the transport. Try again after the * transport is released. @@ -1900,7 +2001,8 @@ call_status(struct rpc_task *task) rpc_delay(task, 3*HZ); case -ETIMEDOUT: task->tk_action = call_timeout; - if (task->tk_client->cl_discrtry) + if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) + && task->tk_client->cl_discrtry) xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); break; @@ -1982,7 +2084,6 @@ call_timeout(struct rpc_task *task) rpcauth_invalcred(task); retry: - clnt->cl_stats->rpcretrans++; task->tk_action = call_bind; task->tk_status = 0; } @@ -2025,7 +2126,6 @@ call_decode(struct rpc_task *task) if (req->rq_rcv_buf.len < 12) { if (!RPC_IS_SOFT(task)) { task->tk_action = call_bind; - clnt->cl_stats->rpcretrans++; goto out_retry; } dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 095363eee76..04199bc8416 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -205,10 +205,8 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - if (req != NULL) { - req->rq_bytes_sent = 0; + if (req != NULL) req->rq_ntrans++; - } return 1; @@ -263,7 +261,6 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - req->rq_bytes_sent = 0; req->rq_ntrans++; return 1; } @@ -300,10 +297,8 @@ static bool __xprt_lock_write_func(struct rpc_task *task, void *data) req = task->tk_rqstp; xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; + if (req) req->rq_ntrans++; - } return true; } @@ -329,7 +324,6 @@ static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - req->rq_bytes_sent = 0; req->rq_ntrans++; return true; } @@ -358,6 +352,11 @@ out_unlock: void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { + if (task != NULL) { + struct rpc_rqst *req = task->tk_rqstp; + if (req != NULL) + req->rq_bytes_sent = 0; + } xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } @@ -375,6 +374,11 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { + if (task != NULL) { + struct rpc_rqst *req = task->tk_rqstp; + if (req != NULL) + req->rq_bytes_sent = 0; + } xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } @@ -854,24 +858,36 @@ static inline int xprt_has_timer(struct rpc_xprt *xprt) * @task: RPC task about to send a request * */ -int xprt_prepare_transmit(struct rpc_task *task) +bool xprt_prepare_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int err = 0; + bool ret = false; dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { - err = req->rq_reply_bytes_recvd; + if (!req->rq_bytes_sent) { + if (req->rq_reply_bytes_recvd) { + task->tk_status = req->rq_reply_bytes_recvd; + goto out_unlock; + } + if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) + && xprt_connected(xprt) + && req->rq_connect_cookie == xprt->connect_cookie) { + xprt->ops->set_retrans_timeout(task); + rpc_sleep_on(&xprt->pending, task, xprt_timer); + goto out_unlock; + } + } + if (!xprt->ops->reserve_xprt(xprt, task)) { + task->tk_status = -EAGAIN; goto out_unlock; } - if (!xprt->ops->reserve_xprt(xprt, task)) - err = -EAGAIN; + ret = true; out_unlock: spin_unlock_bh(&xprt->transport_lock); - return err; + return ret; } void xprt_end_transmit(struct rpc_task *task) @@ -912,7 +928,6 @@ void xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; - req->rq_connect_cookie = xprt->connect_cookie; req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); if (status != 0) { @@ -938,12 +953,14 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { + else { /* * Sleep on the pending queue since * we're expecting a reply. */ - rpc_sleep_on(&xprt->pending, task, xprt_timer); + if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) + rpc_sleep_on(&xprt->pending, task, xprt_timer); + req->rq_connect_cookie = xprt->connect_cookie; } spin_unlock_bh(&xprt->transport_lock); } @@ -1087,11 +1104,9 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, for (i = 0; i < num_prealloc; i++) { req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); if (!req) - break; + goto out_free; list_add(&req->rq_list, &xprt->free); } - if (i < num_prealloc) - goto out_free; if (max_alloc > num_prealloc) xprt->max_reqs = max_alloc; else @@ -1186,6 +1201,12 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_xprt = xprt; req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); + req->rq_connect_cookie = xprt->connect_cookie - 1; + req->rq_bytes_sent = 0; + req->rq_snd_buf.len = 0; + req->rq_snd_buf.buflen = 0; + req->rq_rcv_buf.len = 0; + req->rq_rcv_buf.buflen = 0; req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee03d35677d..17c88928b7d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -835,6 +835,8 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); + cancel_delayed_work_sync(&transport->connect_worker); + xs_reset_transport(transport); xprt->reestablish_timeout = 0; @@ -854,14 +856,6 @@ static void xs_tcp_close(struct rpc_xprt *xprt) xs_tcp_shutdown(xprt); } -static void xs_local_destroy(struct rpc_xprt *xprt) -{ - xs_close(xprt); - xs_free_peer_addresses(xprt); - xprt_free(xprt); - module_put(THIS_MODULE); -} - /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -869,13 +863,12 @@ static void xs_local_destroy(struct rpc_xprt *xprt) */ static void xs_destroy(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work_sync(&transport->connect_worker); - - xs_local_destroy(xprt); + xs_close(xprt); + xs_free_peer_addresses(xprt); + xprt_free(xprt); + module_put(THIS_MODULE); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1511,6 +1504,7 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_copied = 0; transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; + xprt->connect_cookie++; xprt_wake_pending_tasks(xprt, -EAGAIN); } @@ -1816,6 +1810,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } #endif +static void xs_dummy_setup_socket(struct work_struct *work) +{ +} + static struct socket *xs_create_sock(struct rpc_xprt *xprt, struct sock_xprt *transport, int family, int type, int protocol) { @@ -2112,6 +2110,19 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; + unsigned int keepidle = xprt->timeout->to_initval / HZ; + unsigned int keepcnt = xprt->timeout->to_retries + 1; + unsigned int opt_on = 1; + + /* TCP Keepalive options */ + kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&opt_on, sizeof(opt_on)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, + (char *)&keepcnt, sizeof(keepcnt)); write_lock_bh(&sk->sk_callback_lock); @@ -2151,7 +2162,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) case 0: case -EINPROGRESS: /* SYN_SENT! */ - xprt->connect_cookie++; if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; } @@ -2498,7 +2508,7 @@ static struct rpc_xprt_ops xs_local_ops = { .send_request = xs_local_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, - .destroy = xs_local_destroy, + .destroy = xs_destroy, .print_stats = xs_local_print_stats, }; @@ -2655,6 +2665,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->ops = &xs_local_ops; xprt->timeout = &xs_local_default_timeout; + INIT_DELAYED_WORK(&transport->connect_worker, + xs_dummy_setup_socket); + switch (sun->sun_family) { case AF_LOCAL: if (sun->sun_path[0] != '/') { @@ -2859,8 +2872,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (args->bc_xprt->xpt_bc_xprt) { /* * This server connection already has a backchannel - * export; we can't create a new one, as we wouldn't be - * able to match replies based on xid any more. So, + * transport; we can't create a new one, as we wouldn't + * be able to match replies based on xid any more. So, * reuse the already-existing one: */ return args->bc_xprt->xpt_bc_xprt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 86de99ad297..c1f403bed68 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) return 0; } +static void unix_sock_inherit_flags(const struct socket *old, + struct socket *new) +{ + if (test_bit(SOCK_PASSCRED, &old->flags)) + set_bit(SOCK_PASSCRED, &new->flags); + if (test_bit(SOCK_PASSSEC, &old->flags)) + set_bit(SOCK_PASSSEC, &new->flags); +} + static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk = sock->sk; @@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) /* attach accepted sock to socket */ unix_state_lock(tsk); newsock->state = SS_CONNECTED; + unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; diff --git a/net/unix/diag.c b/net/unix/diag.c index d591091603b..86fa0f3b2ca 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; + rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); diff --git a/net/wireless/core.c b/net/wireless/core.c index 67153964aad..aff959e5a1b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -566,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - + rtnl_lock(); res = device_add(&rdev->wiphy.dev); - if (res) - return res; - - res = rfkill_register(rdev->rfkill); if (res) { - device_del(&rdev->wiphy.dev); + rtnl_unlock(); return res; } - rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -606,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy) rdev->wiphy.registered = true; rtnl_unlock(); + + res = rfkill_register(rdev->rfkill); + if (res) { + rfkill_destroy(rdev->rfkill); + rdev->rfkill = NULL; + wiphy_unregister(&rdev->wiphy); + return res; + } + return 0; } EXPORT_SYMBOL(wiphy_register); @@ -640,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy) rtnl_unlock(); __count == 0; })); - rfkill_unregister(rdev->rfkill); + if (rdev->rfkill) + rfkill_unregister(rdev->rfkill); rtnl_lock(); rdev->wiphy.registered = false; @@ -953,8 +958,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) return notifier_from_errno(-EOPNOTSUPP); - if (rfkill_blocked(rdev->rfkill)) - return notifier_from_errno(-ERFKILL); ret = cfg80211_can_add_interface(rdev, wdev->iftype); if (ret) return notifier_from_errno(ret); diff --git a/net/wireless/core.h b/net/wireless/core.h index 9ad43c619c5..3159e9c284c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -411,6 +411,9 @@ static inline int cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; + return cfg80211_can_change_interface(rdev, NULL, iftype); } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 39bff7d3676..403fe29c024 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (chan->flags & IEEE80211_CHAN_DISABLED) continue; wdev->wext.ibss.chandef.chan = chan; + wdev->wext.ibss.chandef.center_freq1 = + chan->center_freq; break; } @@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + wdev->wext.ibss.chandef.center_freq1 = freq; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index af8d84a4a5b..626dc3b5fd8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2421,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } - if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + if (flags && (*flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; @@ -2483,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + if (!err && (flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 7d604c06c3d..a271c27fac7 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init( struct ieee80211_radiotap_header *radiotap_header, int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns) { + /* check the radiotap header can actually be present */ + if (max_length < sizeof(struct ieee80211_radiotap_header)) + return -EINVAL; + /* Linux only supports version 0 radiotap format */ if (radiotap_header->it_version) return -EINVAL; @@ -131,7 +135,8 @@ int ieee80211_radiotap_iterator_init( */ if ((unsigned long)iterator->_arg - - (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_rtheader + + sizeof(uint32_t) > (unsigned long)iterator->_max_length) return -EINVAL; } diff --git a/net/x25/Kconfig b/net/x25/Kconfig index c959312c45e..e2fa133f9fb 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -16,8 +16,8 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at <http://www.sangoma.com/x25.htm> and - <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>. + You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and + <http://docwiki.cisco.com/wiki/X.25>. Information about X.25 for Linux is contained in the files <file:Documentation/networking/x25.txt> and <file:Documentation/networking/x25-iface.txt>. diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2906d520eea..3be02b68026 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -141,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + struct crypto_comp *tfm; + u8 *scratch; int err; local_bh_disable(); + scratch = *this_cpu_ptr(ipcomp_scratches); + tfm = *this_cpu_ptr(ipcd->tfms); err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); if (err) goto out; @@ -158,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); + local_bh_enable(); pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); return 0; out: - put_cpu(); + local_bh_enable(); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ed38d5d81f9..76e1873811d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -334,7 +334,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) atomic_inc(&policy->genid); - del_timer(&policy->polq.hold_timer); + if (del_timer(&policy->polq.hold_timer)) + xfrm_pol_put(policy); xfrm_queue_purge(&policy->polq.hold_queue); if (del_timer(&policy->timer)) @@ -589,7 +590,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice_init(&pq->hold_queue, &list); - del_timer(&pq->hold_timer); + if (del_timer(&pq->hold_timer)) + xfrm_pol_put(old); spin_unlock_bh(&pq->hold_queue.lock); if (skb_queue_empty(&list)) @@ -600,7 +602,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice(&list, &pq->hold_queue); pq->timeout = XFRM_QUEUE_TMO_MIN; - mod_timer(&pq->hold_timer, jiffies); + if (!mod_timer(&pq->hold_timer, jiffies)) + xfrm_pol_hold(new); spin_unlock_bh(&pq->hold_queue.lock); } @@ -1769,6 +1772,10 @@ static void xfrm_policy_queue_process(unsigned long arg) spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); + if (!skb) { + spin_unlock(&pq->hold_queue.lock); + goto out; + } dst = skb_dst(skb); sk = skb->sk; xfrm_decode_session(skb, &fl, dst->ops->family); @@ -1787,8 +1794,9 @@ static void xfrm_policy_queue_process(unsigned long arg) goto purge_queue; pq->timeout = pq->timeout << 1; - mod_timer(&pq->hold_timer, jiffies + pq->timeout); - return; + if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) + xfrm_pol_hold(pol); + goto out; } dst_release(dst); @@ -1819,11 +1827,14 @@ static void xfrm_policy_queue_process(unsigned long arg) err = dst_output(skb); } +out: + xfrm_pol_put(pol); return; purge_queue: pq->timeout = 0; xfrm_queue_purge(&pq->hold_queue); + xfrm_pol_put(pol); } static int xdst_queue_output(struct sk_buff *skb) @@ -1831,7 +1842,8 @@ static int xdst_queue_output(struct sk_buff *skb) unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *) dst; - struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + struct xfrm_policy *pol = xdst->pols[0]; + struct xfrm_policy_queue *pq = &pol->polq; if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { kfree_skb(skb); @@ -1850,10 +1862,12 @@ static int xdst_queue_output(struct sk_buff *skb) if (del_timer(&pq->hold_timer)) { if (time_before(pq->hold_timer.expires, sched_next)) sched_next = pq->hold_timer.expires; + xfrm_pol_put(pol); } __skb_queue_tail(&pq->hold_queue, skb); - mod_timer(&pq->hold_timer, sched_next); + if (!mod_timer(&pq->hold_timer, sched_next)) + xfrm_pol_hold(pol); spin_unlock_bh(&pq->hold_queue.lock); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 8dafe6d3c6e..dab57daae40 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -61,9 +61,9 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event) switch (event) { case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && - (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { + if (!x->replay_maxdiff || + ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) { if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else @@ -129,8 +129,7 @@ static int xfrm_replay_check(struct xfrm_state *x, return 0; diff = x->replay.seq - seq; - if (diff >= min_t(unsigned int, x->props.replay_window, - sizeof(x->replay.bitmap) * 8)) { + if (diff >= x->props.replay_window) { x->stats.replay_window++; goto err; } @@ -302,9 +301,10 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) switch (event) { case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && - (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) { + if (!x->replay_maxdiff || + ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq + < x->replay_maxdiff))) { if (x->xflags & XFRM_TIME_DEFER) event = XFRM_REPLAY_TIMEOUT; else @@ -353,28 +353,30 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) switch (event) { case XFRM_REPLAY_UPDATE: - if (!x->replay_maxdiff) - break; - - if (replay_esn->seq_hi == preplay_esn->seq_hi) - seq_diff = replay_esn->seq - preplay_esn->seq; - else - seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; - - if (replay_esn->oseq_hi == preplay_esn->oseq_hi) - oseq_diff = replay_esn->oseq - preplay_esn->oseq; - else - oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; - - if (seq_diff < x->replay_maxdiff && - oseq_diff < x->replay_maxdiff) { + if (x->replay_maxdiff) { + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + + 1; - if (x->xflags & XFRM_TIME_DEFER) - event = XFRM_REPLAY_TIMEOUT; + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq + - preplay_esn->oseq; else - return; + oseq_diff = ~preplay_esn->oseq + + replay_esn->oseq + 1; + + if (seq_diff >= x->replay_maxdiff || + oseq_diff >= x->replay_maxdiff) + break; } + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + break; case XFRM_REPLAY_TIMEOUT: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3f565e495ac..f964d4c00ff 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -446,7 +446,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->sel, &p->sel, sizeof(x->sel)); memcpy(&x->lft, &p->lft, sizeof(x->lft)); x->props.mode = p->mode; - x->props.replay_window = p->replay_window; + x->props.replay_window = min_t(unsigned int, p->replay_window, + sizeof(x->replay.bitmap) * 8); x->props.reqid = p->reqid; x->props.family = p->family; memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); @@ -1856,7 +1857,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (x->km.state != XFRM_STATE_VALID) goto out; - err = xfrm_replay_verify_len(x->replay_esn, rp); + err = xfrm_replay_verify_len(x->replay_esn, re); if (err) goto out; |