diff options
Diffstat (limited to 'net')
298 files changed, 9993 insertions, 5409 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 762896ebfcf..47c908f1f62 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -530,6 +530,23 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, + unsigned int len) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; + + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); +} + +static const struct header_ops vlan_passthru_header_ops = { + .create = vlan_passthru_hard_header, + .rebuild = dev_rebuild_header, + .parse = eth_header_parse, +}; + static struct device_type vlan_type = { .name = "vlan", }; @@ -573,7 +590,7 @@ static int vlan_dev_init(struct net_device *dev) dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { - dev->header_ops = real_dev->header_ops; + dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { dev->header_ops = &vlan_header_ops; diff --git a/net/Kconfig b/net/Kconfig index d334678c0bd..e411046a62e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -238,12 +238,19 @@ config XPS depends on SMP default y -config NETPRIO_CGROUP +config CGROUP_NET_PRIO tristate "Network priority cgroup" depends on CGROUPS ---help--- Cgroup subsystem for use in assigning processes to network priorities on - a per-interface basis + a per-interface basis. + +config CGROUP_NET_CLASSID + boolean "Network classid cgroup" + depends on CGROUPS + ---help--- + Cgroup subsystem for use as general purpose socket classid marker that is + being used in cls_cgroup and for netfilter matching. config NET_RX_BUSY_POLL boolean diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index fa780b76630..2b2dc473e1f 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -5,7 +5,6 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" depends on NET - select CRC16 select LIBCRC32C default n help diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 4f4aabbd8ea..42df18f877e 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # @@ -13,9 +13,7 @@ # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA +# along with this program; if not, see <http://www.gnu.org/licenses/>. # obj-$(CONFIG_BATMAN_ADV) += batman-adv.o diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index a4808c29ea3..4e49666f8c6 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a2b480a9087..921ac20cf69 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -274,7 +272,14 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, if (!neigh_node) goto out; - spin_lock_init(&neigh_node->bat_iv.lq_update_lock); + if (!atomic_inc_not_zero(&hard_iface->refcount)) { + kfree(neigh_node); + neigh_node = NULL; + goto out; + } + + neigh_node->orig_node = orig_neigh; + neigh_node->if_incoming = hard_iface; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", @@ -307,9 +312,9 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; - batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; - batadv_ogm_packet->header.ttl = 2; + batadv_ogm_packet->packet_type = BATADV_IV_OGM; + batadv_ogm_packet->version = BATADV_COMPAT_VERSION; + batadv_ogm_packet->ttl = 2; batadv_ogm_packet->flags = BATADV_NO_FLAGS; batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; @@ -346,7 +351,7 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; - batadv_ogm_packet->header.ttl = BATADV_TTL; + batadv_ogm_packet->ttl = BATADV_TTL; } /* when do we schedule our own ogm to be sent */ @@ -435,7 +440,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, fwd_str, (packet_num > 0 ? "aggregated " : ""), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl, + batadv_ogm_packet->tq, batadv_ogm_packet->ttl, (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? "on" : "off"), hard_iface->net_dev->name, @@ -461,17 +466,9 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* send a batman ogm packet */ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { - struct batadv_hard_iface *hard_iface; struct net_device *soft_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; - struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char directlink; - uint8_t *packet_pos; - - packet_pos = forw_packet->skb->data; - batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - directlink = (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0); if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); @@ -481,59 +478,48 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) soft_iface = forw_packet->if_incoming->soft_iface; bat_priv = netdev_priv(soft_iface); - if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) + if (WARN_ON(!forw_packet->if_outgoing)) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) + if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) goto out; - /* multihomed peer assumed - * non-primary OGMs are only broadcasted on their interface - */ - if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || - (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - /* FIXME: what about aggregated packets ? */ - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "%s packet (originator %pM, seqno %u, TTL %d) on interface %s [%pM]\n", - (forw_packet->own ? "Sending own" : "Forwarding"), - batadv_ogm_packet->orig, - ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->header.ttl, - forw_packet->if_incoming->net_dev->name, - forw_packet->if_incoming->net_dev->dev_addr); - - /* skb is only used once and than forw_packet is free'd */ - batadv_send_skb_packet(forw_packet->skb, - forw_packet->if_incoming, - batadv_broadcast_addr); - forw_packet->skb = NULL; - + if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) goto out; - } - /* broadcast on every interface */ - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) - continue; + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; - batadv_iv_ogm_send_to_if(forw_packet, hard_iface); - } - rcu_read_unlock(); + /* only for one specific outgoing interface */ + batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); out: if (primary_if) batadv_hardif_free_ref(primary_if); } -/* return true if new_packet can be aggregated with forw_packet */ +/** + * batadv_iv_ogm_can_aggregate - find out if an OGM can be aggregated on an + * existing forward packet + * @new_bat_ogm_packet: OGM packet to be aggregated + * @bat_priv: the bat priv with all the soft interface information + * @packet_len: (total) length of the OGM + * @send_time: timestamp (jiffies) when the packet is to be sent + * @direktlink: true if this is a direct link packet + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @forw_packet: the forwarded packet which should be checked + * + * Returns true if new_packet can be aggregated with forw_packet + */ static bool batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, struct batadv_priv *bat_priv, int packet_len, unsigned long send_time, bool directlink, const struct batadv_hard_iface *if_incoming, + const struct batadv_hard_iface *if_outgoing, const struct batadv_forw_packet *forw_packet) { struct batadv_ogm_packet *batadv_ogm_packet; @@ -567,12 +553,16 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (!primary_if) goto out; + /* packet is not leaving on the same interface. */ + if (forw_packet->if_outgoing != if_outgoing) + goto out; + /* packets without direct link flag and high TTL * are flooded through the net */ if ((!directlink) && (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) && - (batadv_ogm_packet->header.ttl != 1) && + (batadv_ogm_packet->ttl != 1) && /* own packets originating non-primary * interfaces leave only that interface @@ -587,7 +577,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, * interface only - we still can aggregate */ if ((directlink) && - (new_bat_ogm_packet->header.ttl == 1) && + (new_bat_ogm_packet->ttl == 1) && (forw_packet->if_incoming == if_incoming) && /* packets from direct neighbors or @@ -608,11 +598,21 @@ out: return res; } -/* create a new aggregated packet and add this packet to it */ +/* batadv_iv_ogm_aggregate_new - create a new aggregated packet and add this + * packet to it. + * @packet_buff: pointer to the OGM + * @packet_len: (total) length of the OGM + * @send_time: timestamp (jiffies) when the packet is to be sent + * @direct_link: whether this OGM has direct link status + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @own_packet: true if it is a self-generated ogm + */ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, int own_packet) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -623,6 +623,9 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, if (!atomic_inc_not_zero(&if_incoming->refcount)) return; + if (!atomic_inc_not_zero(&if_outgoing->refcount)) + goto out_free_incoming; + /* own packet should always be scheduled */ if (!own_packet) { if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { @@ -663,6 +666,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->own = own_packet; forw_packet_aggr->if_incoming = if_incoming; + forw_packet_aggr->if_outgoing = if_outgoing; forw_packet_aggr->num_packets = 0; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; @@ -685,6 +689,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, return; out: + batadv_hardif_free_ref(if_outgoing); +out_free_incoming: batadv_hardif_free_ref(if_incoming); } @@ -708,10 +714,21 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, } } +/** + * batadv_iv_ogm_queue_add - queue up an OGM for transmission + * @bat_priv: the bat priv with all the soft interface information + * @packet_buff: pointer to the OGM + * @packet_len: (total) length of the OGM + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @own_packet: true if it is a self-generated ogm + * @send_time: timestamp (jiffies) when the packet is to be sent + */ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, unsigned char *packet_buff, int packet_len, struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, int own_packet, unsigned long send_time) { /* _aggr -> pointer to the packet we want to aggregate with @@ -737,6 +754,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, bat_priv, packet_len, send_time, direct_link, if_incoming, + if_outgoing, forw_packet_pos)) { forw_packet_aggr = forw_packet_pos; break; @@ -760,7 +778,8 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, batadv_iv_ogm_aggregate_new(packet_buff, packet_len, send_time, direct_link, - if_incoming, own_packet); + if_incoming, if_outgoing, + own_packet); } else { batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, direct_link); @@ -773,12 +792,13 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_ogm_packet *batadv_ogm_packet, bool is_single_hop_neigh, bool is_from_best_next_hop, - struct batadv_hard_iface *if_incoming) + struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); uint16_t tvlv_len; - if (batadv_ogm_packet->header.ttl <= 1) { + if (batadv_ogm_packet->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); return; } @@ -798,7 +818,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); - batadv_ogm_packet->header.ttl--; + batadv_ogm_packet->ttl--; memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); /* apply hop penalty */ @@ -807,7 +827,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: tq: %i, ttl: %i\n", - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl); + batadv_ogm_packet->tq, batadv_ogm_packet->ttl); /* switch of primaries first hop flag when forwarding */ batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP; @@ -818,7 +838,8 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_iv_ogm_queue_add(bat_priv, (unsigned char *)batadv_ogm_packet, BATADV_OGM_HLEN + tvlv_len, - if_incoming, 0, batadv_iv_ogm_fwd_send_time()); + if_incoming, if_outgoing, 0, + batadv_iv_ogm_fwd_send_time()); } /** @@ -863,10 +884,11 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; - struct batadv_hard_iface *primary_if; + struct batadv_hard_iface *primary_if, *tmp_hard_iface; int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; uint32_t seqno; uint16_t tvlv_len = 0; + unsigned long send_time; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -889,23 +911,60 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) atomic_inc(&hard_iface->bat_iv.ogm_seqno); batadv_iv_ogm_slide_own_bcast_window(hard_iface); - batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, - hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, - batadv_iv_ogm_emit_send_time(bat_priv)); + send_time = batadv_iv_ogm_emit_send_time(bat_priv); + + if (hard_iface != primary_if) { + /* OGMs from secondary interfaces are only scheduled on their + * respective interfaces. + */ + batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, + hard_iface, hard_iface, 1, send_time); + goto out; + } + + /* OGMs from primary interfaces are scheduled on all + * interfaces. + */ + rcu_read_lock(); + list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { + if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) + continue; + batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, + *ogm_buff_len, hard_iface, + tmp_hard_iface, 1, send_time); + } + rcu_read_unlock(); + +out: if (primary_if) batadv_hardif_free_ref(primary_if); } +/** + * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an + * originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the orig node who originally emitted the ogm packet + * @orig_ifinfo: ifinfo for the outgoing interface of the orig_node + * @ethhdr: Ethernet header of the OGM + * @batadv_ogm_packet: the ogm packet + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @dup_status: the duplicate status of this ogm packet. + */ static void batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_orig_ifinfo *orig_ifinfo, const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, struct batadv_hard_iface *if_incoming, - const unsigned char *tt_buff, + struct batadv_hard_iface *if_outgoing, enum batadv_dup_status dup_status) { + struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; @@ -933,12 +992,21 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (dup_status != BATADV_NO_DUP) continue; - spin_lock_bh(&tmp_neigh_node->bat_iv.lq_update_lock); - batadv_ring_buffer_set(tmp_neigh_node->bat_iv.tq_recv, - &tmp_neigh_node->bat_iv.tq_index, 0); - tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->bat_iv.tq_recv); - tmp_neigh_node->bat_iv.tq_avg = tq_avg; - spin_unlock_bh(&tmp_neigh_node->bat_iv.lq_update_lock); + /* only update the entry for this outgoing interface */ + neigh_ifinfo = batadv_neigh_ifinfo_get(tmp_neigh_node, + if_outgoing); + if (!neigh_ifinfo) + continue; + + spin_lock_bh(&tmp_neigh_node->ifinfo_lock); + batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv, + &neigh_ifinfo->bat_iv.tq_index, 0); + tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv); + neigh_ifinfo->bat_iv.tq_avg = tq_avg; + spin_unlock_bh(&tmp_neigh_node->ifinfo_lock); + + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + neigh_ifinfo = NULL; } if (!neigh_node) { @@ -960,39 +1028,49 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, "Updating existing last-hop neighbor of originator\n"); rcu_read_unlock(); + neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); + if (!neigh_ifinfo) + goto out; neigh_node->last_seen = jiffies; - spin_lock_bh(&neigh_node->bat_iv.lq_update_lock); - batadv_ring_buffer_set(neigh_node->bat_iv.tq_recv, - &neigh_node->bat_iv.tq_index, + spin_lock_bh(&neigh_node->ifinfo_lock); + batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv, + &neigh_ifinfo->bat_iv.tq_index, batadv_ogm_packet->tq); - tq_avg = batadv_ring_buffer_avg(neigh_node->bat_iv.tq_recv); - neigh_node->bat_iv.tq_avg = tq_avg; - spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock); + tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv); + neigh_ifinfo->bat_iv.tq_avg = tq_avg; + spin_unlock_bh(&neigh_node->ifinfo_lock); if (dup_status == BATADV_NO_DUP) { - orig_node->last_ttl = batadv_ogm_packet->header.ttl; - neigh_node->last_ttl = batadv_ogm_packet->header.ttl; + orig_ifinfo->last_ttl = batadv_ogm_packet->ttl; + neigh_ifinfo->last_ttl = batadv_ogm_packet->ttl; } - batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node); - /* if this neighbor already is our next hop there is nothing * to change */ - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, if_outgoing); if (router == neigh_node) goto out; - /* if this neighbor does not offer a better TQ we won't consider it */ - if (router && (router->bat_iv.tq_avg > neigh_node->bat_iv.tq_avg)) - goto out; + if (router) { + router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); + if (!router_ifinfo) + goto out; + + /* if this neighbor does not offer a better TQ we won't + * consider it + */ + if (router_ifinfo->bat_iv.tq_avg > neigh_ifinfo->bat_iv.tq_avg) + goto out; + } /* if the TQ is the same and the link not more symmetric we * won't consider it either */ - if (router && (neigh_node->bat_iv.tq_avg == router->bat_iv.tq_avg)) { + if (router_ifinfo && + (neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock); if_num = router->if_incoming->if_num; @@ -1009,7 +1087,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, goto out; } - batadv_update_route(bat_priv, orig_node, neigh_node); + batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); goto out; unlock: @@ -1019,20 +1097,37 @@ out: batadv_neigh_node_free_ref(neigh_node); if (router) batadv_neigh_node_free_ref(router); + if (neigh_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); } +/** + * batadv_iv_ogm_calc_tq - calculate tq for current received ogm packet + * @orig_node: the orig node who originally emitted the ogm packet + * @orig_neigh_node: the orig node struct of the neighbor who sent the packet + * @batadv_ogm_packet: the ogm packet + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * + * Returns 1 if the link can be considered bidirectional, 0 otherwise + */ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_hard_iface *if_incoming) + struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0; unsigned int combined_tq; + int tq_iface_penalty; /* find corresponding one hop neighbor */ rcu_read_lock(); @@ -1072,7 +1167,13 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; - neigh_rq_count = neigh_node->bat_iv.real_packet_count; + neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); + if (neigh_ifinfo) { + neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count; + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } else { + neigh_rq_count = 0; + } spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ @@ -1108,15 +1209,31 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, inv_asym_penalty /= neigh_rq_max_cube; tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty; - combined_tq = batadv_ogm_packet->tq * tq_own * tq_asym_penalty; - combined_tq /= BATADV_TQ_MAX_VALUE * BATADV_TQ_MAX_VALUE; + /* penalize if the OGM is forwarded on the same interface. WiFi + * interfaces and other half duplex devices suffer from throughput + * drops as they can't send and receive at the same time. + */ + tq_iface_penalty = BATADV_TQ_MAX_VALUE; + if (if_outgoing && (if_incoming == if_outgoing) && + batadv_is_wifi_netdev(if_outgoing->net_dev)) + tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, + bat_priv); + + combined_tq = batadv_ogm_packet->tq * + tq_own * + tq_asym_penalty * + tq_iface_penalty; + combined_tq /= BATADV_TQ_MAX_VALUE * + BATADV_TQ_MAX_VALUE * + BATADV_TQ_MAX_VALUE; batadv_ogm_packet->tq = combined_tq; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, total tq: %3i\n", + "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", orig_node->orig, orig_neigh_node->orig, total_count, - neigh_rq_count, tq_own, - tq_asym_penalty, batadv_ogm_packet->tq); + neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty, + batadv_ogm_packet->tq, if_incoming->net_dev->name, + if_outgoing ? if_outgoing->net_dev->name : "DEFAULT"); /* if link has the minimum required transmission quality * consider it bidirectional @@ -1136,17 +1253,21 @@ out: * @ethhdr: ethernet header of the packet * @batadv_ogm_packet: OGM packet to be considered * @if_incoming: interface on which the OGM packet was received + * @if_outgoing: interface for which the retransmission should be considered * * Returns duplicate status as enum batadv_dup_status */ static enum batadv_dup_status batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, - const struct batadv_hard_iface *if_incoming) + const struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; - struct batadv_neigh_node *tmp_neigh_node; + struct batadv_orig_ifinfo *orig_ifinfo = NULL; + struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; int is_dup; int32_t seq_diff; int need_update = 0; @@ -1161,27 +1282,37 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, if (!orig_node) return BATADV_NO_DUP; + orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); + if (WARN_ON(!orig_ifinfo)) { + batadv_orig_node_free_ref(orig_node); + return 0; + } + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); - seq_diff = seqno - orig_node->last_real_seqno; + seq_diff = seqno - orig_ifinfo->last_real_seqno; /* signalize caller that the packet is to be dropped. */ if (!hlist_empty(&orig_node->neigh_list) && batadv_window_protected(bat_priv, seq_diff, - &orig_node->batman_seqno_reset)) { + &orig_ifinfo->batman_seqno_reset)) { ret = BATADV_PROTECTED; goto out; } rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, - &orig_node->neigh_list, list) { - neigh_addr = tmp_neigh_node->addr; - is_dup = batadv_test_bit(tmp_neigh_node->bat_iv.real_bits, - orig_node->last_real_seqno, + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, + if_outgoing); + if (!neigh_ifinfo) + continue; + + neigh_addr = neigh_node->addr; + is_dup = batadv_test_bit(neigh_ifinfo->bat_iv.real_bits, + orig_ifinfo->last_real_seqno, seqno); if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && - tmp_neigh_node->if_incoming == if_incoming) { + neigh_node->if_incoming == if_incoming) { set_mark = 1; if (is_dup) ret = BATADV_NEIGH_DUP; @@ -1192,173 +1323,78 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, } /* if the window moved, set the update flag. */ - bitmap = tmp_neigh_node->bat_iv.real_bits; + bitmap = neigh_ifinfo->bat_iv.real_bits; need_update |= batadv_bit_get_packet(bat_priv, bitmap, seq_diff, set_mark); - packet_count = bitmap_weight(tmp_neigh_node->bat_iv.real_bits, + packet_count = bitmap_weight(bitmap, BATADV_TQ_LOCAL_WINDOW_SIZE); - tmp_neigh_node->bat_iv.real_packet_count = packet_count; + neigh_ifinfo->bat_iv.real_packet_count = packet_count; + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } rcu_read_unlock(); if (need_update) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "updating last_seqno: old %u, new %u\n", - orig_node->last_real_seqno, seqno); - orig_node->last_real_seqno = seqno; + "%s updating last_seqno: old %u, new %u\n", + if_outgoing ? if_outgoing->net_dev->name : "DEFAULT", + orig_ifinfo->last_real_seqno, seqno); + orig_ifinfo->last_real_seqno = seqno; } out: spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); batadv_orig_node_free_ref(orig_node); + if (orig_ifinfo) + batadv_orig_ifinfo_free_ref(orig_ifinfo); return ret; } -static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, - struct batadv_ogm_packet *batadv_ogm_packet, - const unsigned char *tt_buff, - struct batadv_hard_iface *if_incoming) + +/** + * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if + * @skb: the skb containing the OGM + * @orig_node: the (cached) orig node for the originator of this OGM + * @if_incoming: the interface where this packet was received + * @if_outgoing: the interface for which the packet should be considered + */ +static void +batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batadv_hard_iface *hard_iface; - struct batadv_orig_node *orig_neigh_node, *orig_node, *orig_node_tmp; struct batadv_neigh_node *router = NULL, *router_router = NULL; + struct batadv_orig_node *orig_neigh_node; + struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *orig_neigh_router = NULL; - int has_directlink_flag; - int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_bidirect; - bool is_single_hop_neigh = false; - bool is_from_best_next_hop = false; - int sameseq, similar_ttl; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_ogm_packet *ogm_packet; enum batadv_dup_status dup_status; - uint32_t if_incoming_seqno; + bool is_from_best_next_hop = false; + bool is_single_hop_neigh = false; + bool sameseq, similar_ttl; + struct sk_buff *skb_priv; + struct ethhdr *ethhdr; uint8_t *prev_sender; + int is_bidirect; - /* Silently drop when the batman packet is actually not a - * correct packet. - * - * This might happen if a packet is padded (e.g. Ethernet has a - * minimum frame length of 64 byte) and the aggregation interprets - * it as an additional length. - * - * TODO: A more sane solution would be to have a bit in the - * batadv_ogm_packet to detect whether the packet is the last - * packet in an aggregation. Here we expect that the padding - * is always zero (or not 0x01) + /* create a private copy of the skb, as some functions change tq value + * and/or flags. */ - if (batadv_ogm_packet->header.packet_type != BATADV_IV_OGM) + skb_priv = skb_copy(skb, GFP_ATOMIC); + if (!skb_priv) return; - /* could be changed by schedule_own_packet() */ - if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); - - if (batadv_ogm_packet->flags & BATADV_DIRECTLINK) - has_directlink_flag = 1; - else - has_directlink_flag = 0; + ethhdr = eth_hdr(skb_priv); + ogm_packet = (struct batadv_ogm_packet *)(skb_priv->data + ogm_offset); - if (batadv_compare_eth(ethhdr->h_source, batadv_ogm_packet->orig)) + dup_status = batadv_iv_ogm_update_seqnos(ethhdr, ogm_packet, + if_incoming, if_outgoing); + if (batadv_compare_eth(ethhdr->h_source, ogm_packet->orig)) is_single_hop_neigh = true; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n", - ethhdr->h_source, if_incoming->net_dev->name, - if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, - batadv_ogm_packet->prev_sender, - ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, - batadv_ogm_packet->header.ttl, - batadv_ogm_packet->header.version, has_directlink_flag); - - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->if_status != BATADV_IF_ACTIVE) - continue; - - if (hard_iface->soft_iface != if_incoming->soft_iface) - continue; - - if (batadv_compare_eth(ethhdr->h_source, - hard_iface->net_dev->dev_addr)) - is_my_addr = 1; - - if (batadv_compare_eth(batadv_ogm_packet->orig, - hard_iface->net_dev->dev_addr)) - is_my_orig = 1; - - if (batadv_compare_eth(batadv_ogm_packet->prev_sender, - hard_iface->net_dev->dev_addr)) - is_my_oldorig = 1; - } - rcu_read_unlock(); - - if (is_my_addr) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: received my own broadcast (sender: %pM)\n", - ethhdr->h_source); - return; - } - - if (is_my_orig) { - unsigned long *word; - int offset; - int32_t bit_pos; - int16_t if_num; - uint8_t *weight; - - orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, - ethhdr->h_source); - if (!orig_neigh_node) - return; - - /* neighbor has to indicate direct link and it has to - * come via the corresponding interface - * save packet seqno for bidirectional check - */ - if (has_directlink_flag && - batadv_compare_eth(if_incoming->net_dev->dev_addr, - batadv_ogm_packet->orig)) { - if_num = if_incoming->if_num; - offset = if_num * BATADV_NUM_WORDS; - - spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - word = &(orig_neigh_node->bat_iv.bcast_own[offset]); - bit_pos = if_incoming_seqno - 2; - bit_pos -= ntohl(batadv_ogm_packet->seqno); - batadv_set_bit(word, bit_pos); - weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num]; - *weight = bitmap_weight(word, - BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - } - - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: originator packet from myself (via neighbor)\n"); - batadv_orig_node_free_ref(orig_neigh_node); - return; - } - - if (is_my_oldorig) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all rebroadcast echos (sender: %pM)\n", - ethhdr->h_source); - return; - } - - if (batadv_ogm_packet->flags & BATADV_NOT_BEST_NEXT_HOP) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets not forwarded from the best next hop (sender: %pM)\n", - ethhdr->h_source); - return; - } - - orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig); - if (!orig_node) - return; - - dup_status = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet, - if_incoming); - if (dup_status == BATADV_PROTECTED) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: packet within seqno protection time (sender: %pM)\n", @@ -1366,27 +1402,28 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, goto out; } - if (batadv_ogm_packet->tq == 0) { + if (ogm_packet->tq == 0) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: originator packet with tq equal 0\n"); goto out; } - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { - orig_node_tmp = router->orig_node; - router_router = batadv_orig_node_get_router(orig_node_tmp); + router_router = batadv_orig_router_get(router->orig_node, + if_outgoing); + router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); } - if ((router && router->bat_iv.tq_avg != 0) && + if ((router_ifinfo && router_ifinfo->bat_iv.tq_avg != 0) && (batadv_compare_eth(router->addr, ethhdr->h_source))) is_from_best_next_hop = true; - prev_sender = batadv_ogm_packet->prev_sender; + prev_sender = ogm_packet->prev_sender; /* avoid temporary routing loops */ if (router && router_router && (batadv_compare_eth(router->addr, prev_sender)) && - !(batadv_compare_eth(batadv_ogm_packet->orig, prev_sender)) && + !(batadv_compare_eth(ogm_packet->orig, prev_sender)) && (batadv_compare_eth(router->addr, router_router->addr))) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast packets that may make me loop (sender: %pM)\n", @@ -1394,7 +1431,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, goto out; } - batadv_tvlv_ogm_receive(bat_priv, batadv_ogm_packet, orig_node); + if (if_outgoing == BATADV_IF_DEFAULT) + batadv_tvlv_ogm_receive(bat_priv, ogm_packet, orig_node); /* if sender is a direct neighbor the sender mac equals * originator mac @@ -1410,9 +1448,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* Update nc_nodes of the originator */ batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node, - batadv_ogm_packet, is_single_hop_neigh); + ogm_packet, is_single_hop_neigh); - orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node); + orig_neigh_router = batadv_orig_router_get(orig_neigh_node, + if_outgoing); /* drop packet if sender is not a direct neighbor and if we * don't route towards it @@ -1424,28 +1463,48 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, } is_bidirect = batadv_iv_ogm_calc_tq(orig_node, orig_neigh_node, - batadv_ogm_packet, if_incoming); - - batadv_bonding_save_primary(orig_node, orig_neigh_node, - batadv_ogm_packet); + ogm_packet, if_incoming, + if_outgoing); /* update ranking if it is not a duplicate or has the same * seqno and similar ttl as the non-duplicate */ - sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno); - similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl; + orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); + if (!orig_ifinfo) + goto out_neigh; + + sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); + similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; + if (is_bidirect && ((dup_status == BATADV_NO_DUP) || - (sameseq && similar_ttl))) - batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, - batadv_ogm_packet, if_incoming, - tt_buff, dup_status); + (sameseq && similar_ttl))) { + batadv_iv_ogm_orig_update(bat_priv, orig_node, + orig_ifinfo, ethhdr, + ogm_packet, if_incoming, + if_outgoing, dup_status); + } + batadv_orig_ifinfo_free_ref(orig_ifinfo); + + /* only forward for specific interface, not for the default one. */ + if (if_outgoing == BATADV_IF_DEFAULT) + goto out_neigh; /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { + /* OGMs from secondary interfaces should only scheduled once + * per interface where it has been received, not multiple times + */ + if ((ogm_packet->ttl <= 2) && + (if_incoming != if_outgoing)) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); + goto out_neigh; + } /* mark direct link on incoming interface */ - batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, + batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet, is_single_hop_neigh, - is_from_best_next_hop, if_incoming); + is_from_best_next_hop, if_incoming, + if_outgoing); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast neighbor packet with direct link flag\n"); @@ -1467,9 +1526,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); - batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, + batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet, is_single_hop_neigh, is_from_best_next_hop, - if_incoming); + if_incoming, if_outgoing); out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) @@ -1482,6 +1541,165 @@ out: if (orig_neigh_router) batadv_neigh_node_free_ref(orig_neigh_router); + kfree_skb(skb_priv); +} + +/** + * batadv_iv_ogm_process - process an incoming batman iv OGM + * @skb: the skb containing the OGM + * @ogm_offset: offset to the OGM which should be processed (for aggregates) + * @if_incoming: the interface where this packet was receved + */ +static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, + struct batadv_hard_iface *if_incoming) +{ + struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_orig_node *orig_neigh_node, *orig_node; + struct batadv_hard_iface *hard_iface; + struct batadv_ogm_packet *ogm_packet; + uint32_t if_incoming_seqno; + bool has_directlink_flag; + struct ethhdr *ethhdr; + bool is_my_oldorig = false; + bool is_my_addr = false; + bool is_my_orig = false; + + ogm_packet = (struct batadv_ogm_packet *)(skb->data + ogm_offset); + ethhdr = eth_hdr(skb); + + /* Silently drop when the batman packet is actually not a + * correct packet. + * + * This might happen if a packet is padded (e.g. Ethernet has a + * minimum frame length of 64 byte) and the aggregation interprets + * it as an additional length. + * + * TODO: A more sane solution would be to have a bit in the + * batadv_ogm_packet to detect whether the packet is the last + * packet in an aggregation. Here we expect that the padding + * is always zero (or not 0x01) + */ + if (ogm_packet->packet_type != BATADV_IV_OGM) + return; + + /* could be changed by schedule_own_packet() */ + if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); + + if (ogm_packet->flags & BATADV_DIRECTLINK) + has_directlink_flag = true; + else + has_directlink_flag = false; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n", + ethhdr->h_source, if_incoming->net_dev->name, + if_incoming->net_dev->dev_addr, ogm_packet->orig, + ogm_packet->prev_sender, ntohl(ogm_packet->seqno), + ogm_packet->tq, ogm_packet->ttl, + ogm_packet->version, has_directlink_flag); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != if_incoming->soft_iface) + continue; + + if (batadv_compare_eth(ethhdr->h_source, + hard_iface->net_dev->dev_addr)) + is_my_addr = true; + + if (batadv_compare_eth(ogm_packet->orig, + hard_iface->net_dev->dev_addr)) + is_my_orig = true; + + if (batadv_compare_eth(ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr)) + is_my_oldorig = true; + } + rcu_read_unlock(); + + if (is_my_addr) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: received my own broadcast (sender: %pM)\n", + ethhdr->h_source); + return; + } + + if (is_my_orig) { + unsigned long *word; + int offset; + int32_t bit_pos; + int16_t if_num; + uint8_t *weight; + + orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, + ethhdr->h_source); + if (!orig_neigh_node) + return; + + /* neighbor has to indicate direct link and it has to + * come via the corresponding interface + * save packet seqno for bidirectional check + */ + if (has_directlink_flag && + batadv_compare_eth(if_incoming->net_dev->dev_addr, + ogm_packet->orig)) { + if_num = if_incoming->if_num; + offset = if_num * BATADV_NUM_WORDS; + + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); + word = &(orig_neigh_node->bat_iv.bcast_own[offset]); + bit_pos = if_incoming_seqno - 2; + bit_pos -= ntohl(ogm_packet->seqno); + batadv_set_bit(word, bit_pos); + weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num]; + *weight = bitmap_weight(word, + BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: originator packet from myself (via neighbor)\n"); + batadv_orig_node_free_ref(orig_neigh_node); + return; + } + + if (is_my_oldorig) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast echos (sender: %pM)\n", + ethhdr->h_source); + return; + } + + if (ogm_packet->flags & BATADV_NOT_BEST_NEXT_HOP) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: ignoring all packets not forwarded from the best next hop (sender: %pM)\n", + ethhdr->h_source); + return; + } + + orig_node = batadv_iv_ogm_orig_get(bat_priv, ogm_packet->orig); + if (!orig_node) + return; + + batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, + if_incoming, BATADV_IF_DEFAULT); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, + if_incoming, hard_iface); + } + rcu_read_unlock(); + batadv_orig_node_free_ref(orig_node); } @@ -1489,11 +1707,9 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batadv_ogm_packet *batadv_ogm_packet; - struct ethhdr *ethhdr; - int buff_pos = 0, packet_len; - unsigned char *tvlv_buff, *packet_buff; + struct batadv_ogm_packet *ogm_packet; uint8_t *packet_pos; + int ogm_offset; bool ret; ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); @@ -1510,42 +1726,68 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, skb->len + ETH_HLEN); - packet_len = skb_headlen(skb); - ethhdr = eth_hdr(skb); - packet_buff = skb->data; - batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; + ogm_offset = 0; + ogm_packet = (struct batadv_ogm_packet *)skb->data; /* unpack the aggregated packets and process them one by one */ - while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tvlv_len)) { - tvlv_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; + while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb), + ogm_packet->tvlv_len)) { + batadv_iv_ogm_process(skb, ogm_offset, if_incoming); - batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, - tvlv_buff, if_incoming); + ogm_offset += BATADV_OGM_HLEN; + ogm_offset += ntohs(ogm_packet->tvlv_len); - buff_pos += BATADV_OGM_HLEN; - buff_pos += ntohs(batadv_ogm_packet->tvlv_len); - - packet_pos = packet_buff + buff_pos; - batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; + packet_pos = skb->data + ogm_offset; + ogm_packet = (struct batadv_ogm_packet *)packet_pos; } kfree_skb(skb); return NET_RX_SUCCESS; } +/* batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table + * @orig_node: the orig_node for which the neighbors are printed + * @if_outgoing: outgoing interface for these entries + * @seq: debugfs table seq_file struct + * + * Must be called while holding an rcu lock. + */ +static void +batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing, + struct seq_file *seq) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *n_ifinfo; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + continue; + + seq_printf(seq, " %pM (%3i)", + neigh_node->addr, + n_ifinfo->bat_iv.tq_avg); + + batadv_neigh_ifinfo_free_ref(n_ifinfo); + } +} + /** * batadv_iv_ogm_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct + * @if_outgoing: the outgoing interface for which this should be printed */ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, - struct seq_file *seq) + struct seq_file *seq, + struct batadv_hard_iface *if_outgoing) { - struct batadv_neigh_node *neigh_node, *neigh_node_tmp; + struct batadv_neigh_node *neigh_node; struct batadv_hashtable *hash = bat_priv->orig_hash; int last_seen_msecs, last_seen_secs; struct batadv_orig_node *orig_node; + struct batadv_neigh_ifinfo *n_ifinfo; unsigned long last_seen_jiffies; struct hlist_head *head; int batman_count = 0; @@ -1560,11 +1802,17 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - neigh_node = batadv_orig_node_get_router(orig_node); + neigh_node = batadv_orig_router_get(orig_node, + if_outgoing); if (!neigh_node) continue; - if (neigh_node->bat_iv.tq_avg == 0) + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, + if_outgoing); + if (!n_ifinfo) + goto next; + + if (n_ifinfo->bat_iv.tq_avg == 0) goto next; last_seen_jiffies = jiffies - orig_node->last_seen; @@ -1574,22 +1822,19 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", orig_node->orig, last_seen_secs, - last_seen_msecs, neigh_node->bat_iv.tq_avg, + last_seen_msecs, n_ifinfo->bat_iv.tq_avg, neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node_tmp, - &orig_node->neigh_list, list) { - seq_printf(seq, " %pM (%3i)", - neigh_node_tmp->addr, - neigh_node_tmp->bat_iv.tq_avg); - } - + batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing, + seq); seq_puts(seq, "\n"); batman_count++; next: batadv_neigh_node_free_ref(neigh_node); + if (n_ifinfo) + batadv_neigh_ifinfo_free_ref(n_ifinfo); } rcu_read_unlock(); } @@ -1601,37 +1846,84 @@ next: /** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison + * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison + * @if_outgoing2: outgoing interface for the second neighbor * * Returns a value less, equal to or greater than 0 if the metric via neigh1 is * lower, the same as or higher than the metric via neigh2 */ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2) + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2) { + struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; uint8_t tq1, tq2; + int diff; + + neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + + if (!neigh1_ifinfo || !neigh2_ifinfo) { + diff = 0; + goto out; + } + + tq1 = neigh1_ifinfo->bat_iv.tq_avg; + tq2 = neigh2_ifinfo->bat_iv.tq_avg; + diff = tq1 - tq2; - tq1 = neigh1->bat_iv.tq_avg; - tq2 = neigh2->bat_iv.tq_avg; +out: + if (neigh1_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); + if (neigh2_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); - return tq1 - tq2; + return diff; } /** * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than * neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison + * @if_outgoing: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison - * - * Returns true if the metric via neigh1 is equally good or better than the - * metric via neigh2, false otherwise. + * @if_outgoing2: outgoing interface for the second neighbor + + * Returns true if the metric via neigh1 is equally good or better than + * the metric via neigh2, false otherwise. */ -static bool batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2) +static bool +batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2) { - int diff = batadv_iv_ogm_neigh_cmp(neigh1, neigh2); + struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; + uint8_t tq1, tq2; + bool ret; - return diff > -BATADV_TQ_SIMILARITY_THRESHOLD; + neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + + /* we can't say that the metric is better */ + if (!neigh1_ifinfo || !neigh2_ifinfo) { + ret = false; + goto out; + } + + tq1 = neigh1_ifinfo->bat_iv.tq_avg; + tq2 = neigh2_ifinfo->bat_iv.tq_avg; + ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD; + +out: + if (neigh1_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); + if (neigh2_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); + + return ret; } static struct batadv_algo_ops batadv_batman_iv __read_mostly = { diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 973982414d5..9586750022f 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index a81b9322e38..cc2407351d3 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BITARRAY_H_ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 28eb5e6d0a0..5c0eda48d7a 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index da173e760e7..43c985d92c3 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BLA_H_ diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 049a7a2ac5b..b758881be10 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -250,6 +248,19 @@ static int batadv_originators_open(struct inode *inode, struct file *file) return single_open(file, batadv_orig_seq_print_text, net_dev); } +/** + * batadv_originators_hardif_open - handles debugfs output for the + * originator table of an hard interface + * @inode: inode pointer to debugfs file + * @file: pointer to the seq_file + */ +static int batadv_originators_hardif_open(struct inode *inode, + struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_orig_hardif_seq_print_text, net_dev); +} + static int batadv_gateways_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; @@ -371,6 +382,28 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { NULL, }; +#define BATADV_HARDIF_DEBUGINFO(_name, _mode, _open) \ +struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = _mode, \ + }, \ + .fops = { \ + .owner = THIS_MODULE, \ + .open = _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + }, \ +}; +static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, + batadv_originators_hardif_open); + +static struct batadv_debuginfo *batadv_hardif_debuginfos[] = { + &batadv_hardif_debuginfo_originators, + NULL, +}; + void batadv_debugfs_init(void) { struct batadv_debuginfo **bat_debug; @@ -398,6 +431,7 @@ void batadv_debugfs_init(void) return; err: debugfs_remove_recursive(batadv_debugfs); + batadv_debugfs = NULL; } void batadv_debugfs_destroy(void) @@ -406,6 +440,59 @@ void batadv_debugfs_destroy(void) batadv_debugfs = NULL; } +/** + * batadv_debugfs_add_hardif - creates the base directory for a hard interface + * in debugfs. + * @hard_iface: hard interface which should be added. + */ +int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) +{ + struct batadv_debuginfo **bat_debug; + struct dentry *file; + + if (!batadv_debugfs) + goto out; + + hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, + batadv_debugfs); + if (!hard_iface->debug_dir) + goto out; + + for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug) { + file = debugfs_create_file(((*bat_debug)->attr).name, + S_IFREG | ((*bat_debug)->attr).mode, + hard_iface->debug_dir, + hard_iface->net_dev, + &(*bat_debug)->fops); + if (!file) + goto rem_attr; + } + + return 0; +rem_attr: + debugfs_remove_recursive(hard_iface->debug_dir); + hard_iface->debug_dir = NULL; +out: +#ifdef CONFIG_DEBUG_FS + return -ENOMEM; +#else + return 0; +#endif /* CONFIG_DEBUG_FS */ +} + +/** + * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * in debugfs. + * @hard_iface: hard interface which is deleted. + */ +void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) +{ + if (batadv_debugfs) { + debugfs_remove_recursive(hard_iface->debug_dir); + hard_iface->debug_dir = NULL; + } +} + int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index f8c3849edff..37c4d6ddd04 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_DEBUGFS_H_ @@ -26,5 +24,7 @@ void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); +int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); +void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); #endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6c8c3934bd7..edee5041189 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/if_ether.h> @@ -141,7 +139,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_dat_entry, hash_entry); - return (memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0); + return memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0; } /** @@ -349,7 +347,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - switch (unicast_4addr_packet->u.header.packet_type) { + switch (unicast_4addr_packet->u.packet_type) { case BATADV_UNICAST: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within a UNICAST packet\n"); @@ -374,7 +372,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, break; default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } break; case BATADV_BCAST: @@ -387,7 +385,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within an unknown packet type (0x%x)\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } } @@ -591,7 +589,8 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, if (cand[i].type == BATADV_DAT_CANDIDATE_NOT_FOUND) continue; - neigh_node = batadv_orig_node_get_router(cand[i].orig_node); + neigh_node = batadv_orig_router_get(cand[i].orig_node, + BATADV_IF_DEFAULT); if (!neigh_node) goto free_orig; @@ -1039,9 +1038,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_send_skb_via_tt_4addr(bat_priv, skb_new, BATADV_P_DAT_CACHE_REPLY, - vid); + NULL, vid); else - err = batadv_send_skb_via_tt(bat_priv, skb_new, vid); + err = batadv_send_skb_via_tt(bat_priv, skb_new, NULL, vid); if (err != NET_XMIT_DROP) { batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 60d853beb8d..ac9be9b67a2 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -12,13 +12,11 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _NET_BATMAN_ADV_ARP_H_ -#define _NET_BATMAN_ADV_ARP_H_ +#ifndef _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_ +#define _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_ #ifdef CONFIG_BATMAN_ADV_DAT @@ -169,4 +167,4 @@ static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, #endif /* CONFIG_BATMAN_ADV_DAT */ -#endif /* _NET_BATMAN_ADV_ARP_H_ */ +#endif /* _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_ */ diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 271d321b3a0..88df9b1d552 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -355,7 +353,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES, skb->len + ETH_HLEN); - packet->header.ttl--; + packet->ttl--; batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = true; @@ -444,9 +442,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb, goto out_err; /* Create one header to be copied to all fragments */ - frag_header.header.packet_type = BATADV_UNICAST_FRAG; - frag_header.header.version = BATADV_COMPAT_VERSION; - frag_header.header.ttl = BATADV_TTL; + frag_header.packet_type = BATADV_UNICAST_FRAG; + frag_header.version = BATADV_COMPAT_VERSION; + frag_header.ttl = BATADV_TTL; frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno)); frag_header.reserved = 0; frag_header.no = 0; diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index ca029e2676e..5d7a0e66a22 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 2449afaa763..55cf2260d29 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -30,11 +28,17 @@ #include <linux/udp.h> #include <linux/if_vlan.h> -/* This is the offset of the options field in a dhcp packet starting at - * the beginning of the dhcp header +/* These are the offsets of the "hw type" and "hw address length" in the dhcp + * packet starting at the beginning of the dhcp header */ -#define BATADV_DHCP_OPTIONS_OFFSET 240 -#define BATADV_DHCP_REQUEST 3 +#define BATADV_DHCP_HTYPE_OFFSET 1 +#define BATADV_DHCP_HLEN_OFFSET 2 +/* Value of htype representing Ethernet */ +#define BATADV_DHCP_HTYPE_ETHERNET 0x01 +/* This is the offset of the "chaddr" field in the dhcp packet starting at the + * beginning of the dhcp header + */ +#define BATADV_DHCP_CHADDR_OFFSET 28 static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { @@ -105,7 +109,18 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, spin_unlock_bh(&bat_priv->gw.list_lock); } -void batadv_gw_deselect(struct batadv_priv *bat_priv) +/** + * batadv_gw_reselect - force a gateway reselection + * @bat_priv: the bat priv with all the soft interface information + * + * Set a flag to remind the GW component to perform a new gateway reselection. + * However this function does not ensure that the current gateway is going to be + * deselected. The reselection mechanism may elect the same gateway once again. + * + * This means that invoking batadv_gw_reselect() does not guarantee a gateway + * change and therefore a uevent is not necessarily expected. + */ +void batadv_gw_reselect(struct batadv_priv *bat_priv) { atomic_set(&bat_priv->gw.reselect, 1); } @@ -114,6 +129,7 @@ static struct batadv_gw_node * batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; @@ -130,14 +146,19 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) continue; orig_node = gw_node->orig_node; - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) continue; + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto next; + if (!atomic_inc_not_zero(&gw_node->refcount)) goto next; - tq_avg = router->bat_iv.tq_avg; + tq_avg = router_ifinfo->bat_iv.tq_avg; switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ @@ -182,6 +203,8 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) next: batadv_neigh_node_free_ref(router); + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); } rcu_read_unlock(); @@ -207,6 +230,11 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) if (!curr_gw) return; + /* deselect the current gateway so that next time that client mode is + * enabled a proper GW_ADD event can be sent + */ + batadv_gw_select(bat_priv, NULL); + /* if batman-adv is switching the gw client mode off and a gateway was * already selected, send a DEL uevent */ @@ -219,6 +247,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) { struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL; struct batadv_neigh_node *router = NULL; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; char gw_addr[18] = { '\0' }; if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) @@ -237,9 +266,17 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (next_gw) { sprintf(gw_addr, "%pM", next_gw->orig_node->orig); - router = batadv_orig_node_get_router(next_gw->orig_node); + router = batadv_orig_router_get(next_gw->orig_node, + BATADV_IF_DEFAULT); if (!router) { - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); + goto out; + } + + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) { + batadv_gw_reselect(bat_priv); goto out; } } @@ -256,7 +293,8 @@ void batadv_gw_election(struct batadv_priv *bat_priv) next_gw->bandwidth_down / 10, next_gw->bandwidth_down % 10, next_gw->bandwidth_up / 10, - next_gw->bandwidth_up % 10, router->bat_iv.tq_avg); + next_gw->bandwidth_up % 10, + router_ifinfo->bat_iv.tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_ADD, gw_addr); } else { @@ -266,7 +304,8 @@ void batadv_gw_election(struct batadv_priv *bat_priv) next_gw->bandwidth_down / 10, next_gw->bandwidth_down % 10, next_gw->bandwidth_up / 10, - next_gw->bandwidth_up % 10, router->bat_iv.tq_avg); + next_gw->bandwidth_up % 10, + router_ifinfo->bat_iv.tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE, gw_addr); } @@ -280,33 +319,47 @@ out: batadv_gw_node_free_ref(next_gw); if (router) batadv_neigh_node_free_ref(router); + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); } void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { + struct batadv_neigh_ifinfo *router_orig_tq = NULL; + struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; struct batadv_neigh_node *router_gw = NULL, *router_orig = NULL; uint8_t gw_tq_avg, orig_tq_avg; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) - goto deselect; + goto reselect; - router_gw = batadv_orig_node_get_router(curr_gw_orig); + router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); if (!router_gw) - goto deselect; + goto reselect; + + router_gw_tq = batadv_neigh_ifinfo_get(router_gw, + BATADV_IF_DEFAULT); + if (!router_gw_tq) + goto reselect; /* this node already is the gateway */ if (curr_gw_orig == orig_node) goto out; - router_orig = batadv_orig_node_get_router(orig_node); + router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router_orig) goto out; - gw_tq_avg = router_gw->bat_iv.tq_avg; - orig_tq_avg = router_orig->bat_iv.tq_avg; + router_orig_tq = batadv_neigh_ifinfo_get(router_orig, + BATADV_IF_DEFAULT); + if (!router_orig_tq) + goto out; + + gw_tq_avg = router_gw_tq->bat_iv.tq_avg; + orig_tq_avg = router_orig_tq->bat_iv.tq_avg; /* the TQ value has to be better */ if (orig_tq_avg < gw_tq_avg) @@ -323,8 +376,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", gw_tq_avg, orig_tq_avg); -deselect: - batadv_gw_deselect(bat_priv); +reselect: + batadv_gw_reselect(bat_priv); out: if (curr_gw_orig) batadv_orig_node_free_ref(curr_gw_orig); @@ -332,6 +385,10 @@ out: batadv_neigh_node_free_ref(router_gw); if (router_orig) batadv_neigh_node_free_ref(router_orig); + if (router_gw_tq) + batadv_neigh_ifinfo_free_ref(router_gw_tq); + if (router_orig_tq) + batadv_neigh_ifinfo_free_ref(router_orig_tq); return; } @@ -454,7 +511,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, */ curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (gw_node == curr_gw) - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); } out: @@ -480,7 +537,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) struct batadv_gw_node *gw_node, *curr_gw; struct hlist_node *node_tmp; unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); - int do_deselect = 0; + int do_reselect = 0; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); @@ -494,7 +551,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) continue; if (curr_gw == gw_node) - do_deselect = 1; + do_reselect = 1; hlist_del_rcu(&gw_node->list); batadv_gw_node_free_ref(gw_node); @@ -502,9 +559,9 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); - /* gw_deselect() needs to acquire the gw_list_lock */ - if (do_deselect) - batadv_gw_deselect(bat_priv); + /* gw_reselect() needs to acquire the gw_list_lock */ + if (do_reselect) + batadv_gw_reselect(bat_priv); if (curr_gw) batadv_gw_node_free_ref(curr_gw); @@ -517,28 +574,36 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv, { struct batadv_gw_node *curr_gw; struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; int ret = -1; - router = batadv_orig_node_get_router(gw_node->orig_node); + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); if (!router) goto out; + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", (curr_gw == gw_node ? "=>" : " "), gw_node->orig_node->orig, - router->bat_iv.tq_avg, router->addr, + router_ifinfo->bat_iv.tq_avg, router->addr, router->if_incoming->net_dev->name, gw_node->bandwidth_down / 10, gw_node->bandwidth_down % 10, gw_node->bandwidth_up / 10, gw_node->bandwidth_up % 10); - batadv_neigh_node_free_ref(router); if (curr_gw) batadv_gw_node_free_ref(curr_gw); out: + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); + if (router) + batadv_neigh_node_free_ref(router); return ret; } @@ -582,80 +647,39 @@ out: return 0; } -/* this call might reallocate skb data */ -static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) -{ - int ret = false; - unsigned char *p; - int pkt_len; - - if (skb_linearize(skb) < 0) - goto out; - - pkt_len = skb_headlen(skb); - - if (pkt_len < header_len + BATADV_DHCP_OPTIONS_OFFSET + 1) - goto out; - - p = skb->data + header_len + BATADV_DHCP_OPTIONS_OFFSET; - pkt_len -= header_len + BATADV_DHCP_OPTIONS_OFFSET + 1; - - /* Access the dhcp option lists. Each entry is made up by: - * - octet 1: option type - * - octet 2: option data len (only if type != 255 and 0) - * - octet 3: option data - */ - while (*p != 255 && !ret) { - /* p now points to the first octet: option type */ - if (*p == 53) { - /* type 53 is the message type option. - * Jump the len octet and go to the data octet - */ - if (pkt_len < 2) - goto out; - p += 2; - - /* check if the message type is what we need */ - if (*p == BATADV_DHCP_REQUEST) - ret = true; - break; - } else if (*p == 0) { - /* option type 0 (padding), just go forward */ - if (pkt_len < 1) - goto out; - pkt_len--; - p++; - } else { - /* This is any other option. So we get the length... */ - if (pkt_len < 1) - goto out; - pkt_len--; - p++; - - /* ...and then we jump over the data */ - if (pkt_len < 1 + (*p)) - goto out; - pkt_len -= 1 + (*p); - p += 1 + (*p); - } - } -out: - return ret; -} - -/* this call might reallocate skb data */ -bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) +/** + * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message + * @skb: the packet to check + * @header_len: a pointer to the batman-adv header size + * @chaddr: buffer where the client address will be stored. Valid + * only if the function returns BATADV_DHCP_TO_CLIENT + * + * Returns: + * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error + * while parsing it + * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server + * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client + * + * This function may re-allocate the data buffer of the skb passed as argument. + */ +enum batadv_dhcp_recipient +batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, + uint8_t *chaddr) { + enum batadv_dhcp_recipient ret = BATADV_DHCP_NO; struct ethhdr *ethhdr; struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct udphdr *udphdr; struct vlan_ethhdr *vhdr; + int chaddr_offset; __be16 proto; + uint8_t *p; /* check for ethernet header */ if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) - return false; + return BATADV_DHCP_NO; + ethhdr = (struct ethhdr *)skb->data; proto = ethhdr->h_proto; *header_len += ETH_HLEN; @@ -663,7 +687,7 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) /* check for initial vlan header */ if (proto == htons(ETH_P_8021Q)) { if (!pskb_may_pull(skb, *header_len + VLAN_HLEN)) - return false; + return BATADV_DHCP_NO; vhdr = (struct vlan_ethhdr *)skb->data; proto = vhdr->h_vlan_encapsulated_proto; @@ -674,32 +698,34 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) switch (proto) { case htons(ETH_P_IP): if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr))) - return false; + return BATADV_DHCP_NO; + iphdr = (struct iphdr *)(skb->data + *header_len); *header_len += iphdr->ihl * 4; /* check for udp header */ if (iphdr->protocol != IPPROTO_UDP) - return false; + return BATADV_DHCP_NO; break; case htons(ETH_P_IPV6): if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr))) - return false; + return BATADV_DHCP_NO; + ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len); *header_len += sizeof(*ipv6hdr); /* check for udp header */ if (ipv6hdr->nexthdr != IPPROTO_UDP) - return false; + return BATADV_DHCP_NO; break; default: - return false; + return BATADV_DHCP_NO; } if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr))) - return false; + return BATADV_DHCP_NO; /* skb->data might have been reallocated by pskb_may_pull() */ ethhdr = (struct ethhdr *)skb->data; @@ -710,17 +736,40 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) *header_len += sizeof(*udphdr); /* check for bootp port */ - if ((proto == htons(ETH_P_IP)) && - (udphdr->dest != htons(67))) - return false; + switch (proto) { + case htons(ETH_P_IP): + if (udphdr->dest == htons(67)) + ret = BATADV_DHCP_TO_SERVER; + else if (udphdr->source == htons(67)) + ret = BATADV_DHCP_TO_CLIENT; + break; + case htons(ETH_P_IPV6): + if (udphdr->dest == htons(547)) + ret = BATADV_DHCP_TO_SERVER; + else if (udphdr->source == htons(547)) + ret = BATADV_DHCP_TO_CLIENT; + break; + } - if ((proto == htons(ETH_P_IPV6)) && - (udphdr->dest != htons(547))) - return false; + chaddr_offset = *header_len + BATADV_DHCP_CHADDR_OFFSET; + /* store the client address if the message is going to a client */ + if (ret == BATADV_DHCP_TO_CLIENT && + pskb_may_pull(skb, chaddr_offset + ETH_ALEN)) { + /* check if the DHCP packet carries an Ethernet DHCP */ + p = skb->data + *header_len + BATADV_DHCP_HTYPE_OFFSET; + if (*p != BATADV_DHCP_HTYPE_ETHERNET) + return BATADV_DHCP_NO; + + /* check if the DHCP packet carries a valid Ethernet address */ + p = skb->data + *header_len + BATADV_DHCP_HLEN_OFFSET; + if (*p != ETH_ALEN) + return BATADV_DHCP_NO; + + memcpy(chaddr, skb->data + chaddr_offset, ETH_ALEN); + } - return true; + return ret; } - /** * batadv_gw_out_of_range - check if the dhcp request destination is the best gw * @bat_priv: the bat priv with all the soft interface information @@ -734,6 +783,7 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) * false otherwise. * * This call might reallocate skb data. + * Must be invoked only when the DHCP packet is going TO a DHCP SERVER. */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -741,19 +791,14 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; struct batadv_orig_node *orig_dst_node = NULL; struct batadv_gw_node *gw_node = NULL, *curr_gw = NULL; - struct ethhdr *ethhdr; - bool ret, out_of_range = false; - unsigned int header_len = 0; + struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo; + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + bool out_of_range = false; uint8_t curr_tq_avg; unsigned short vid; vid = batadv_get_vid(skb, 0); - ret = batadv_gw_is_dhcp_target(skb, &header_len); - if (!ret) - goto out; - - ethhdr = (struct ethhdr *)skb->data; orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source, ethhdr->h_dest, vid); if (!orig_dst_node) @@ -763,10 +808,6 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!gw_node->bandwidth_down == 0) goto out; - ret = batadv_is_type_dhcprequest(skb, header_len); - if (!ret) - goto out; - switch (atomic_read(&bat_priv->gw_mode)) { case BATADV_GW_MODE_SERVER: /* If we are a GW then we are our best GW. We can artificially @@ -792,7 +833,14 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!neigh_curr) goto out; - curr_tq_avg = neigh_curr->bat_iv.tq_avg; + curr_ifinfo = batadv_neigh_ifinfo_get(neigh_curr, + BATADV_IF_DEFAULT); + if (!curr_ifinfo) + goto out; + + curr_tq_avg = curr_ifinfo->bat_iv.tq_avg; + batadv_neigh_ifinfo_free_ref(curr_ifinfo); + break; case BATADV_GW_MODE_OFF: default: @@ -803,8 +851,13 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!neigh_old) goto out; - if (curr_tq_avg - neigh_old->bat_iv.tq_avg > BATADV_GW_THRESHOLD) + old_ifinfo = batadv_neigh_ifinfo_get(neigh_old, BATADV_IF_DEFAULT); + if (!old_ifinfo) + goto out; + + if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD) out_of_range = true; + batadv_neigh_ifinfo_free_ref(old_ifinfo); out: if (orig_dst_node) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index d95c2d23195..7ee53bb7d50 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,16 +12,14 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ #define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv); -void batadv_gw_deselect(struct batadv_priv *bat_priv); +void batadv_gw_reselect(struct batadv_priv *bat_priv); void batadv_gw_election(struct batadv_priv *bat_priv); struct batadv_orig_node * batadv_gw_get_selected_orig(struct batadv_priv *bat_priv); @@ -34,7 +32,9 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_purge(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); -bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); +enum batadv_dhcp_recipient +batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, + uint8_t *chaddr); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b211b0f9cb7..6f5e621f220 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -164,7 +162,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, if ((down_curr == down_new) && (up_curr == up_new)) return count; - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); batadv_info(net_dev, "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n", down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10, diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 56384a4cd18..aa511656194 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 57c2a19dcb5..1b12573bcf8 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -25,6 +23,7 @@ #include "translation-table.h" #include "routing.h" #include "sysfs.h" +#include "debugfs.h" #include "originator.h" #include "hash.h" #include "bridge_loop_avoidance.h" @@ -541,6 +540,7 @@ static void batadv_hardif_remove_interface_finish(struct work_struct *work) hard_iface = container_of(work, struct batadv_hard_iface, cleanup_work); + batadv_debugfs_del_hardif(hard_iface); batadv_sysfs_del_hardif(&hard_iface->hardif_obj); batadv_hardif_free_ref(hard_iface); } @@ -571,6 +571,11 @@ batadv_hardif_add_interface(struct net_device *net_dev) hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; + + ret = batadv_debugfs_add_hardif(hard_iface); + if (ret) + goto free_sysfs; + INIT_LIST_HEAD(&hard_iface->list); INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); @@ -587,6 +592,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) return hard_iface; +free_sysfs: + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); free_if: kfree(hard_iface); release_dev: diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index df4c8bd45c4..1918cd50b62 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_ @@ -42,6 +40,7 @@ enum batadv_hard_if_cleanup { extern struct notifier_block batadv_hard_if_notifier; bool batadv_is_wifi_netdev(struct net_device *net_device); +bool batadv_is_wifi_iface(int ifindex); struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, @@ -53,6 +52,11 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); void batadv_hardif_free_rcu(struct rcu_head *rcu); +/** + * batadv_hardif_free_ref - decrement the hard interface refcounter and + * possibly free it + * @hard_iface: the hard interface to free + */ static inline void batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) { @@ -60,6 +64,18 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); } +/** + * batadv_hardif_free_ref_now - decrement the hard interface refcounter and + * possibly free it (without rcu callback) + * @hard_iface: the hard interface to free + */ +static inline void +batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface) +{ + if (atomic_dec_and_test(&hard_iface->refcount)) + batadv_hardif_free_rcu(&hard_iface->rcu); +} + static inline struct batadv_hard_iface * batadv_primary_if_get_selected(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 7198dafd3bf..63bdf7e94f1 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 1b4da72f209..539fc126679 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_HASH_H_ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 29ae4efe354..abb9d6e0388 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -194,7 +192,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto free_skb; } - if (icmp_header->header.packet_type != BATADV_ICMP) { + if (icmp_header->packet_type != BATADV_ICMP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); len = -EINVAL; @@ -217,7 +215,8 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (!orig_node) goto dst_unreach; - neigh_node = batadv_orig_node_get_router(orig_node); + neigh_node = batadv_orig_router_get(orig_node, + BATADV_IF_DEFAULT); if (!neigh_node) goto dst_unreach; @@ -243,9 +242,9 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, icmp_header->uid = socket_client->index; - if (icmp_header->header.version != BATADV_COMPAT_VERSION) { + if (icmp_header->version != BATADV_COMPAT_VERSION) { icmp_header->msg_type = BATADV_PARAMETER_PROBLEM; - icmp_header->header.version = BATADV_COMPAT_VERSION; + icmp_header->version = BATADV_COMPAT_VERSION; batadv_socket_add_packet(socket_client, icmp_header, packet_len); goto free_skb; diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 6665080dff7..0c33950aa4a 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c51a5e568f0..e56b4d6a43b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/crc32c.h> @@ -383,17 +381,17 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); + batadv_ogm_packet->version); goto err_free; } /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ - idx = batadv_ogm_packet->header.packet_type; + idx = batadv_ogm_packet->packet_type; ret = (*batadv_rx_handler[idx])(skb, hard_iface); if (ret == NET_RX_DROP) @@ -421,13 +419,23 @@ static void batadv_recv_handler_init(void) for (i = BATADV_UNICAST_MIN; i <= BATADV_UNICAST_MAX; i++) batadv_rx_handler[i] = batadv_recv_unhandled_unicast_packet; - /* compile time checks for struct member offsets */ - BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10); - BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4); + /* compile time checks for sizes */ + BUILD_BUG_ON(sizeof(struct batadv_bla_claim_dst) != 6); + BUILD_BUG_ON(sizeof(struct batadv_ogm_packet) != 24); + BUILD_BUG_ON(sizeof(struct batadv_icmp_header) != 20); + BUILD_BUG_ON(sizeof(struct batadv_icmp_packet) != 20); + BUILD_BUG_ON(sizeof(struct batadv_icmp_packet_rr) != 116); + BUILD_BUG_ON(sizeof(struct batadv_unicast_packet) != 10); + BUILD_BUG_ON(sizeof(struct batadv_unicast_4addr_packet) != 18); + BUILD_BUG_ON(sizeof(struct batadv_frag_packet) != 20); + BUILD_BUG_ON(sizeof(struct batadv_bcast_packet) != 14); + BUILD_BUG_ON(sizeof(struct batadv_coded_packet) != 46); + BUILD_BUG_ON(sizeof(struct batadv_unicast_tvlv_packet) != 20); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_hdr) != 4); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_gateway_data) != 8); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_vlan_data) != 8); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; @@ -1119,9 +1127,9 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, skb_reserve(skb, ETH_HLEN); tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; - unicast_tvlv_packet->header.packet_type = BATADV_UNICAST_TVLV; - unicast_tvlv_packet->header.version = BATADV_COMPAT_VERSION; - unicast_tvlv_packet->header.ttl = BATADV_TTL; + unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->ttl = BATADV_TTL; unicast_tvlv_packet->reserved = 0; unicast_tvlv_packet->tvlv_len = htons(tvlv_len); unicast_tvlv_packet->align = 0; @@ -1173,6 +1181,32 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) return vid; } +/** + * batadv_vlan_ap_isola_get - return the AP isolation status for the given vlan + * @bat_priv: the bat priv with all the soft interface information + * @vid: the VLAN identifier for which the AP isolation attributed as to be + * looked up + * + * Returns true if AP isolation is on for the VLAN idenfied by vid, false + * otherwise + */ +bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) +{ + bool ap_isolation_enabled = false; + struct batadv_softif_vlan *vlan; + + /* if the AP isolation is requested on a VLAN, then check for its + * setting in the proper VLAN private data structure + */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (vlan) { + ap_isolation_enabled = atomic_read(&vlan->ap_isolation); + batadv_softif_vlan_free_ref(vlan); + } + + return ap_isolation_enabled; +} + static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { struct batadv_algo_ops *bat_algo_ops; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 322dd732373..9374f1a5134 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_MAIN_H_ @@ -26,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.5.0" +#define BATADV_SOURCE_VERSION "2014.1.0" #endif /* B.A.T.M.A.N. parameters */ @@ -72,6 +70,14 @@ #define BATADV_NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */ +#define BATADV_NO_MARK 0 + +/* default interface for multi interface operation. The default interface is + * used for communication which originated locally (i.e. is not forwarded) + * or where special forwarding is not desired/necessary. + */ +#define BATADV_IF_DEFAULT ((struct batadv_hard_iface *)NULL) + #define BATADV_NUM_WORDS BITS_TO_LONGS(BATADV_TQ_LOCAL_WINDOW_SIZE) #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -369,5 +375,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, uint8_t *dst, uint8_t type, uint8_t version, void *tvlv_value, uint16_t tvlv_value_len); unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len); +bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid); #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 351e199bc0a..f1b604d88dc 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/debugfs.h> @@ -720,9 +718,21 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_ogm_packet *ogm_packet) { - if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) + struct batadv_orig_ifinfo *orig_ifinfo; + uint32_t last_real_seqno; + uint8_t last_ttl; + + orig_ifinfo = batadv_orig_ifinfo_get(orig_node, BATADV_IF_DEFAULT); + if (!orig_ifinfo) return false; - if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + + last_ttl = orig_ifinfo->last_ttl; + last_real_seqno = orig_ifinfo->last_real_seqno; + batadv_orig_ifinfo_free_ref(orig_ifinfo); + + if (last_real_seqno != ntohl(ogm_packet->seqno)) + return false; + if (last_ttl != ogm_packet->ttl + 1) return false; if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) return false; @@ -1010,6 +1020,8 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct batadv_coded_packet *coded_packet; struct batadv_neigh_node *neigh_tmp, *router_neigh; struct batadv_neigh_node *router_coding = NULL; + struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL; uint8_t *first_source, *first_dest, *second_source, *second_dest; __be32 packet_id1, packet_id2; size_t count; @@ -1019,19 +1031,34 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, int coded_size = sizeof(*coded_packet); int header_add = coded_size - unicast_size; - router_neigh = batadv_orig_node_get_router(neigh_node->orig_node); + /* TODO: do we need to consider the outgoing interface for + * coded packets? + */ + router_neigh = batadv_orig_router_get(neigh_node->orig_node, + BATADV_IF_DEFAULT); if (!router_neigh) goto out; + router_neigh_ifinfo = batadv_neigh_ifinfo_get(router_neigh, + BATADV_IF_DEFAULT); + if (!router_neigh_ifinfo) + goto out; + neigh_tmp = nc_packet->neigh_node; - router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node); + router_coding = batadv_orig_router_get(neigh_tmp->orig_node, + BATADV_IF_DEFAULT); if (!router_coding) goto out; - tq_tmp = batadv_nc_random_weight_tq(router_neigh->bat_iv.tq_avg); - tq_weighted_neigh = tq_tmp; - tq_tmp = batadv_nc_random_weight_tq(router_coding->bat_iv.tq_avg); - tq_weighted_coding = tq_tmp; + router_coding_ifinfo = batadv_neigh_ifinfo_get(router_coding, + BATADV_IF_DEFAULT); + if (!router_coding_ifinfo) + goto out; + + tq_tmp = router_neigh_ifinfo->bat_iv.tq_avg; + tq_weighted_neigh = batadv_nc_random_weight_tq(tq_tmp); + tq_tmp = router_coding_ifinfo->bat_iv.tq_avg; + tq_weighted_coding = batadv_nc_random_weight_tq(tq_tmp); /* Select one destination for the MAC-header dst-field based on * weighted TQ-values. @@ -1082,9 +1109,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, coded_packet = (struct batadv_coded_packet *)skb_dest->data; skb_reset_mac_header(skb_dest); - coded_packet->header.packet_type = BATADV_CODED; - coded_packet->header.version = BATADV_COMPAT_VERSION; - coded_packet->header.ttl = packet1->header.ttl; + coded_packet->packet_type = BATADV_CODED; + coded_packet->version = BATADV_COMPAT_VERSION; + coded_packet->ttl = packet1->ttl; /* Info about first unicast packet */ memcpy(coded_packet->first_source, first_source, ETH_ALEN); @@ -1097,7 +1124,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, memcpy(coded_packet->second_source, second_source, ETH_ALEN); memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); coded_packet->second_crc = packet_id2; - coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttl = packet2->ttl; coded_packet->second_ttvn = packet2->ttvn; coded_packet->coded_len = htons(coding_len); @@ -1155,6 +1182,10 @@ out: batadv_neigh_node_free_ref(router_neigh); if (router_coding) batadv_neigh_node_free_ref(router_coding); + if (router_neigh_ifinfo) + batadv_neigh_ifinfo_free_ref(router_neigh_ifinfo); + if (router_coding_ifinfo) + batadv_neigh_ifinfo_free_ref(router_coding_ifinfo); return res; } @@ -1452,7 +1483,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb, /* We only handle unicast packets */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Try to find a coding opportunity and send the skb if one is found */ @@ -1505,7 +1536,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, /* Check for supported packet type */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Find existing nc_path or create a new */ @@ -1623,7 +1654,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ttvn = coded_packet_tmp.second_ttvn; } else { orig_dest = coded_packet_tmp.first_orig_dest; - ttl = coded_packet_tmp.header.ttl; + ttl = coded_packet_tmp.ttl; ttvn = coded_packet_tmp.first_ttvn; } @@ -1648,9 +1679,9 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /* Create decoded unicast packet */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.packet_type = BATADV_UNICAST; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - unicast_packet->header.ttl = ttl; + unicast_packet->packet_type = BATADV_UNICAST; + unicast_packet->version = BATADV_COMPAT_VERSION; + unicast_packet->ttl = ttl; memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); unicast_packet->ttvn = ttvn; diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index d4fd315b526..358c0d686ab 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ @@ -64,7 +62,6 @@ static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - return; } static inline void @@ -74,7 +71,6 @@ batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_ogm_packet *ogm_packet, int is_single_hop_neigh) { - return; } static inline void @@ -83,17 +79,14 @@ batadv_nc_purge_orig(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_priv *, struct batadv_nc_node *)) { - return; } static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) { - return; } static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) { - return; } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, @@ -106,14 +99,12 @@ static inline void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb) { - return; } static inline void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - return; } static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq, diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8ab14340d10..6df12a2e360 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -41,7 +39,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_orig_node, hash_entry); - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return batadv_compare_eth(data1, data2); } /** @@ -150,20 +148,114 @@ err: return -ENOMEM; } +/** + * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object + * @rcu: rcu pointer of the neigh_ifinfo object + */ +static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + + neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu); + + if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing); + + kfree(neigh_ifinfo); +} + +/** + * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free + * the neigh_ifinfo (without rcu callback) + * @neigh_ifinfo: the neigh_ifinfo object to release + */ +static void +batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo) +{ + if (atomic_dec_and_test(&neigh_ifinfo->refcount)) + batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu); +} + +/** + * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free + * the neigh_ifinfo + * @neigh_ifinfo: the neigh_ifinfo object to release + */ +void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) +{ + if (atomic_dec_and_test(&neigh_ifinfo->refcount)) + call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); +} + +/** + * batadv_neigh_node_free_rcu - free the neigh_node + * @rcu: rcu pointer of the neigh_node + */ +static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) +{ + struct hlist_node *node_tmp; + struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; + + neigh_node = container_of(rcu, struct batadv_neigh_node, rcu); + + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh_node->ifinfo_list, list) { + batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); + } + batadv_hardif_free_ref_now(neigh_node->if_incoming); + + kfree(neigh_node); +} + +/** + * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter + * and possibly free it (without rcu callback) + * @neigh_node: neigh neighbor to free + */ +static void +batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node) +{ + if (atomic_dec_and_test(&neigh_node->refcount)) + batadv_neigh_node_free_rcu(&neigh_node->rcu); +} + +/** + * batadv_neigh_node_free_ref - decrement the neighbors refcounter + * and possibly free it + * @neigh_node: neigh neighbor to free + */ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) { if (atomic_dec_and_test(&neigh_node->refcount)) - kfree_rcu(neigh_node, rcu); + call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu); } -/* increases the refcounter of a found router */ +/** + * batadv_orig_node_get_router - router to the originator depending on iface + * @orig_node: the orig node for the router + * @if_outgoing: the interface where the payload packet has been received or + * the OGM should be sent to + * + * Returns the neighbor which should be router for this orig_node/iface. + * + * The object is returned with refcounter increased by 1. + */ struct batadv_neigh_node * -batadv_orig_node_get_router(struct batadv_orig_node *orig_node) +batadv_orig_router_get(struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *if_outgoing) { - struct batadv_neigh_node *router; + struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router = NULL; rcu_read_lock(); - router = rcu_dereference(orig_node->router); + hlist_for_each_entry_rcu(orig_ifinfo, &orig_node->ifinfo_list, list) { + if (orig_ifinfo->if_outgoing != if_outgoing) + continue; + + router = rcu_dereference(orig_ifinfo->router); + break; + } if (router && !atomic_inc_not_zero(&router->refcount)) router = NULL; @@ -173,6 +265,164 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) } /** + * batadv_orig_ifinfo_get - find the ifinfo from an orig_node + * @orig_node: the orig node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * Returns the requested orig_ifinfo or NULL if not found. + * + * The object is returned with refcounter increased by 1. + */ +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_orig_ifinfo *tmp, *orig_ifinfo = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, &orig_node->ifinfo_list, + list) { + if (tmp->if_outgoing != if_outgoing) + continue; + + if (!atomic_inc_not_zero(&tmp->refcount)) + continue; + + orig_ifinfo = tmp; + break; + } + rcu_read_unlock(); + + return orig_ifinfo; +} + +/** + * batadv_orig_ifinfo_new - search and possibly create an orig_ifinfo object + * @orig_node: the orig node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * Returns NULL in case of failure or the orig_ifinfo object for the if_outgoing + * interface otherwise. The object is created and added to the list + * if it does not exist. + * + * The object is returned with refcounter increased by 1. + */ +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_orig_ifinfo *orig_ifinfo = NULL; + unsigned long reset_time; + + spin_lock_bh(&orig_node->neigh_list_lock); + + orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_outgoing); + if (orig_ifinfo) + goto out; + + orig_ifinfo = kzalloc(sizeof(*orig_ifinfo), GFP_ATOMIC); + if (!orig_ifinfo) + goto out; + + if (if_outgoing != BATADV_IF_DEFAULT && + !atomic_inc_not_zero(&if_outgoing->refcount)) { + kfree(orig_ifinfo); + orig_ifinfo = NULL; + goto out; + } + + reset_time = jiffies - 1; + reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); + orig_ifinfo->batman_seqno_reset = reset_time; + orig_ifinfo->if_outgoing = if_outgoing; + INIT_HLIST_NODE(&orig_ifinfo->list); + atomic_set(&orig_ifinfo->refcount, 2); + hlist_add_head_rcu(&orig_ifinfo->list, + &orig_node->ifinfo_list); +out: + spin_unlock_bh(&orig_node->neigh_list_lock); + return orig_ifinfo; +} + +/** + * batadv_neigh_ifinfo_get - find the ifinfo from an neigh_node + * @neigh_node: the neigh node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * The object is returned with refcounter increased by 1. + * + * Returns the requested neigh_ifinfo or NULL if not found + */ +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo = NULL, + *tmp_neigh_ifinfo; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_ifinfo, &neigh->ifinfo_list, + list) { + if (tmp_neigh_ifinfo->if_outgoing != if_outgoing) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_ifinfo->refcount)) + continue; + + neigh_ifinfo = tmp_neigh_ifinfo; + break; + } + rcu_read_unlock(); + + return neigh_ifinfo; +} + +/** + * batadv_neigh_ifinfo_new - search and possibly create an neigh_ifinfo object + * @neigh_node: the neigh node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * Returns NULL in case of failure or the neigh_ifinfo object for the + * if_outgoing interface otherwise. The object is created and added to the list + * if it does not exist. + * + * The object is returned with refcounter increased by 1. + */ +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + + spin_lock_bh(&neigh->ifinfo_lock); + + neigh_ifinfo = batadv_neigh_ifinfo_get(neigh, if_outgoing); + if (neigh_ifinfo) + goto out; + + neigh_ifinfo = kzalloc(sizeof(*neigh_ifinfo), GFP_ATOMIC); + if (!neigh_ifinfo) + goto out; + + if (if_outgoing && !atomic_inc_not_zero(&if_outgoing->refcount)) { + kfree(neigh_ifinfo); + neigh_ifinfo = NULL; + goto out; + } + + INIT_HLIST_NODE(&neigh_ifinfo->list); + atomic_set(&neigh_ifinfo->refcount, 2); + neigh_ifinfo->if_outgoing = if_outgoing; + + hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); + +out: + spin_unlock_bh(&neigh->ifinfo_lock); + + return neigh_ifinfo; +} + +/** * batadv_neigh_node_new - create and init a new neigh_node object * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface @@ -193,13 +443,13 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, goto out; INIT_HLIST_NODE(&neigh_node->list); + INIT_HLIST_HEAD(&neigh_node->ifinfo_list); + spin_lock_init(&neigh_node->ifinfo_lock); memcpy(neigh_node->addr, neigh_addr, ETH_ALEN); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; - INIT_LIST_HEAD(&neigh_node->bonding_list); - /* extra reference for return */ atomic_set(&neigh_node->refcount, 2); @@ -207,30 +457,68 @@ out: return neigh_node; } +/** + * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object + * @rcu: rcu pointer of the orig_ifinfo object + */ +static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) +{ + struct batadv_orig_ifinfo *orig_ifinfo; + + orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); + + if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + + kfree(orig_ifinfo); +} + +/** + * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free + * the orig_ifinfo (without rcu callback) + * @orig_ifinfo: the orig_ifinfo object to release + */ +static void +batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo) +{ + if (atomic_dec_and_test(&orig_ifinfo->refcount)) + batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu); +} + +/** + * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free + * the orig_ifinfo + * @orig_ifinfo: the orig_ifinfo object to release + */ +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) +{ + if (atomic_dec_and_test(&orig_ifinfo->refcount)) + call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); +} + static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { struct hlist_node *node_tmp; - struct batadv_neigh_node *neigh_node, *tmp_neigh_node; + struct batadv_neigh_node *neigh_node; struct batadv_orig_node *orig_node; + struct batadv_orig_ifinfo *orig_ifinfo; orig_node = container_of(rcu, struct batadv_orig_node, rcu); spin_lock_bh(&orig_node->neigh_list_lock); - /* for all bonding members ... */ - list_for_each_entry_safe(neigh_node, tmp_neigh_node, - &orig_node->bond_list, bonding_list) { - list_del_rcu(&neigh_node->bonding_list); - batadv_neigh_node_free_ref(neigh_node); - } - /* for all neighbors towards this originator ... */ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); - batadv_neigh_node_free_ref(neigh_node); + batadv_neigh_node_free_ref_now(neigh_node); } + hlist_for_each_entry_safe(orig_ifinfo, node_tmp, + &orig_node->ifinfo_list, list) { + hlist_del_rcu(&orig_ifinfo->list); + batadv_orig_ifinfo_free_ref_now(orig_ifinfo); + } spin_unlock_bh(&orig_node->neigh_list_lock); /* Free nc_nodes */ @@ -327,8 +615,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); - INIT_LIST_HEAD(&orig_node->bond_list); INIT_LIST_HEAD(&orig_node->vlan_list); + INIT_HLIST_HEAD(&orig_node->ifinfo_list); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); @@ -344,15 +632,11 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); batadv_dat_init_orig_node_addr(orig_node); - orig_node->router = NULL; atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; - orig_node->batman_seqno_reset = reset_time; - - atomic_set(&orig_node->bond_candidates, 0); /* create a vlan object for the "untagged" LAN */ vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS); @@ -376,20 +660,76 @@ free_orig_node: return NULL; } +/** + * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * + * Returns true if any ifinfo entry was purged, false otherwise. + */ +static bool +batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) +{ + struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_hard_iface *if_outgoing; + struct hlist_node *node_tmp; + bool ifinfo_purged = false; + + spin_lock_bh(&orig_node->neigh_list_lock); + + /* for all ifinfo objects for this originator */ + hlist_for_each_entry_safe(orig_ifinfo, node_tmp, + &orig_node->ifinfo_list, list) { + if_outgoing = orig_ifinfo->if_outgoing; + + /* always keep the default interface */ + if (if_outgoing == BATADV_IF_DEFAULT) + continue; + + /* don't purge if the interface is not (going) down */ + if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && + (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && + (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + continue; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "router/ifinfo purge: originator %pM, iface: %s\n", + orig_node->orig, if_outgoing->net_dev->name); + + ifinfo_purged = true; + + hlist_del_rcu(&orig_ifinfo->list); + batadv_orig_ifinfo_free_ref(orig_ifinfo); + if (orig_node->last_bonding_candidate == orig_ifinfo) { + orig_node->last_bonding_candidate = NULL; + batadv_orig_ifinfo_free_ref(orig_ifinfo); + } + } + + spin_unlock_bh(&orig_node->neigh_list_lock); + + return ifinfo_purged; +} + + +/** + * batadv_purge_orig_neighbors - purges neighbors from originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * + * Returns true if any neighbor was purged, false otherwise + */ static bool batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node **best_neigh) + struct batadv_orig_node *orig_node) { - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; struct batadv_hard_iface *if_incoming; - *best_neigh = NULL; - spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ @@ -418,15 +758,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, neigh_purged = true; hlist_del_rcu(&neigh_node->list); - batadv_bonding_candidate_del(orig_node, neigh_node); batadv_neigh_node_free_ref(neigh_node); - } else { - /* store the best_neighbour if this is the first - * iteration or if a better neighbor has been found - */ - if (!*best_neigh || - bao->bat_neigh_cmp(neigh_node, *best_neigh) > 0) - *best_neigh = neigh_node; } } @@ -434,10 +766,57 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, return neigh_purged; } +/** + * batadv_find_best_neighbor - finds the best neighbor after purging + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * @if_outgoing: the interface for which the metric should be compared + * + * Returns the current best neighbor, with refcount increased. + */ +static struct batadv_neigh_node * +batadv_find_best_neighbor(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *best = NULL, *neigh; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + + rcu_read_lock(); + hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) { + if (best && (bao->bat_neigh_cmp(neigh, if_outgoing, + best, if_outgoing) <= 0)) + continue; + + if (!atomic_inc_not_zero(&neigh->refcount)) + continue; + + if (best) + batadv_neigh_node_free_ref(best); + + best = neigh; + } + rcu_read_unlock(); + + return best; +} + +/** + * batadv_purge_orig_node - purges obsolete information from an orig_node + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * + * This function checks if the orig_node or substructures of it have become + * obsolete, and purges this information if that's the case. + * + * Returns true if the orig_node is to be removed, false otherwise. + */ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_neigh_node *best_neigh_node; + struct batadv_hard_iface *hard_iface; + bool changed; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { @@ -446,12 +825,39 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, orig_node->orig, jiffies_to_msecs(orig_node->last_seen)); return true; - } else { - if (batadv_purge_orig_neighbors(bat_priv, orig_node, - &best_neigh_node)) - batadv_update_route(bat_priv, orig_node, - best_neigh_node); } + changed = batadv_purge_orig_ifinfo(bat_priv, orig_node); + changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node); + + if (!changed) + return false; + + /* first for NULL ... */ + best_neigh_node = batadv_find_best_neighbor(bat_priv, orig_node, + BATADV_IF_DEFAULT); + batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT, + best_neigh_node); + if (best_neigh_node) + batadv_neigh_node_free_ref(best_neigh_node); + + /* ... then for all other interfaces. */ + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + best_neigh_node = batadv_find_best_neighbor(bat_priv, + orig_node, + hard_iface); + batadv_update_route(bat_priv, orig_node, hard_iface, + best_neigh_node); + if (best_neigh_node) + batadv_neigh_node_free_ref(best_neigh_node); + } + rcu_read_unlock(); return false; } @@ -534,8 +940,54 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) return 0; } - bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq); + bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, + BATADV_IF_DEFAULT); + + return 0; +} + +/** + * batadv_orig_hardif_seq_print_text - writes originator infos for a specific + * outgoing interface + * @seq: debugfs table seq_file struct + * @offset: not used + * + * Returns 0 + */ +int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; + + hard_iface = batadv_hardif_get_by_netdev(net_dev); + + if (!hard_iface || !hard_iface->soft_iface) { + seq_puts(seq, "Interface not known to B.A.T.M.A.N.\n"); + goto out; + } + + bat_priv = netdev_priv(hard_iface->soft_iface); + if (!bat_priv->bat_algo_ops->bat_orig_print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + goto out; + } + + if (hard_iface->if_status != BATADV_IF_ACTIVE) { + seq_puts(seq, "Interface not active\n"); + goto out; + } + seq_printf(seq, "[B.A.T.M.A.N. adv %s, IF/MAC: %s/%pM (%s %s)]\n", + BATADV_SOURCE_VERSION, hard_iface->net_dev->name, + hard_iface->net_dev->dev_addr, + hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name); + + bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); + +out: + batadv_hardif_free_ref(hard_iface); return 0; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 6f77d808a91..37be290f63f 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ @@ -36,8 +34,26 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, struct batadv_orig_node *orig_node); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * -batadv_orig_node_get_router(struct batadv_orig_node *orig_node); +batadv_orig_router_get(struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing); +void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo); + +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing); +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing); +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo); + int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); +int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 207459b6296..0a381d1174c 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_PACKET_H_ @@ -117,6 +115,7 @@ enum batadv_tt_client_flags { BATADV_TT_CLIENT_DEL = BIT(0), BATADV_TT_CLIENT_ROAM = BIT(1), BATADV_TT_CLIENT_WIFI = BIT(4), + BATADV_TT_CLIENT_ISOLA = BIT(5), BATADV_TT_CLIENT_NOPURGE = BIT(8), BATADV_TT_CLIENT_NEW = BIT(9), BATADV_TT_CLIENT_PENDING = BIT(10), @@ -155,6 +154,7 @@ enum batadv_tvlv_type { BATADV_TVLV_ROAM = 0x05, }; +#pragma pack(2) /* the destination hardware field in the ARP frame is used to * transport the claim type and the group id */ @@ -163,24 +163,20 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; - -struct batadv_header { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - /* the parent struct has to add a byte after the header to make - * everything 4 bytes aligned again - */ -}; +#pragma pack() /** * struct batadv_ogm_packet - ogm (routing protocol) packet - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @flags: contains routing relevant flags - see enum batadv_iv_flags * @tvlv_len: length of tvlv data following the ogm header */ struct batadv_ogm_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t flags; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -196,29 +192,51 @@ struct batadv_ogm_packet { #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) /** - * batadv_icmp_header - common ICMP header - * @header: common batman header + * batadv_icmp_header - common members among all the ICMP packets + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier + * @align: not used - useful for alignment purposes only + * + * This structure is used for ICMP packets parsing only and it is never sent + * over the wire. The alignment field at the end is there to ensure that + * members are padded the same way as they are in real packets. */ struct batadv_icmp_header { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; uint8_t uid; + uint8_t align[3]; }; /** * batadv_icmp_packet - ICMP packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @reserved: not used - useful for alignment * @seqno: ICMP sequence number */ struct batadv_icmp_packet { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t reserved; __be16 seqno; }; @@ -227,13 +245,25 @@ struct batadv_icmp_packet { /** * batadv_icmp_packet_rr - ICMP RouteRecord packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @rr_cur: number of entries the rr array * @seqno: ICMP sequence number * @rr: route record array */ struct batadv_icmp_packet_rr { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t rr_cur; __be16 seqno; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; @@ -253,8 +283,18 @@ struct batadv_icmp_packet_rr { */ #pragma pack(2) +/** + * struct batadv_unicast_packet - unicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @ttvn: translation table version number + * @dest: originator destination of the unicast packet + */ struct batadv_unicast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t ttvn; /* destination translation table version number */ uint8_t dest[ETH_ALEN]; /* "4 bytes boundary + 2 bytes" long to make the payload after the @@ -280,7 +320,9 @@ struct batadv_unicast_4addr_packet { /** * struct batadv_frag_packet - fragmented packet - * @header: common batman packet header with type, compatversion, and ttl + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence @@ -289,7 +331,9 @@ struct batadv_unicast_4addr_packet { * @total_size: size of the merged packet */ struct batadv_frag_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; #if defined(__BIG_ENDIAN_BITFIELD) uint8_t no:4; uint8_t reserved:4; @@ -305,8 +349,19 @@ struct batadv_frag_packet { __be16 total_size; }; +/** + * struct batadv_bcast_packet - broadcast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @orig: originator of the broadcast packet + */ struct batadv_bcast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -315,11 +370,11 @@ struct batadv_bcast_packet { */ }; -#pragma pack() - /** * struct batadv_coded_packet - network coded packet - * @header: common batman packet header and ttl of first included packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: Align following fields to 2-byte boundaries * @first_source: original source of first included packet * @first_orig_dest: original destinal of first included packet @@ -334,7 +389,9 @@ struct batadv_bcast_packet { * @coded_len: length of network coded part of the payload */ struct batadv_coded_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t first_ttvn; /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ uint8_t first_source[ETH_ALEN]; @@ -349,9 +406,13 @@ struct batadv_coded_packet { __be16 coded_len; }; +#pragma pack() + /** * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: reserved field (for packet alignment) * @src: address of the source * @dst: address of the destination @@ -359,7 +420,9 @@ struct batadv_coded_packet { * @align: 2 bytes to align the header to a 4 byte boundry */ struct batadv_unicast_tvlv_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; uint8_t dst[ETH_ALEN]; uint8_t src[ETH_ALEN]; @@ -420,13 +483,13 @@ struct batadv_tvlv_tt_vlan_data { * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see * batadv_tt_client_flags) - * @reserved: reserved field + * @reserved: reserved field - useful for alignment purposes only * @addr: mac address of non-mesh client that triggered this tt change * @vid: VLAN identifier */ struct batadv_tvlv_tt_change { uint8_t flags; - uint8_t reserved; + uint8_t reserved[3]; uint8_t addr[ETH_ALEN]; __be16 vid; }; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index d4114d775ad..1ed9f7c9ece 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -35,13 +33,32 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +/** + * _batadv_update_route - set the router for this originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be configured + * @recv_if: the receive interface for which this route is set + * @neigh_node: neighbor which should be the next router + * + * This function does not perform any error checks + */ static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if, struct batadv_neigh_node *neigh_node) { + struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *curr_router; - curr_router = batadv_orig_node_get_router(orig_node); + orig_ifinfo = batadv_orig_ifinfo_get(orig_node, recv_if); + if (!orig_ifinfo) + return; + + rcu_read_lock(); + curr_router = rcu_dereference(orig_ifinfo->router); + if (curr_router && !atomic_inc_not_zero(&curr_router->refcount)) + curr_router = NULL; + rcu_read_unlock(); /* route deleted */ if ((curr_router) && (!neigh_node)) { @@ -71,16 +88,25 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); - rcu_assign_pointer(orig_node->router, neigh_node); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_orig_ifinfo_free_ref(orig_ifinfo); /* decrease refcount of previous best neighbor */ if (curr_router) batadv_neigh_node_free_ref(curr_router); } +/** + * batadv_update_route - set the router for this originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be configured + * @recv_if: the receive interface for which this route is set + * @neigh_node: neighbor which should be the next router + */ void batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if, struct batadv_neigh_node *neigh_node) { struct batadv_neigh_node *router = NULL; @@ -88,125 +114,16 @@ void batadv_update_route(struct batadv_priv *bat_priv, if (!orig_node) goto out; - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, recv_if); if (router != neigh_node) - _batadv_update_route(bat_priv, orig_node, neigh_node); + _batadv_update_route(bat_priv, orig_node, recv_if, neigh_node); out: if (router) batadv_neigh_node_free_ref(router); } -/* caller must hold the neigh_list_lock */ -void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node) -{ - /* this neighbor is not part of our candidate list */ - if (list_empty(&neigh_node->bonding_list)) - goto out; - - list_del_rcu(&neigh_node->bonding_list); - INIT_LIST_HEAD(&neigh_node->bonding_list); - batadv_neigh_node_free_ref(neigh_node); - atomic_dec(&orig_node->bond_candidates); - -out: - return; -} - -/** - * batadv_bonding_candidate_add - consider a new link for bonding mode towards - * the given originator - * @bat_priv: the bat priv with all the soft interface information - * @orig_node: the target node - * @neigh_node: the neighbor representing the new link to consider for bonding - * mode - */ -void batadv_bonding_candidate_add(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node) -{ - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; - struct batadv_neigh_node *tmp_neigh_node, *router = NULL; - uint8_t interference_candidate = 0; - - spin_lock_bh(&orig_node->neigh_list_lock); - - /* only consider if it has the same primary address ... */ - if (!batadv_compare_eth(orig_node->orig, - neigh_node->orig_node->primary_addr)) - goto candidate_del; - - router = batadv_orig_node_get_router(orig_node); - if (!router) - goto candidate_del; - - - /* ... and is good enough to be considered */ - if (bao->bat_neigh_is_equiv_or_better(neigh_node, router)) - goto candidate_del; - - /* check if we have another candidate with the same mac address or - * interface. If we do, we won't select this candidate because of - * possible interference. - */ - hlist_for_each_entry_rcu(tmp_neigh_node, - &orig_node->neigh_list, list) { - if (tmp_neigh_node == neigh_node) - continue; - - /* we only care if the other candidate is even - * considered as candidate. - */ - if (list_empty(&tmp_neigh_node->bonding_list)) - continue; - - if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) || - (batadv_compare_eth(neigh_node->addr, - tmp_neigh_node->addr))) { - interference_candidate = 1; - break; - } - } - - /* don't care further if it is an interference candidate */ - if (interference_candidate) - goto candidate_del; - - /* this neighbor already is part of our candidate list */ - if (!list_empty(&neigh_node->bonding_list)) - goto out; - - if (!atomic_inc_not_zero(&neigh_node->refcount)) - goto out; - - list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list); - atomic_inc(&orig_node->bond_candidates); - goto out; - -candidate_del: - batadv_bonding_candidate_del(orig_node, neigh_node); - -out: - spin_unlock_bh(&orig_node->neigh_list_lock); - - if (router) - batadv_neigh_node_free_ref(router); -} - -/* copy primary address for bonding */ -void -batadv_bonding_save_primary(const struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - const struct batadv_ogm_packet *batman_ogm_packet) -{ - if (!(batman_ogm_packet->flags & BATADV_PRIMARIES_FIRST_HOP)) - return; - - memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); -} - /* checks whether the host restarted and is in the protection time. * returns: * 0 if the packet is to be accepted @@ -308,7 +225,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, memcpy(icmph->dst, icmph->orig, ETH_ALEN); memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN); icmph->msg_type = BATADV_ECHO_REPLY; - icmph->header.ttl = BATADV_TTL; + icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); if (res != NET_XMIT_DROP) @@ -338,9 +255,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; /* send TTL exceeded if packet is an echo request (traceroute) */ - if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n", - icmp_packet->icmph.orig, icmp_packet->icmph.dst); + icmp_packet->orig, icmp_packet->dst); goto out; } @@ -349,7 +266,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, goto out; /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); if (!orig_node) goto out; @@ -359,11 +276,11 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; - memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); - memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED; - icmp_packet->icmph.header.ttl = BATADV_TTL; + icmp_packet->msg_type = BATADV_TTL_EXCEEDED; + icmp_packet->ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; @@ -434,7 +351,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, return batadv_recv_my_icmp_packet(bat_priv, skb); /* TTL exceeded */ - if (icmph->header.ttl < 2) + if (icmph->ttl < 2) return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ @@ -449,7 +366,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph = (struct batadv_icmp_header *)skb->data; /* decrement ttl */ - icmph->header.ttl--; + icmph->ttl--; /* route it */ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) @@ -461,114 +378,6 @@ out: return ret; } -/* In the bonding case, send the packets in a round - * robin fashion over the remaining interfaces. - * - * This method rotates the bonding list and increases the - * returned router's refcount. - */ -static struct batadv_neigh_node * -batadv_find_bond_router(struct batadv_orig_node *primary_orig, - const struct batadv_hard_iface *recv_if) -{ - struct batadv_neigh_node *tmp_neigh_node; - struct batadv_neigh_node *router = NULL, *first_candidate = NULL; - - rcu_read_lock(); - list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list, - bonding_list) { - if (!first_candidate) - first_candidate = tmp_neigh_node; - - /* recv_if == NULL on the first node. */ - if (tmp_neigh_node->if_incoming == recv_if) - continue; - - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) - continue; - - router = tmp_neigh_node; - break; - } - - /* use the first candidate if nothing was found. */ - if (!router && first_candidate && - atomic_inc_not_zero(&first_candidate->refcount)) - router = first_candidate; - - if (!router) - goto out; - - /* selected should point to the next element - * after the current router - */ - spin_lock_bh(&primary_orig->neigh_list_lock); - /* this is a list_move(), which unfortunately - * does not exist as rcu version - */ - list_del_rcu(&primary_orig->bond_list); - list_add_rcu(&primary_orig->bond_list, - &router->bonding_list); - spin_unlock_bh(&primary_orig->neigh_list_lock); - -out: - rcu_read_unlock(); - return router; -} - -/** - * batadv_find_ifalter_router - find the best of the remaining candidates which - * are not using this interface - * @bat_priv: the bat priv with all the soft interface information - * @primary_orig: the destination - * @recv_if: the interface that the router returned by this function has to not - * use - * - * Returns the best candidate towards primary_orig that is not using recv_if. - * Increases the returned neighbor's refcount - */ -static struct batadv_neigh_node * -batadv_find_ifalter_router(struct batadv_priv *bat_priv, - struct batadv_orig_node *primary_orig, - const struct batadv_hard_iface *recv_if) -{ - struct batadv_neigh_node *router = NULL, *first_candidate = NULL; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; - struct batadv_neigh_node *tmp_neigh_node; - - rcu_read_lock(); - list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list, - bonding_list) { - if (!first_candidate) - first_candidate = tmp_neigh_node; - - /* recv_if == NULL on the first node. */ - if (tmp_neigh_node->if_incoming == recv_if) - continue; - - if (router && bao->bat_neigh_cmp(tmp_neigh_node, router)) - continue; - - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) - continue; - - /* decrement refcount of previously selected router */ - if (router) - batadv_neigh_node_free_ref(router); - - /* we found a better router (or at least one valid router) */ - router = tmp_neigh_node; - } - - /* use the first candidate if nothing was found. */ - if (!router && first_candidate && - atomic_inc_not_zero(&first_candidate->refcount)) - router = first_candidate; - - rcu_read_unlock(); - return router; -} - /** * batadv_check_unicast_packet - Check for malformed unicast packets * @bat_priv: the bat priv with all the soft interface information @@ -606,95 +415,141 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, return 0; } -/* find a suitable router for this originator, and use - * bonding if possible. increases the found neighbors - * refcount. +/** + * batadv_find_router - find a suitable router for this originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the destination node + * @recv_if: pointer to interface this packet was received on + * + * Returns the router which should be used for this orig_node on + * this interface, or NULL if not available. */ struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const struct batadv_hard_iface *recv_if) + struct batadv_hard_iface *recv_if) { - struct batadv_orig_node *primary_orig_node; - struct batadv_orig_node *router_orig; - struct batadv_neigh_node *router; - static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; - int bonding_enabled; - uint8_t *primary_addr; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_neigh_node *first_candidate_router = NULL; + struct batadv_neigh_node *next_candidate_router = NULL; + struct batadv_neigh_node *router, *cand_router = NULL; + struct batadv_neigh_node *last_cand_router = NULL; + struct batadv_orig_ifinfo *cand, *first_candidate = NULL; + struct batadv_orig_ifinfo *next_candidate = NULL; + struct batadv_orig_ifinfo *last_candidate; + bool last_candidate_found = false; if (!orig_node) return NULL; - router = batadv_orig_node_get_router(orig_node); - if (!router) - goto err; + router = batadv_orig_router_get(orig_node, recv_if); - /* without bonding, the first node should - * always choose the default router. + /* only consider bonding for recv_if == BATADV_IF_DEFAULT (first hop) + * and if activated. + */ + if (recv_if == BATADV_IF_DEFAULT || !atomic_read(&bat_priv->bonding) || + !router) + return router; + + /* bonding: loop through the list of possible routers found + * for the various outgoing interfaces and find a candidate after + * the last chosen bonding candidate (next_candidate). If no such + * router is found, use the first candidate found (the previously + * chosen bonding candidate might have been the last one in the list). + * If this can't be found either, return the previously choosen + * router - obviously there are no other candidates. */ - bonding_enabled = atomic_read(&bat_priv->bonding); - rcu_read_lock(); - /* select default router to output */ - router_orig = router->orig_node; - if (!router_orig) - goto err_unlock; + last_candidate = orig_node->last_bonding_candidate; + if (last_candidate) + last_cand_router = rcu_dereference(last_candidate->router); - if ((!recv_if) && (!bonding_enabled)) - goto return_router; + hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) { + /* acquire some structures and references ... */ + if (!atomic_inc_not_zero(&cand->refcount)) + continue; - primary_addr = router_orig->primary_addr; + cand_router = rcu_dereference(cand->router); + if (!cand_router) + goto next; - /* if we have something in the primary_addr, we can search - * for a potential bonding candidate. - */ - if (batadv_compare_eth(primary_addr, zero_mac)) - goto return_router; + if (!atomic_inc_not_zero(&cand_router->refcount)) { + cand_router = NULL; + goto next; + } - /* find the orig_node which has the primary interface. might - * even be the same as our router_orig in many cases - */ - if (batadv_compare_eth(primary_addr, router_orig->orig)) { - primary_orig_node = router_orig; - } else { - primary_orig_node = batadv_orig_hash_find(bat_priv, - primary_addr); - if (!primary_orig_node) - goto return_router; + /* alternative candidate should be good enough to be + * considered + */ + if (!bao->bat_neigh_is_equiv_or_better(cand_router, + cand->if_outgoing, + router, recv_if)) + goto next; + + /* don't use the same router twice */ + if (last_cand_router == cand_router) + goto next; + + /* mark the first possible candidate */ + if (!first_candidate) { + atomic_inc(&cand_router->refcount); + atomic_inc(&cand->refcount); + first_candidate = cand; + first_candidate_router = cand_router; + } + + /* check if the loop has already passed the previously selected + * candidate ... this function should select the next candidate + * AFTER the previously used bonding candidate. + */ + if (!last_candidate || last_candidate_found) { + next_candidate = cand; + next_candidate_router = cand_router; + break; + } - batadv_orig_node_free_ref(primary_orig_node); + if (last_candidate == cand) + last_candidate_found = true; +next: + /* free references */ + if (cand_router) { + batadv_neigh_node_free_ref(cand_router); + cand_router = NULL; + } + batadv_orig_ifinfo_free_ref(cand); } + rcu_read_unlock(); - /* with less than 2 candidates, we can't do any - * bonding and prefer the original router. - */ - if (atomic_read(&primary_orig_node->bond_candidates) < 2) - goto return_router; + /* last_bonding_candidate is reset below, remove the old reference. */ + if (orig_node->last_bonding_candidate) + batadv_orig_ifinfo_free_ref(orig_node->last_bonding_candidate); - /* all nodes between should choose a candidate which - * is is not on the interface where the packet came - * in. + /* After finding candidates, handle the three cases: + * 1) there is a next candidate, use that + * 2) there is no next candidate, use the first of the list + * 3) there is no candidate at all, return the default router */ - batadv_neigh_node_free_ref(router); + if (next_candidate) { + batadv_neigh_node_free_ref(router); - if (bonding_enabled) - router = batadv_find_bond_router(primary_orig_node, recv_if); - else - router = batadv_find_ifalter_router(bat_priv, primary_orig_node, - recv_if); + /* remove references to first candidate, we don't need it. */ + if (first_candidate) { + batadv_neigh_node_free_ref(first_candidate_router); + batadv_orig_ifinfo_free_ref(first_candidate); + } + router = next_candidate_router; + orig_node->last_bonding_candidate = next_candidate; + } else if (first_candidate) { + batadv_neigh_node_free_ref(router); -return_router: - if (router && router->if_incoming->if_status != BATADV_IF_ACTIVE) - goto err_unlock; + /* refcounting has already been done in the loop above. */ + router = first_candidate_router; + orig_node->last_bonding_candidate = first_candidate; + } else { + orig_node->last_bonding_candidate = NULL; + } - rcu_read_unlock(); return router; -err_unlock: - rcu_read_unlock(); -err: - if (router) - batadv_neigh_node_free_ref(router); - return NULL; } static int batadv_route_unicast_packet(struct sk_buff *skb, @@ -709,7 +564,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; /* TTL exceeded */ - if (unicast_packet->header.ttl < 2) { + if (unicast_packet->ttl < 2) { pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); goto out; @@ -727,9 +582,9 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.ttl--; + unicast_packet->ttl--; - switch (unicast_packet->header.packet_type) { + switch (unicast_packet->packet_type) { case BATADV_UNICAST_4ADDR: hdr_len = sizeof(struct batadv_unicast_4addr_packet); break; @@ -970,7 +825,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR; + is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); @@ -1135,6 +990,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, int hdr_size = sizeof(*bcast_packet); int ret = NET_RX_DROP; int32_t seq_diff; + uint32_t seqno; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) @@ -1160,7 +1016,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; - if (bcast_packet->header.ttl < 2) + if (bcast_packet->ttl < 2) goto out; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); @@ -1170,12 +1026,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, spin_lock_bh(&orig_node->bcast_seqno_lock); + seqno = ntohl(bcast_packet->seqno); /* check whether the packet is a duplicate */ if (batadv_test_bit(orig_node->bcast_bits, orig_node->last_bcast_seqno, - ntohl(bcast_packet->seqno))) + seqno)) goto spin_unlock; - seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; + seq_diff = seqno - orig_node->last_bcast_seqno; /* check whether the packet is old and the host just restarted. */ if (batadv_window_protected(bat_priv, seq_diff, @@ -1186,7 +1043,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, * if required. */ if (batadv_bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) - orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); + orig_node->last_bcast_seqno = seqno; spin_unlock_bh(&orig_node->bcast_seqno_lock); diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 19544ddb81b..557d3d12a9a 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ROUTING_H_ @@ -25,6 +23,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, int header_len); void batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if, struct batadv_neigh_node *neigh_node); int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -45,16 +44,7 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const struct batadv_hard_iface *recv_if); -void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node); -void batadv_bonding_candidate_add(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node); -void batadv_bonding_save_primary(const struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - const struct batadv_ogm_packet - *batman_ogm_packet); + struct batadv_hard_iface *recv_if); int batadv_window_protected(struct batadv_priv *bat_priv, int32_t seq_num_diff, unsigned long *last_reset); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index c83be5ebaa2..579f5f00a38 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -161,11 +159,11 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->version = BATADV_COMPAT_VERSION; /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->packet_type = BATADV_UNICAST; /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; + unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); /* set the destination tt version number */ @@ -221,7 +219,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, goto out; uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; @@ -321,13 +319,23 @@ out: */ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, - int packet_subtype, unsigned short vid) + int packet_subtype, uint8_t *dst_hint, + unsigned short vid) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; + uint8_t *src, *dst; + + src = ethhdr->h_source; + dst = ethhdr->h_dest; + + /* if we got an hint! let's send the packet to this client (if any) */ + if (dst_hint) { + src = NULL; + dst = dst_hint; + } + orig_node = batadv_transtable_search(bat_priv, src, dst, vid); - orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source, - ethhdr->h_dest, vid); return batadv_send_skb_unicast(bat_priv, skb, packet_type, packet_subtype, orig_node, vid); } @@ -379,6 +387,8 @@ static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) kfree_skb(forw_packet->skb); if (forw_packet->if_incoming) batadv_hardif_free_ref(forw_packet->if_incoming); + if (forw_packet->if_outgoing) + batadv_hardif_free_ref(forw_packet->if_outgoing); kfree(forw_packet); } @@ -436,12 +446,13 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; - bcast_packet->header.ttl--; + bcast_packet->ttl--; skb_reset_mac_header(newskb); forw_packet->skb = newskb; forw_packet->if_incoming = primary_if; + forw_packet->if_outgoing = NULL; /* how often did we send the bcast packet ? */ forw_packet->num_packets = 0; @@ -537,11 +548,16 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) bat_priv->bat_algo_ops->bat_ogm_emit(forw_packet); - /* we have to have at least one packet in the queue - * to determine the queues wake up time unless we are - * shutting down + /* we have to have at least one packet in the queue to determine the + * queues wake up time unless we are shutting down. + * + * only re-schedule if this is the "original" copy, e.g. the OGM of the + * primary interface should only be rescheduled once per period, but + * this function will be called for the forw_packet instances of the + * other secondary interfaces as well. */ - if (forw_packet->own) + if (forw_packet->own && + forw_packet->if_incoming == forw_packet->if_outgoing) batadv_schedule_bat_ogm(forw_packet->if_incoming); out: @@ -602,7 +618,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, * we delete only packets belonging to the given interface */ if ((hard_iface) && - (forw_packet->if_incoming != hard_iface)) + (forw_packet->if_incoming != hard_iface) && + (forw_packet->if_outgoing != hard_iface)) continue; spin_unlock_bh(&bat_priv->forw_bat_list_lock); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index aa2e2537a73..aaddaa9661c 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SEND_H_ @@ -40,7 +38,8 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, int packet_subtype); int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, - int packet_subtype, unsigned short vid); + int packet_subtype, uint8_t *dst_hint, + unsigned short vid); int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); @@ -57,11 +56,11 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, - struct sk_buff *skb, + struct sk_buff *skb, uint8_t *dst_hint, unsigned short vid) { return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST, 0, - vid); + dst_hint, vid); } /** @@ -81,11 +80,12 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_subtype, + uint8_t *dst_hint, unsigned short vid) { return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST_4ADDR, - packet_subtype, vid); + packet_subtype, dst_hint, vid); } #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 36f050876f8..f82c267e188 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -121,7 +119,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) batadv_tt_local_remove(bat_priv, old_addr, BATADV_NO_FLAGS, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NO_FLAGS, - BATADV_NULL_IFINDEX); + BATADV_NULL_IFINDEX, BATADV_NO_MARK); } return 0; @@ -162,6 +160,8 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 0x00, 0x00}; + enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO; + uint8_t *dst_hint = NULL, chaddr[ETH_ALEN]; struct vlan_ethhdr *vhdr; unsigned int header_len = 0; int data_len = skb->len, ret; @@ -169,6 +169,7 @@ static int batadv_interface_tx(struct sk_buff *skb, bool do_bcast = false, client_added; unsigned short vid; uint32_t seqno; + int gw_mode; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -198,7 +199,8 @@ static int batadv_interface_tx(struct sk_buff *skb, /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source)) { client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source, - vid, skb->skb_iif); + vid, skb->skb_iif, + skb->mark); if (!client_added) goto dropped; } @@ -215,36 +217,39 @@ static int batadv_interface_tx(struct sk_buff *skb, if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) goto dropped; + gw_mode = atomic_read(&bat_priv->gw_mode); if (is_multicast_ether_addr(ethhdr->h_dest)) { - do_bcast = true; - - switch (atomic_read(&bat_priv->gw_mode)) { - case BATADV_GW_MODE_SERVER: - /* gateway servers should not send dhcp - * requests into the mesh - */ - ret = batadv_gw_is_dhcp_target(skb, &header_len); - if (ret) - goto dropped; - break; - case BATADV_GW_MODE_CLIENT: - /* gateway clients should send dhcp requests - * via unicast to their gateway - */ - ret = batadv_gw_is_dhcp_target(skb, &header_len); - if (ret) - do_bcast = false; - break; - case BATADV_GW_MODE_OFF: - default: - break; + /* if gw mode is off, broadcast every packet */ + if (gw_mode == BATADV_GW_MODE_OFF) { + do_bcast = true; + goto send; } - /* reminder: ethhdr might have become unusable from here on - * (batadv_gw_is_dhcp_target() might have reallocated skb data) + dhcp_rcp = batadv_gw_dhcp_recipient_get(skb, &header_len, + chaddr); + /* skb->data may have been modified by + * batadv_gw_dhcp_recipient_get() + */ + ethhdr = (struct ethhdr *)skb->data; + /* if gw_mode is on, broadcast any non-DHCP message. + * All the DHCP packets are going to be sent as unicast */ + if (dhcp_rcp == BATADV_DHCP_NO) { + do_bcast = true; + goto send; + } + + if (dhcp_rcp == BATADV_DHCP_TO_CLIENT) + dst_hint = chaddr; + else if ((gw_mode == BATADV_GW_MODE_SERVER) && + (dhcp_rcp == BATADV_DHCP_TO_SERVER)) + /* gateways should not forward any DHCP message if + * directed to a DHCP server + */ + goto dropped; } +send: batadv_skb_set_priority(skb, 0); /* ethernet packet should be broadcasted */ @@ -264,11 +269,11 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; bcast_packet = (struct batadv_bcast_packet *)skb->data; - bcast_packet->header.version = BATADV_COMPAT_VERSION; - bcast_packet->header.ttl = BATADV_TTL; + bcast_packet->version = BATADV_COMPAT_VERSION; + bcast_packet->ttl = BATADV_TTL; /* batman packet type: broadcast */ - bcast_packet->header.packet_type = BATADV_BCAST; + bcast_packet->packet_type = BATADV_BCAST; bcast_packet->reserved = 0; /* hw address of first interface is the orig mac because only @@ -290,22 +295,22 @@ static int batadv_interface_tx(struct sk_buff *skb, /* unicast packet */ } else { - if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) { + /* DHCP packets going to a server will use the GW feature */ + if (dhcp_rcp == BATADV_DHCP_TO_SERVER) { ret = batadv_gw_out_of_range(bat_priv, skb); if (ret) goto dropped; - } - - if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) - goto dropped; - - batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); - - if (is_multicast_ether_addr(ethhdr->h_dest)) ret = batadv_send_skb_via_gw(bat_priv, skb, vid); - else - ret = batadv_send_skb_via_tt(bat_priv, skb, vid); + } else { + if (batadv_dat_snoop_outgoing_arp_request(bat_priv, + skb)) + goto dropped; + + batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); + ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, + vid); + } if (ret == NET_XMIT_DROP) goto dropped_freed; } @@ -328,7 +333,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node) { - struct batadv_header *batadv_header = (struct batadv_header *)skb->data; + struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(soft_iface); __be16 ethertype = htons(ETH_P_BATMAN); struct vlan_ethhdr *vhdr; @@ -336,7 +341,8 @@ void batadv_interface_rx(struct net_device *soft_iface, unsigned short vid; bool is_bcast; - is_bcast = (batadv_header->packet_type == BATADV_BCAST); + batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; + is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); /* check if enough space is available for pulling, and pull */ if (!pskb_may_pull(skb, hdr_size)) @@ -345,7 +351,12 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - vid = batadv_get_vid(skb, hdr_size); + /* clean the netfilter state now that the batman-adv header has been + * removed + */ + nf_reset(skb); + + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { @@ -388,9 +399,23 @@ void batadv_interface_rx(struct net_device *soft_iface, batadv_tt_add_temporary_global_entry(bat_priv, orig_node, ethhdr->h_source, vid); - if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest, - vid)) + if (is_multicast_ether_addr(ethhdr->h_dest)) { + /* set the mark on broadcast packets if AP isolation is ON and + * the packet is coming from an "isolated" client + */ + if (batadv_vlan_ap_isola_get(bat_priv, vid) && + batadv_tt_global_is_isolated(bat_priv, ethhdr->h_source, + vid)) { + /* save bits in skb->mark not covered by the mask and + * apply the mark on the rest + */ + skb->mark &= ~bat_priv->isolation_mark_mask; + skb->mark |= bat_priv->isolation_mark; + } + } else if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, + ethhdr->h_dest, vid)) { goto dropped; + } netif_rx(skb); goto out; @@ -479,7 +504,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) */ batadv_tt_local_add(bat_priv->soft_iface, bat_priv->soft_iface->dev_addr, vid, - BATADV_NULL_IFINDEX); + BATADV_NULL_IFINDEX, BATADV_NO_MARK); spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); @@ -672,7 +697,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); - atomic_set(&bat_priv->hop_penalty, 30); + atomic_set(&bat_priv->hop_penalty, 15); #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); #endif @@ -691,6 +716,8 @@ static int batadv_softif_init_late(struct net_device *dev) #endif bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; + bat_priv->isolation_mark = 0; + bat_priv->isolation_mark_mask = 0; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 06fc91ff5a0..dbab22fd89a 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 6335433310a..e456bf6bb28 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -329,10 +327,10 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name); } -static void batadv_post_gw_deselect(struct net_device *net_dev) +static void batadv_post_gw_reselect(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); } static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, @@ -408,7 +406,16 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, batadv_info(net_dev, "Changing gw mode from: %s to: %s\n", curr_gw_mode_str, buff); - batadv_gw_deselect(bat_priv); + /* Invoking batadv_gw_reselect() is not enough to really de-select the + * current GW. It will only instruct the gateway client code to perform + * a re-election the next time that this is needed. + * + * When gw client mode is being switched off the current GW must be + * de-selected explicitly otherwise no GW_ADD uevent is thrown on + * client mode re-activation. This is operation is performed in + * batadv_gw_check_client_stop(). + */ + batadv_gw_reselect(bat_priv); /* always call batadv_gw_check_client_stop() before changing the gateway * state */ @@ -443,6 +450,74 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, return batadv_gw_bandwidth_set(net_dev, buff, count); } +/** + * batadv_show_isolation_mark - print the current isolation mark/mask + * @kobj: kobject representing the private mesh sysfs directory + * @attr: the batman-adv attribute the user is interacting with + * @buff: the buffer that will contain the data to send back to the user + * + * Returns the number of bytes written into 'buff' on success or a negative + * error code in case of failure + */ +static ssize_t batadv_show_isolation_mark(struct kobject *kobj, + struct attribute *attr, char *buff) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + return sprintf(buff, "%#.8x/%#.8x\n", bat_priv->isolation_mark, + bat_priv->isolation_mark_mask); +} + +/** + * batadv_store_isolation_mark - parse and store the isolation mark/mask entered + * by the user + * @kobj: kobject representing the private mesh sysfs directory + * @attr: the batman-adv attribute the user is interacting with + * @buff: the buffer containing the user data + * @count: number of bytes in the buffer + * + * Returns 'count' on success or a negative error code in case of failure + */ +static ssize_t batadv_store_isolation_mark(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_priv *bat_priv = netdev_priv(net_dev); + uint32_t mark, mask; + char *mask_ptr; + + /* parse the mask if it has been specified, otherwise assume the mask is + * the biggest possible + */ + mask = 0xFFFFFFFF; + mask_ptr = strchr(buff, '/'); + if (mask_ptr) { + *mask_ptr = '\0'; + mask_ptr++; + + /* the mask must be entered in hex base as it is going to be a + * bitmask and not a prefix length + */ + if (kstrtou32(mask_ptr, 16, &mask) < 0) + return -EINVAL; + } + + /* the mark can be entered in any base */ + if (kstrtou32(buff, 0, &mark) < 0) + return -EINVAL; + + bat_priv->isolation_mark_mask = mask; + /* erase bits not covered by the mask */ + bat_priv->isolation_mark = mark & bat_priv->isolation_mark_mask; + + batadv_info(net_dev, + "New skb mark for extended isolation: %#.8x/%#.8x\n", + bat_priv->isolation_mark, bat_priv->isolation_mark_mask); + + return count; +} + BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); #ifdef CONFIG_BATMAN_ADV_BLA @@ -461,7 +536,7 @@ BATADV_ATTR_SIF_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, BATADV_ATTR_SIF_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); BATADV_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE, - batadv_post_gw_deselect); + batadv_post_gw_reselect); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -471,6 +546,8 @@ BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, batadv_nc_status_update); #endif +static BATADV_ATTR(isolation_mark, S_IRUGO | S_IWUSR, + batadv_show_isolation_mark, batadv_store_isolation_mark); static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_aggregated_ogms, @@ -494,6 +571,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_NC &batadv_attr_network_coding, #endif + &batadv_attr_isolation_mark, NULL, }; diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index c7d725de50a..b715b60db7c 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SYSFS_H_ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4add57d4857..a6fb1ff25f5 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -51,7 +49,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_tt_common_entry, hash_entry); - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return batadv_compare_eth(data1, data2); } /** @@ -333,7 +331,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - tt_change_node->change.reserved = 0; + memset(tt_change_node->change.reserved, 0, + sizeof(tt_change_node->change.reserved)); memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); tt_change_node->change.vid = htons(common->vid); @@ -475,11 +474,13 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, * @vid: VLAN identifier * @ifindex: index of the interface where the client is connected to (useful to * identify wireless clients) + * @mark: the value contained in the skb->mark field of the received packet (if + * any) * * Returns true if the client was successfully added, false otherwise. */ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - unsigned short vid, int ifindex) + unsigned short vid, int ifindex, uint32_t mark) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; @@ -490,6 +491,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, int hash_added, table_size, packet_size_max; bool ret = false, roamed_back = false; uint8_t remote_flags; + uint32_t match_mark; if (ifindex != BATADV_NULL_IFINDEX) in_dev = dev_get_by_index(&init_net, ifindex); @@ -614,6 +616,17 @@ check_roaming: else tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI; + /* check the mark in the skb: if it's equal to the configured + * isolation_mark, it means the packet is coming from an isolated + * non-mesh client + */ + match_mark = (mark & bat_priv->isolation_mark_mask); + if (bat_priv->isolation_mark_mask && + match_mark == bat_priv->isolation_mark) + tt_local->common.flags |= BATADV_TT_CLIENT_ISOLA; + else + tt_local->common.flags &= ~BATADV_TT_CLIENT_ISOLA; + /* if any "dynamic" flag has been modified, resend an ADD event for this * entry so that all the nodes can get the new flags */ @@ -874,7 +887,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); - seq_printf(seq, " %-13s %s %-7s %-9s (%-10s)\n", "Client", "VID", + seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID", "Flags", "Last seen", "CRC"); for (i = 0; i < hash->size; i++) { @@ -902,7 +915,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) } seq_printf(seq, - " * %pM %4i [%c%c%c%c%c] %3u.%03u (%#.8x)\n", + " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, BATADV_PRINT_VID(tt_common_entry->vid), (tt_common_entry->flags & @@ -914,6 +927,8 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) BATADV_TT_CLIENT_PENDING ? 'X' : '.'), (tt_common_entry->flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (tt_common_entry->flags & + BATADV_TT_CLIENT_ISOLA ? 'I' : '.'), no_purge ? 0 : last_seen_secs, no_purge ? 0 : last_seen_msecs, vlan->tt.crc); @@ -1385,12 +1400,14 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, head = &tt_global_entry->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { - router = batadv_orig_node_get_router(orig_entry->orig_node); + router = batadv_orig_router_get(orig_entry->orig_node, + BATADV_IF_DEFAULT); if (!router) continue; if (best_router && - bao->bat_neigh_cmp(router, best_router) <= 0) { + bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT, + best_router, BATADV_IF_DEFAULT) <= 0) { batadv_neigh_node_free_ref(router); continue; } @@ -1446,13 +1463,14 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); seq_printf(seq, - " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", + " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '*', tt_global_entry->common.addr, BATADV_PRINT_VID(tt_global_entry->common.vid), best_entry->ttvn, best_entry->orig_node->orig, last_ttvn, vlan->tt.crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); batadv_orig_node_vlan_free_ref(vlan); @@ -1477,13 +1495,14 @@ print_list: last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn); seq_printf(seq, - " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", + " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '+', tt_global_entry->common.addr, BATADV_PRINT_VID(tt_global_entry->common.vid), orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, vlan->tt.crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); batadv_orig_node_vlan_free_ref(vlan); @@ -1852,6 +1871,11 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, tt_global_entry->common.flags & BATADV_TT_CLIENT_WIFI) ret = true; + /* check if the two clients are marked as isolated */ + if (tt_local_entry->common.flags & BATADV_TT_CLIENT_ISOLA && + tt_global_entry->common.flags & BATADV_TT_CLIENT_ISOLA) + ret = true; + return ret; } @@ -1878,19 +1902,8 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry = NULL; struct batadv_orig_node *orig_node = NULL; struct batadv_tt_orig_list_entry *best_entry; - bool ap_isolation_enabled = false; - struct batadv_softif_vlan *vlan; - /* if the AP isolation is requested on a VLAN, then check for its - * setting in the proper VLAN private data structure - */ - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (vlan) { - ap_isolation_enabled = atomic_read(&vlan->ap_isolation); - batadv_softif_vlan_free_ref(vlan); - } - - if (src && ap_isolation_enabled) { + if (src && batadv_vlan_ap_isola_get(bat_priv, vid)) { tt_local_entry = batadv_tt_local_hash_find(bat_priv, src, vid); if (!tt_local_entry || (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)) @@ -2221,7 +2234,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, ETH_ALEN); tt_change->flags = tt_common_entry->flags; tt_change->vid = htons(tt_common_entry->vid); - tt_change->reserved = 0; + memset(tt_change->reserved, 0, + sizeof(tt_change->reserved)); tt_num_entries++; tt_change++; @@ -3565,3 +3579,29 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return 1; } + +/** + * batadv_tt_global_is_isolated - check if a client is marked as isolated + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac address of the client + * @vid: the identifier of the VLAN where this client is connected + * + * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false + * otherwise + */ +bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, + const uint8_t *addr, unsigned short vid) +{ + struct batadv_tt_global_entry *tt; + bool ret; + + tt = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt) + return false; + + ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA; + + batadv_tt_global_entry_free_ref(tt); + + return ret; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 026b1ffa674..20a1d7861de 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ @@ -22,7 +20,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv); bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - unsigned short vid, int ifindex); + unsigned short vid, int ifindex, uint32_t mark); uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, unsigned short vid, const char *message, bool roaming); @@ -50,5 +48,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, unsigned short vid); +bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, + const uint8_t *addr, unsigned short vid); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 91dd369b0ff..b53f90d85db 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_TYPES_H_ @@ -36,6 +34,18 @@ #endif /* CONFIG_BATMAN_ADV_DAT */ /** + * enum batadv_dhcp_recipient - dhcp destination + * @BATADV_DHCP_NO: packet is not a dhcp message + * @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server + * @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client + */ +enum batadv_dhcp_recipient { + BATADV_DHCP_NO = 0, + BATADV_DHCP_TO_SERVER, + BATADV_DHCP_TO_CLIENT, +}; + +/** * BATADV_TT_REMOTE_MASK - bitmask selecting the flags that are sent over the * wire only */ @@ -74,6 +84,7 @@ struct batadv_hard_iface_bat_iv { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: BATMAN IV specific per hard interface data * @cleanup_work: work queue callback item for hard interface deinit + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs */ struct batadv_hard_iface { struct list_head list; @@ -88,6 +99,29 @@ struct batadv_hard_iface { struct rcu_head rcu; struct batadv_hard_iface_bat_iv bat_iv; struct work_struct cleanup_work; + struct dentry *debug_dir; +}; + +/** + * struct batadv_orig_ifinfo - originator info per outgoing interface + * @list: list node for orig_node::ifinfo_list + * @if_outgoing: pointer to outgoing hard interface + * @router: router that should be used to reach this originator + * @last_real_seqno: last and best known sequence number + * @last_ttl: ttl of last received packet + * @batman_seqno_reset: time when the batman seqno window was reset + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_orig_ifinfo { + struct hlist_node list; + struct batadv_hard_iface *if_outgoing; + struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ + uint32_t last_real_seqno; + uint8_t last_ttl; + unsigned long batman_seqno_reset; + atomic_t refcount; + struct rcu_head rcu; }; /** @@ -165,11 +199,11 @@ struct batadv_orig_bat_iv { * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address * @primary_addr: hosts primary interface address - * @router: router that should be used to reach this originator + * @ifinfo_list: list for routers per outgoing interface + * @last_bonding_candidate: pointer to last ifinfo of last used router * @batadv_dat_addr_t: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset - * @batman_seqno_reset: time when the batman seqno window was reset * @capabilities: announced capabilities of this originator * @last_ttvn: last seen translation table version number * @tt_buff: last tt changeset this node received from the orig node @@ -182,19 +216,15 @@ struct batadv_orig_bat_iv { * made up by two operations (data structure update and metdata -CRC/TTVN- * recalculation) and they have to be executed atomically in order to avoid * another thread to read the table/metadata between those. - * @last_real_seqno: last and best known sequence number - * @last_ttl: ttl of last received packet * @bcast_bits: bitfield containing the info which payload broadcast originated * from this orig node this host already has seen (relative to * last_bcast_seqno) * @last_bcast_seqno: last broadcast sequence number received by this host * @neigh_list: list of potential next hop neighbor towards this orig node - * @neigh_list_lock: lock protecting neigh_list, router and bonding_list + * @neigh_list_lock: lock protecting neigh_list and router * @hash_entry: hlist node for batadv_priv::orig_hash * @bat_priv: pointer to soft_iface this orig node belongs to * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno - * @bond_candidates: how many candidates are available - * @bond_list: list of bonding candidates * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner * @in_coding_list: list of nodes this orig can hear @@ -210,13 +240,13 @@ struct batadv_orig_bat_iv { struct batadv_orig_node { uint8_t orig[ETH_ALEN]; uint8_t primary_addr[ETH_ALEN]; - struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ + struct hlist_head ifinfo_list; + struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT batadv_dat_addr_t dat_addr; #endif unsigned long last_seen; unsigned long bcast_seqno_reset; - unsigned long batman_seqno_reset; uint8_t capabilities; atomic_t last_ttvn; unsigned char *tt_buff; @@ -225,19 +255,15 @@ struct batadv_orig_node { bool tt_initialised; /* prevents from changing the table while reading it */ spinlock_t tt_lock; - uint32_t last_real_seqno; - uint8_t last_ttl; DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); uint32_t last_bcast_seqno; struct hlist_head neigh_list; - /* neigh_list_lock protects: neigh_list, router & bonding_list */ + /* neigh_list_lock protects: neigh_list and router */ spinlock_t neigh_list_lock; struct hlist_node hash_entry; struct batadv_priv *bat_priv; /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; - atomic_t bond_candidates; - struct list_head bond_list; atomic_t refcount; struct rcu_head rcu; #ifdef CONFIG_BATMAN_ADV_NC @@ -283,49 +309,62 @@ struct batadv_gw_node { }; /** - * struct batadv_neigh_bat_iv - B.A.T.M.A.N. IV specific structure for single - * hop neighbors + * struct batadv_neigh_node - structure for single hops neighbors + * @list: list node for batadv_orig_node::neigh_list + * @orig_node: pointer to corresponding orig_node + * @addr: the MAC address of the neighboring interface + * @ifinfo_list: list for routing metrics per outgoing interface + * @ifinfo_lock: lock protecting private ifinfo members and list + * @if_incoming: pointer to incoming hard interface + * @last_seen: when last packet via this neighbor was received + * @last_ttl: last received ttl from this neigh node + * @rcu: struct used for freeing in an RCU-safe manner + * @bat_iv: B.A.T.M.A.N. IV private structure + */ +struct batadv_neigh_node { + struct hlist_node list; + struct batadv_orig_node *orig_node; + uint8_t addr[ETH_ALEN]; + struct hlist_head ifinfo_list; + spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ + struct batadv_hard_iface *if_incoming; + unsigned long last_seen; + atomic_t refcount; + struct rcu_head rcu; +}; + +/* struct batadv_neigh_node_bat_iv - neighbor information per outgoing + * interface for BATMAN IV * @tq_recv: ring buffer of received TQ values from this neigh node * @tq_index: ring buffer index * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) * @real_bits: bitfield containing the number of OGMs received from this neigh * node (relative to orig_node->last_real_seqno) * @real_packet_count: counted result of real_bits - * @lq_update_lock: lock protecting tq_recv & tq_index */ -struct batadv_neigh_bat_iv { +struct batadv_neigh_ifinfo_bat_iv { uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; uint8_t tq_index; uint8_t tq_avg; DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); uint8_t real_packet_count; - spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ }; -/** - * struct batadv_neigh_node - structure for single hops neighbors - * @list: list node for batadv_orig_node::neigh_list - * @orig_node: pointer to corresponding orig_node - * @addr: the MAC address of the neighboring interface - * @if_incoming: pointer to incoming hard interface - * @last_seen: when last packet via this neighbor was received +/* struct batadv_neigh_ifinfo - neighbor information per outgoing interface + * @list: list node for batadv_neigh_node::ifinfo_list + * @if_outgoing: pointer to outgoing hard interface + * @bat_iv: B.A.T.M.A.N. IV private structure * @last_ttl: last received ttl from this neigh node - * @bonding_list: list node for batadv_orig_node::bond_list * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner - * @bat_iv: B.A.T.M.A.N. IV private structure + * @rcu: struct used for freeing in a RCU-safe manner */ -struct batadv_neigh_node { +struct batadv_neigh_ifinfo { struct hlist_node list; - struct batadv_orig_node *orig_node; - uint8_t addr[ETH_ALEN]; - struct batadv_hard_iface *if_incoming; - unsigned long last_seen; + struct batadv_hard_iface *if_outgoing; + struct batadv_neigh_ifinfo_bat_iv bat_iv; uint8_t last_ttl; - struct list_head bonding_list; atomic_t refcount; struct rcu_head rcu; - struct batadv_neigh_bat_iv bat_iv; }; /** @@ -687,6 +726,8 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_t log_level; #endif + uint32_t isolation_mark; + uint32_t isolation_mark_mask; atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; @@ -981,8 +1022,10 @@ struct batadv_skb_cb { * @direct_link_flags: direct link flags for aggregated OGM packets * @num_packets: counter for bcast packet retransmission * @delayed_work: work queue callback item for packet sending - * @if_incoming: pointer incoming hard-iface or primary iface if locally - * generated packet + * @if_incoming: pointer to incoming hard-iface or primary iface if + * locally generated packet + * @if_outgoing: packet where the packet should be sent to, or NULL if + * unspecified */ struct batadv_forw_packet { struct hlist_node list; @@ -994,6 +1037,7 @@ struct batadv_forw_packet { uint8_t num_packets; struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; + struct batadv_hard_iface *if_outgoing; }; /** @@ -1007,9 +1051,11 @@ struct batadv_forw_packet { * @bat_primary_iface_set: called when primary interface is selected / changed * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue * @bat_ogm_emit: send scheduled OGM - * @bat_neigh_cmp: compare the metrics of two neighbors - * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or - * better than neigh2 from the metric prospective + * @bat_neigh_cmp: compare the metrics of two neighbors for their respective + * outgoing interfaces + * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better + * than neigh2 for their respective outgoing interface from the metric + * prospective * @bat_orig_print: print the originator table (optional) * @bat_orig_free: free the resources allocated by the routing algorithm for an * orig_node object @@ -1028,11 +1074,17 @@ struct batadv_algo_ops { void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2); - bool (*bat_neigh_is_equiv_or_better)(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2); + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2); + bool (*bat_neigh_is_equiv_or_better) + (struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2); /* orig_node handling API */ - void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq); + void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, + struct batadv_hard_iface *hard_iface); void (*bat_orig_free)(struct batadv_orig_node *orig_node); int (*bat_orig_add_if)(struct batadv_orig_node *orig_node, int max_if_num); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c new file mode 100644 index 00000000000..adb3ea04ada --- /dev/null +++ b/net/bluetooth/6lowpan.c @@ -0,0 +1,860 @@ +/* + Copyright (c) 2013 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#include <linux/if_arp.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <net/af_ieee802154.h> /* to get the address type */ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> + +#include "6lowpan.h" + +#include "../ieee802154/6lowpan.h" /* for the compression support */ + +#define IFACE_NAME_TEMPLATE "bt%d" +#define EUI64_ADDR_LEN 8 + +struct skb_cb { + struct in6_addr addr; + struct l2cap_conn *conn; +}; +#define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb)) + +/* The devices list contains those devices that we are acting + * as a proxy. The BT 6LoWPAN device is a virtual device that + * connects to the Bluetooth LE device. The real connection to + * BT device is done via l2cap layer. There exists one + * virtual device / one BT 6LoWPAN network (=hciX device). + * The list contains struct lowpan_dev elements. + */ +static LIST_HEAD(bt_6lowpan_devices); +static DEFINE_RWLOCK(devices_lock); + +struct lowpan_peer { + struct list_head list; + struct l2cap_conn *conn; + + /* peer addresses in various formats */ + unsigned char eui64_addr[EUI64_ADDR_LEN]; + struct in6_addr peer_addr; +}; + +struct lowpan_dev { + struct list_head list; + + struct hci_dev *hdev; + struct net_device *netdev; + struct list_head peers; + atomic_t peer_count; /* number of items in peers list */ + + struct work_struct delete_netdev; + struct delayed_work notify_peers; +}; + +static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev) +{ + return netdev_priv(netdev); +} + +static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer) +{ + list_add(&peer->list, &dev->peers); + atomic_inc(&dev->peer_count); +} + +static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) +{ + list_del(&peer->list); + + if (atomic_dec_and_test(&dev->peer_count)) { + BT_DBG("last peer"); + return true; + } + + return false; +} + +static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, + bdaddr_t *ba, __u8 type) +{ + struct lowpan_peer *peer, *tmp; + + BT_DBG("peers %d addr %pMR type %d", atomic_read(&dev->peer_count), + ba, type); + + list_for_each_entry_safe(peer, tmp, &dev->peers, list) { + BT_DBG("addr %pMR type %d", + &peer->conn->hcon->dst, peer->conn->hcon->dst_type); + + if (bacmp(&peer->conn->hcon->dst, ba)) + continue; + + if (type == peer->conn->hcon->dst_type) + return peer; + } + + return NULL; +} + +static inline struct lowpan_peer *peer_lookup_conn(struct lowpan_dev *dev, + struct l2cap_conn *conn) +{ + struct lowpan_peer *peer, *tmp; + + list_for_each_entry_safe(peer, tmp, &dev->peers, list) { + if (peer->conn == conn) + return peer; + } + + return NULL; +} + +static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) +{ + struct lowpan_dev *entry, *tmp; + struct lowpan_peer *peer = NULL; + unsigned long flags; + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + peer = peer_lookup_conn(entry, conn); + if (peer) + break; + } + + read_unlock_irqrestore(&devices_lock, flags); + + return peer; +} + +static struct lowpan_dev *lookup_dev(struct l2cap_conn *conn) +{ + struct lowpan_dev *entry, *tmp; + struct lowpan_dev *dev = NULL; + unsigned long flags; + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + if (conn->hcon->hdev == entry->hdev) { + dev = entry; + break; + } + } + + read_unlock_irqrestore(&devices_lock, flags); + + return dev; +} + +static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) +{ + struct sk_buff *skb_cp; + int ret; + + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) + return -ENOMEM; + + ret = netif_rx(skb_cp); + + BT_DBG("receive skb %d", ret); + if (ret < 0) + return NET_RX_DROP; + + return ret; +} + +static int process_data(struct sk_buff *skb, struct net_device *netdev, + struct l2cap_conn *conn) +{ + const u8 *saddr, *daddr; + u8 iphc0, iphc1; + struct lowpan_dev *dev; + struct lowpan_peer *peer; + unsigned long flags; + + dev = lowpan_dev(netdev); + + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_conn(dev, conn); + read_unlock_irqrestore(&devices_lock, flags); + if (!peer) + goto drop; + + saddr = peer->eui64_addr; + daddr = dev->netdev->dev_addr; + + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + goto drop; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + goto drop; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + goto drop; + + return lowpan_process_data(skb, netdev, + saddr, IEEE802154_ADDR_LONG, EUI64_ADDR_LEN, + daddr, IEEE802154_ADDR_LONG, EUI64_ADDR_LEN, + iphc0, iphc1, give_skb_to_upper); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int recv_pkt(struct sk_buff *skb, struct net_device *dev, + struct l2cap_conn *conn) +{ + struct sk_buff *local_skb; + int ret; + + if (!netif_running(dev)) + goto drop; + + if (dev->type != ARPHRD_6LOWPAN) + goto drop; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Copy the packet so that the IPv6 header is + * properly aligned. + */ + local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, + skb_tailroom(skb), GFP_ATOMIC); + if (!local_skb) + goto drop; + + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + + skb_reset_network_header(local_skb); + skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); + + if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { + kfree_skb(local_skb); + goto drop; + } + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + + kfree_skb(local_skb); + kfree_skb(skb); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + + ret = process_data(local_skb, dev, conn); + if (ret != NET_RX_SUCCESS) + goto drop; + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + + kfree_skb(skb); + break; + default: + break; + } + } + + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +/* Packet from BT LE device */ +int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb) +{ + struct lowpan_dev *dev; + struct lowpan_peer *peer; + int err; + + peer = lookup_peer(conn); + if (!peer) + return -ENOENT; + + dev = lookup_dev(conn); + if (!dev || !dev->netdev) + return -ENOENT; + + err = recv_pkt(skb, dev->netdev, conn); + BT_DBG("recv pkt %d", err); + + return err; +} + +static inline int skbuff_copy(void *msg, int len, int count, int mtu, + struct sk_buff *skb, struct net_device *dev) +{ + struct sk_buff **frag; + int sent = 0; + + memcpy(skb_put(skb, count), msg, count); + + sent += count; + msg += count; + len -= count; + + dev->stats.tx_bytes += count; + dev->stats.tx_packets++; + + raw_dump_table(__func__, "Sending", skb->data, skb->len); + + /* Continuation fragments (no L2CAP header) */ + frag = &skb_shinfo(skb)->frag_list; + while (len > 0) { + struct sk_buff *tmp; + + count = min_t(unsigned int, mtu, len); + + tmp = bt_skb_alloc(count, GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + + *frag = tmp; + + memcpy(skb_put(*frag, count), msg, count); + + raw_dump_table(__func__, "Sending fragment", + (*frag)->data, count); + + (*frag)->priority = skb->priority; + + sent += count; + msg += count; + len -= count; + + skb->len += (*frag)->len; + skb->data_len += (*frag)->len; + + frag = &(*frag)->next; + + dev->stats.tx_bytes += count; + dev->stats.tx_packets++; + } + + return sent; +} + +static struct sk_buff *create_pdu(struct l2cap_conn *conn, void *msg, + size_t len, u32 priority, + struct net_device *dev) +{ + struct sk_buff *skb; + int err, count; + struct l2cap_hdr *lh; + + /* FIXME: This mtu check should be not needed and atm is only used for + * testing purposes + */ + if (conn->mtu > (L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE)) + conn->mtu = L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE; + + count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len); + + BT_DBG("conn %p len %zu mtu %d count %d", conn, len, conn->mtu, count); + + skb = bt_skb_alloc(count + L2CAP_HDR_SIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + skb->priority = priority; + + lh = (struct l2cap_hdr *)skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(L2CAP_FC_6LOWPAN); + lh->len = cpu_to_le16(len); + + err = skbuff_copy(msg, len, count, conn->mtu, skb, dev); + if (unlikely(err < 0)) { + kfree_skb(skb); + BT_DBG("skbuff copy %d failed", err); + return ERR_PTR(err); + } + + return skb; +} + +static int conn_send(struct l2cap_conn *conn, + void *msg, size_t len, u32 priority, + struct net_device *dev) +{ + struct sk_buff *skb; + + skb = create_pdu(conn, msg, len, priority, dev); + if (IS_ERR(skb)) + return -EINVAL; + + BT_DBG("conn %p skb %p len %d priority %u", conn, skb, skb->len, + skb->priority); + + hci_send_acl(conn->hchan, skb, ACL_START); + + return 0; +} + +static void get_dest_bdaddr(struct in6_addr *ip6_daddr, + bdaddr_t *addr, u8 *addr_type) +{ + u8 *eui64; + + eui64 = ip6_daddr->s6_addr + 8; + + addr->b[0] = eui64[7]; + addr->b[1] = eui64[6]; + addr->b[2] = eui64[5]; + addr->b[3] = eui64[2]; + addr->b[4] = eui64[1]; + addr->b[5] = eui64[0]; + + addr->b[5] ^= 2; + + /* Set universal/local bit to 0 */ + if (addr->b[5] & 1) { + addr->b[5] &= ~1; + *addr_type = ADDR_LE_DEV_PUBLIC; + } else { + *addr_type = ADDR_LE_DEV_RANDOM; + } +} + +static int header_create(struct sk_buff *skb, struct net_device *netdev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + struct ipv6hdr *hdr; + struct lowpan_dev *dev; + struct lowpan_peer *peer; + bdaddr_t addr, *any = BDADDR_ANY; + u8 *saddr, *daddr = any->b; + u8 addr_type; + + if (type != ETH_P_IPV6) + return -EINVAL; + + hdr = ipv6_hdr(skb); + + dev = lowpan_dev(netdev); + + if (ipv6_addr_is_multicast(&hdr->daddr)) { + memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, + sizeof(struct in6_addr)); + lowpan_cb(skb)->conn = NULL; + } else { + unsigned long flags; + + /* Get destination BT device from skb. + * If there is no such peer then discard the packet. + */ + get_dest_bdaddr(&hdr->daddr, &addr, &addr_type); + + BT_DBG("dest addr %pMR type %d", &addr, addr_type); + + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_ba(dev, &addr, addr_type); + read_unlock_irqrestore(&devices_lock, flags); + + if (!peer) { + BT_DBG("no such peer %pMR found", &addr); + return -ENOENT; + } + + daddr = peer->eui64_addr; + + memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, + sizeof(struct in6_addr)); + lowpan_cb(skb)->conn = peer->conn; + } + + saddr = dev->netdev->dev_addr; + + return lowpan_header_compress(skb, netdev, type, daddr, saddr, len); +} + +/* Packet to BT LE device */ +static int send_pkt(struct l2cap_conn *conn, const void *saddr, + const void *daddr, struct sk_buff *skb, + struct net_device *netdev) +{ + raw_dump_table(__func__, "raw skb data dump before fragmentation", + skb->data, skb->len); + + return conn_send(conn, skb->data, skb->len, 0, netdev); +} + +static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) +{ + struct sk_buff *local_skb; + struct lowpan_dev *entry, *tmp; + unsigned long flags; + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + struct lowpan_peer *pentry, *ptmp; + struct lowpan_dev *dev; + + if (entry->netdev != netdev) + continue; + + dev = lowpan_dev(entry->netdev); + + list_for_each_entry_safe(pentry, ptmp, &dev->peers, list) { + local_skb = skb_clone(skb, GFP_ATOMIC); + + send_pkt(pentry->conn, netdev->dev_addr, + pentry->eui64_addr, local_skb, netdev); + + kfree_skb(local_skb); + } + } + + read_unlock_irqrestore(&devices_lock, flags); +} + +static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + int err = 0; + unsigned char *eui64_addr; + struct lowpan_dev *dev; + struct lowpan_peer *peer; + bdaddr_t addr; + u8 addr_type; + + if (ipv6_addr_is_multicast(&lowpan_cb(skb)->addr)) { + /* We need to send the packet to every device + * behind this interface. + */ + send_mcast_pkt(skb, netdev); + } else { + unsigned long flags; + + get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type); + eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8; + dev = lowpan_dev(netdev); + + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_ba(dev, &addr, addr_type); + read_unlock_irqrestore(&devices_lock, flags); + + BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name, + &addr, &lowpan_cb(skb)->addr, peer); + + if (peer && peer->conn) + err = send_pkt(peer->conn, netdev->dev_addr, + eui64_addr, skb, netdev); + } + dev_kfree_skb(skb); + + if (err) + BT_DBG("ERROR: xmit failed (%d)", err); + + return (err < 0) ? NET_XMIT_DROP : err; +} + +static const struct net_device_ops netdev_ops = { + .ndo_start_xmit = bt_xmit, +}; + +static struct header_ops header_ops = { + .create = header_create, +}; + +static void netdev_setup(struct net_device *dev) +{ + dev->addr_len = EUI64_ADDR_LEN; + dev->type = ARPHRD_6LOWPAN; + + dev->hard_header_len = 0; + dev->needed_tailroom = 0; + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_RUNNING | IFF_POINTOPOINT; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &netdev_ops; + dev->header_ops = &header_ops; + dev->destructor = free_netdev; +} + +static struct device_type bt_type = { + .name = "bluetooth", +}; + +static void set_addr(u8 *eui, u8 *addr, u8 addr_type) +{ + /* addr is the BT address in little-endian format */ + eui[0] = addr[5]; + eui[1] = addr[4]; + eui[2] = addr[3]; + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[5] = addr[2]; + eui[6] = addr[1]; + eui[7] = addr[0]; + + eui[0] ^= 2; + + /* Universal/local bit set, RFC 4291 */ + if (addr_type == ADDR_LE_DEV_PUBLIC) + eui[0] |= 1; + else + eui[0] &= ~1; +} + +static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, + u8 addr_type) +{ + netdev->addr_assign_type = NET_ADDR_PERM; + set_addr(netdev->dev_addr, addr->b, addr_type); + netdev->dev_addr[0] ^= 2; +} + +static void ifup(struct net_device *netdev) +{ + int err; + + rtnl_lock(); + err = dev_open(netdev); + if (err < 0) + BT_INFO("iface %s cannot be opened (%d)", netdev->name, err); + rtnl_unlock(); +} + +static void do_notify_peers(struct work_struct *work) +{ + struct lowpan_dev *dev = container_of(work, struct lowpan_dev, + notify_peers.work); + + netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */ +} + +static bool is_bt_6lowpan(struct hci_conn *hcon) +{ + if (hcon->type != LE_LINK) + return false; + + return test_bit(HCI_CONN_6LOWPAN, &hcon->flags); +} + +static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev) +{ + struct lowpan_peer *peer; + unsigned long flags; + + peer = kzalloc(sizeof(*peer), GFP_ATOMIC); + if (!peer) + return -ENOMEM; + + peer->conn = conn; + memset(&peer->peer_addr, 0, sizeof(struct in6_addr)); + + /* RFC 2464 ch. 5 */ + peer->peer_addr.s6_addr[0] = 0xFE; + peer->peer_addr.s6_addr[1] = 0x80; + set_addr((u8 *)&peer->peer_addr.s6_addr + 8, conn->hcon->dst.b, + conn->hcon->dst_type); + + memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8, + EUI64_ADDR_LEN); + peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + + raw_dump_inline(__func__, "peer IPv6 address", + (unsigned char *)&peer->peer_addr, 16); + raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8); + + write_lock_irqsave(&devices_lock, flags); + INIT_LIST_HEAD(&peer->list); + peer_add(dev, peer); + write_unlock_irqrestore(&devices_lock, flags); + + /* Notifying peers about us needs to be done without locks held */ + INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers); + schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100)); + + return 0; +} + +/* This gets called when BT LE 6LoWPAN device is connected. We then + * create network device that acts as a proxy between BT LE device + * and kernel network stack. + */ +int bt_6lowpan_add_conn(struct l2cap_conn *conn) +{ + struct lowpan_peer *peer = NULL; + struct lowpan_dev *dev; + struct net_device *netdev; + int err = 0; + unsigned long flags; + + if (!is_bt_6lowpan(conn->hcon)) + return 0; + + peer = lookup_peer(conn); + if (peer) + return -EEXIST; + + dev = lookup_dev(conn); + if (dev) + return add_peer_conn(conn, dev); + + netdev = alloc_netdev(sizeof(*dev), IFACE_NAME_TEMPLATE, netdev_setup); + if (!netdev) + return -ENOMEM; + + set_dev_addr(netdev, &conn->hcon->src, conn->hcon->src_type); + + netdev->netdev_ops = &netdev_ops; + SET_NETDEV_DEV(netdev, &conn->hcon->dev); + SET_NETDEV_DEVTYPE(netdev, &bt_type); + + err = register_netdev(netdev); + if (err < 0) { + BT_INFO("register_netdev failed %d", err); + free_netdev(netdev); + goto out; + } + + BT_DBG("ifindex %d peer bdaddr %pMR my addr %pMR", + netdev->ifindex, &conn->hcon->dst, &conn->hcon->src); + set_bit(__LINK_STATE_PRESENT, &netdev->state); + + dev = netdev_priv(netdev); + dev->netdev = netdev; + dev->hdev = conn->hcon->hdev; + INIT_LIST_HEAD(&dev->peers); + + write_lock_irqsave(&devices_lock, flags); + INIT_LIST_HEAD(&dev->list); + list_add(&dev->list, &bt_6lowpan_devices); + write_unlock_irqrestore(&devices_lock, flags); + + ifup(netdev); + + return add_peer_conn(conn, dev); + +out: + return err; +} + +static void delete_netdev(struct work_struct *work) +{ + struct lowpan_dev *entry = container_of(work, struct lowpan_dev, + delete_netdev); + + unregister_netdev(entry->netdev); + + /* The entry pointer is deleted in device_event() */ +} + +int bt_6lowpan_del_conn(struct l2cap_conn *conn) +{ + struct lowpan_dev *entry, *tmp; + struct lowpan_dev *dev = NULL; + struct lowpan_peer *peer; + int err = -ENOENT; + unsigned long flags; + bool last = false; + + if (!conn || !is_bt_6lowpan(conn->hcon)) + return 0; + + write_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + dev = lowpan_dev(entry->netdev); + peer = peer_lookup_conn(dev, conn); + if (peer) { + last = peer_del(dev, peer); + err = 0; + break; + } + } + + if (!err && last && dev && !atomic_read(&dev->peer_count)) { + write_unlock_irqrestore(&devices_lock, flags); + + cancel_delayed_work_sync(&dev->notify_peers); + + /* bt_6lowpan_del_conn() is called with hci dev lock held which + * means that we must delete the netdevice in worker thread. + */ + INIT_WORK(&entry->delete_netdev, delete_netdev); + schedule_work(&entry->delete_netdev); + } else { + write_unlock_irqrestore(&devices_lock, flags); + } + + return err; +} + +static int device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct lowpan_dev *entry, *tmp; + unsigned long flags; + + if (netdev->type != ARPHRD_6LOWPAN) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + write_lock_irqsave(&devices_lock, flags); + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, + list) { + if (entry->netdev == netdev) { + list_del(&entry->list); + kfree(entry); + break; + } + } + write_unlock_irqrestore(&devices_lock, flags); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block bt_6lowpan_dev_notifier = { + .notifier_call = device_event, +}; + +int bt_6lowpan_init(void) +{ + return register_netdevice_notifier(&bt_6lowpan_dev_notifier); +} + +void bt_6lowpan_cleanup(void) +{ + unregister_netdevice_notifier(&bt_6lowpan_dev_notifier); +} diff --git a/net/bluetooth/6lowpan.h b/net/bluetooth/6lowpan.h new file mode 100644 index 00000000000..680eac808d7 --- /dev/null +++ b/net/bluetooth/6lowpan.h @@ -0,0 +1,26 @@ +/* + Copyright (c) 2013 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#ifndef __6LOWPAN_H +#define __6LOWPAN_H + +#include <linux/skbuff.h> +#include <net/bluetooth/l2cap.h> + +int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb); +int bt_6lowpan_add_conn(struct l2cap_conn *conn); +int bt_6lowpan_del_conn(struct l2cap_conn *conn); +int bt_6lowpan_init(void); +void bt_6lowpan_cleanup(void); + +#endif /* __6LOWPAN_H */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 6a791e73e39..cc6827e2ce6 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -10,6 +10,10 @@ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o + a2mp.o amp.o 6lowpan.o + +ifeq ($(CONFIG_IEEE802154_6LOWPAN),) + bluetooth-y += ../ieee802154/6lowpan_iphc.o +endif subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8b8b5f80dd8..5e8663c194c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -636,6 +636,49 @@ static int conn_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, conn_max_interval_set, "%llu\n"); +static ssize_t lowpan_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t lowpan_write(struct file *fp, const char __user *user_buffer, + size_t count, loff_t *position) +{ + struct hci_dev *hdev = fp->private_data; + bool enable; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, user_buffer, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + if (strtobool(buf, &enable) < 0) + return -EINVAL; + + if (enable == test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags)) + return -EALREADY; + + change_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags); + + return count; +} + +static const struct file_operations lowpan_debugfs_fops = { + .open = simple_open, + .read = lowpan_read, + .write = lowpan_write, + .llseek = default_llseek, +}; + /* ---- HCI requests ---- */ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) @@ -1261,8 +1304,13 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * as supported send it. If not supported assume that the controller * does not have actual support for stored link keys which makes this * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. The quirk lets a driver + * just disable this command. */ - if (hdev->commands[6] & 0x80) { + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { struct hci_cp_delete_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); @@ -1406,6 +1454,8 @@ static int __hci_init(struct hci_dev *hdev) hdev, &conn_min_interval_fops); debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, &conn_max_interval_fops); + debugfs_create_file("6lowpan", 0644, hdev->debugfs, hdev, + &lowpan_debugfs_fops); } return 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5fb3df66c2c..5f812455a45 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3533,6 +3533,9 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + if (test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags)) + set_bit(HCI_CONN_6LOWPAN, &conn->flags); + hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6a6c8bb4fd7..7552f9e3089 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -940,8 +940,22 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); skb_pull(skb, 1); - if (hci_pi(sk)->channel == HCI_CHANNEL_RAW && - bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + /* No permission check is needed for user channel + * since that gets enforced when binding the socket. + * + * However check that the packet type is valid. + */ + if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + + skb_queue_tail(&hdev->raw_q, skb); + queue_work(hdev->workqueue, &hdev->tx_work); + } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -972,14 +986,6 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (hci_pi(sk)->channel == HCI_CHANNEL_USER && - bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { - err = -EINVAL; - goto drop; - } - skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b6bca64b320..b0ad2c752d7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -40,6 +40,7 @@ #include "smp.h" #include "a2mp.h" #include "amp.h" +#include "6lowpan.h" bool disable_ertm; @@ -1468,6 +1469,8 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) BT_DBG(""); + bt_6lowpan_add_conn(conn); + /* Check if we have socket listening on cid */ pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT, &hcon->src, &hcon->dst); @@ -7119,6 +7122,10 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_conn_del(conn->hcon, EACCES); break; + case L2CAP_FC_6LOWPAN: + bt_6lowpan_recv(conn, skb); + break; + default: l2cap_data_channel(conn, cid, skb); break; @@ -7186,6 +7193,8 @@ void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); + bt_6lowpan_del_conn(hcon->l2cap_data); + l2cap_conn_del(hcon, bt_to_errno(reason)); } @@ -7467,11 +7476,14 @@ int __init l2cap_init(void) debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs, &le_default_mps); + bt_6lowpan_init(); + return 0; } void l2cap_exit(void) { + bt_6lowpan_cleanup(); debugfs_remove(l2cap_debugfs); l2cap_cleanup_sockets(); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e7806e6d282..20ef748b290 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -147,6 +147,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) __le16_to_cpu(la.l2_psm) == L2CAP_PSM_RFCOMM) chan->sec_level = BT_SECURITY_SDP; break; + case L2CAP_CHAN_RAW: + chan->sec_level = BT_SECURITY_SDP; + break; } bacpy(&chan->src, &la.l2_bdaddr); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 84fcf9fff3e..f9c0980abee 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -58,6 +58,7 @@ struct rfcomm_dev { uint modem_status; struct rfcomm_dlc *dlc; + wait_queue_head_t conn_wait; struct device *tty_dev; @@ -103,20 +104,60 @@ static void rfcomm_dev_destruct(struct tty_port *port) module_put(THIS_MODULE); } -/* device-specific initialization: open the dlc */ -static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty) +static struct device *rfcomm_get_device(struct rfcomm_dev *dev) { - struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port); + struct hci_dev *hdev; + struct hci_conn *conn; - return rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel); + hdev = hci_get_route(&dev->dst, &dev->src); + if (!hdev) + return NULL; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst); + + hci_dev_put(hdev); + + return conn ? &conn->dev : NULL; } -/* we block the open until the dlc->state becomes BT_CONNECTED */ -static int rfcomm_dev_carrier_raised(struct tty_port *port) +/* device-specific initialization: open the dlc */ +static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty) { struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port); + DEFINE_WAIT(wait); + int err; + + err = rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel); + if (err) + return err; + + while (1) { + prepare_to_wait(&dev->conn_wait, &wait, TASK_INTERRUPTIBLE); + + if (dev->dlc->state == BT_CLOSED) { + err = -dev->err; + break; + } + + if (dev->dlc->state == BT_CONNECTED) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + tty_unlock(tty); + schedule(); + tty_lock(tty); + } + finish_wait(&dev->conn_wait, &wait); + + if (!err) + device_move(dev->tty_dev, rfcomm_get_device(dev), + DPM_ORDER_DEV_AFTER_PARENT); - return (dev->dlc->state == BT_CONNECTED); + return err; } /* device-specific cleanup: close the dlc */ @@ -135,7 +176,6 @@ static const struct tty_port_operations rfcomm_port_ops = { .destruct = rfcomm_dev_destruct, .activate = rfcomm_dev_activate, .shutdown = rfcomm_dev_shutdown, - .carrier_raised = rfcomm_dev_carrier_raised, }; static struct rfcomm_dev *__rfcomm_dev_get(int id) @@ -169,22 +209,6 @@ static struct rfcomm_dev *rfcomm_dev_get(int id) return dev; } -static struct device *rfcomm_get_device(struct rfcomm_dev *dev) -{ - struct hci_dev *hdev; - struct hci_conn *conn; - - hdev = hci_get_route(&dev->dst, &dev->src); - if (!hdev) - return NULL; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst); - - hci_dev_put(hdev); - - return conn ? &conn->dev : NULL; -} - static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf) { struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); @@ -258,6 +282,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) tty_port_init(&dev->port); dev->port.ops = &rfcomm_port_ops; + init_waitqueue_head(&dev->conn_wait); skb_queue_head_init(&dev->pending); @@ -437,7 +462,8 @@ static int rfcomm_release_dev(void __user *arg) tty_kref_put(tty); } - if (!test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)) + if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) && + !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)) tty_port_put(&dev->port); tty_port_put(&dev->port); @@ -575,12 +601,9 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) BT_DBG("dlc %p dev %p err %d", dlc, dev, err); dev->err = err; - if (dlc->state == BT_CONNECTED) { - device_move(dev->tty_dev, rfcomm_get_device(dev), - DPM_ORDER_DEV_AFTER_PARENT); + wake_up_interruptible(&dev->conn_wait); - wake_up_interruptible(&dev->port.open_wait); - } else if (dlc->state == BT_CLOSED) + if (dlc->state == BT_CLOSED) tty_port_tty_hangup(&dev->port, false); } @@ -670,10 +693,20 @@ static int rfcomm_tty_install(struct tty_driver *driver, struct tty_struct *tty) /* install the tty_port */ err = tty_port_install(&dev->port, driver, tty); - if (err) + if (err) { rfcomm_tty_cleanup(tty); + return err; + } - return err; + /* take over the tty_port reference if the port was created with the + * flag RFCOMM_RELEASE_ONHUP. This will force the release of the port + * when the last process closes the tty. The behaviour is expected by + * userspace. + */ + if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) + tty_port_put(&dev->port); + + return 0; } static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) @@ -1010,10 +1043,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) BT_DBG("tty %p dev %p", tty, dev); tty_port_hangup(&dev->port); - - if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) && - !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)) - tty_port_put(&dev->port); } static int rfcomm_tty_tiocmget(struct tty_struct *tty) @@ -1096,7 +1125,7 @@ int __init rfcomm_init_ttys(void) rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL; rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; rfcomm_tty_driver->init_termios = tty_std_termios; - rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL; + rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f00cfd2a014..e4401a531af 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -32,7 +32,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; - struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u16 vid = 0; rcu_read_lock(); @@ -90,12 +90,12 @@ static int br_dev_init(struct net_device *dev) struct net_bridge *br = netdev_priv(dev); int i; - br->stats = alloc_percpu(struct br_cpu_netstats); + br->stats = alloc_percpu(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; for_each_possible_cpu(i) { - struct br_cpu_netstats *br_dev_stats; + struct pcpu_sw_netstats *br_dev_stats; br_dev_stats = per_cpu_ptr(br->stats, i); u64_stats_init(&br_dev_stats->syncp); } @@ -135,12 +135,12 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); - struct br_cpu_netstats tmp, sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; unsigned int cpu; for_each_possible_cpu(cpu) { unsigned int start; - const struct br_cpu_netstats *bstats + const struct pcpu_sw_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { start = u64_stats_fetch_begin_bh(&bstats->syncp); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7e73c32e205..bf8dc7d308d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -28,7 +28,7 @@ static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); - struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4c214b2b88e..ef66365b735 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1998,7 +1998,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) u32 old; struct net_bridge_mdb_htable *mdb; - spin_lock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); if (!netif_running(br->dev)) goto unlock; @@ -2030,7 +2030,7 @@ rollback: } unlock: - spin_unlock(&br->multicast_lock); + spin_unlock_bh(&br->multicast_lock); return err; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6526ac89bbf..e74b6d530cb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -481,9 +481,7 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_af_register(&br_af_ops); - if (err) - goto out; + rtnl_af_register(&br_af_ops); err = rtnl_link_register(&br_link_ops); if (err) @@ -493,7 +491,6 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); -out: br_mdb_uninit(); return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2e77d923c8e..3733f152351 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -210,21 +210,13 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } -struct br_cpu_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct br_cpu_netstats __percpu *stats; + struct pcpu_sw_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3b9637fb793..8dac65552f1 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -49,53 +49,51 @@ static ssize_t store_bridge_parm(struct device *d, } -static ssize_t show_forward_delay(struct device *d, +static ssize_t forward_delay_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static ssize_t store_forward_delay(struct device *d, +static ssize_t forward_delay_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_forward_delay); } -static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, - show_forward_delay, store_forward_delay); +static DEVICE_ATTR_RW(forward_delay); -static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, +static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static ssize_t store_hello_time(struct device *d, +static ssize_t hello_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_hello_time); } -static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, - store_hello_time); +static DEVICE_ATTR_RW(hello_time); -static ssize_t show_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->max_age)); } -static ssize_t store_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_max_age); } -static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); +static DEVICE_ATTR_RW(max_age); -static ssize_t show_ageing_time(struct device *d, +static ssize_t ageing_time_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -108,16 +106,15 @@ static int set_ageing_time(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_ageing_time(struct device *d, +static ssize_t ageing_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_ageing_time); } -static DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, - store_ageing_time); +static DEVICE_ATTR_RW(ageing_time); -static ssize_t show_stp_state(struct device *d, +static ssize_t stp_state_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -125,7 +122,7 @@ static ssize_t show_stp_state(struct device *d, } -static ssize_t store_stp_state(struct device *d, +static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -147,20 +144,21 @@ static ssize_t store_stp_state(struct device *d, return len; } -static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, - store_stp_state); +static DEVICE_ATTR_RW(stp_state); -static ssize_t show_group_fwd_mask(struct device *d, - struct device_attribute *attr, char *buf) +static ssize_t group_fwd_mask_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#x\n", br->group_fwd_mask); } -static ssize_t store_group_fwd_mask(struct device *d, - struct device_attribute *attr, const char *buf, - size_t len) +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) { struct net_bridge *br = to_bridge(d); char *endp; @@ -180,10 +178,9 @@ static ssize_t store_group_fwd_mask(struct device *d, return len; } -static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, - store_group_fwd_mask); +static DEVICE_ATTR_RW(group_fwd_mask); -static ssize_t show_priority(struct device *d, struct device_attribute *attr, +static ssize_t priority_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -197,93 +194,91 @@ static int set_priority(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_priority(struct device *d, struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t priority_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_priority); } -static DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); +static DEVICE_ATTR_RW(priority); -static ssize_t show_root_id(struct device *d, struct device_attribute *attr, +static ssize_t root_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } -static DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static DEVICE_ATTR_RO(root_id); -static ssize_t show_bridge_id(struct device *d, struct device_attribute *attr, +static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } -static DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static DEVICE_ATTR_RO(bridge_id); -static ssize_t show_root_port(struct device *d, struct device_attribute *attr, +static ssize_t root_port_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_port); } -static DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static DEVICE_ATTR_RO(root_port); -static ssize_t show_root_path_cost(struct device *d, +static ssize_t root_path_cost_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } -static DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static DEVICE_ATTR_RO(root_path_cost); -static ssize_t show_topology_change(struct device *d, +static ssize_t topology_change_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } -static DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static DEVICE_ATTR_RO(topology_change); -static ssize_t show_topology_change_detected(struct device *d, +static ssize_t topology_change_detected_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } -static DEVICE_ATTR(topology_change_detected, S_IRUGO, - show_topology_change_detected, NULL); +static DEVICE_ATTR_RO(topology_change_detected); -static ssize_t show_hello_timer(struct device *d, +static ssize_t hello_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } -static DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static DEVICE_ATTR_RO(hello_timer); -static ssize_t show_tcn_timer(struct device *d, struct device_attribute *attr, +static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } -static DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static DEVICE_ATTR_RO(tcn_timer); -static ssize_t show_topology_change_timer(struct device *d, +static ssize_t topology_change_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } -static DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, - NULL); +static DEVICE_ATTR_RO(topology_change_timer); -static ssize_t show_gc_timer(struct device *d, struct device_attribute *attr, +static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); } -static DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static DEVICE_ATTR_RO(gc_timer); -static ssize_t show_group_addr(struct device *d, +static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -293,7 +288,7 @@ static ssize_t show_group_addr(struct device *d, br->group_addr[4], br->group_addr[5]); } -static ssize_t store_group_addr(struct device *d, +static ssize_t group_addr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -324,10 +319,9 @@ static ssize_t store_group_addr(struct device *d, return len; } -static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, - show_group_addr, store_group_addr); +static DEVICE_ATTR_RW(group_addr); -static ssize_t store_flush(struct device *d, +static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -339,26 +333,25 @@ static ssize_t store_flush(struct device *d, br_fdb_flush(br); return len; } -static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush); +static DEVICE_ATTR_WO(flush); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -static ssize_t show_multicast_router(struct device *d, +static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_router); } -static ssize_t store_multicast_router(struct device *d, +static ssize_t multicast_router_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_router); } -static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, - store_multicast_router); +static DEVICE_ATTR_RW(multicast_router); -static ssize_t show_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -366,18 +359,17 @@ static ssize_t show_multicast_snooping(struct device *d, return sprintf(buf, "%d\n", !br->multicast_disabled); } -static ssize_t store_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_toggle); } -static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, - show_multicast_snooping, store_multicast_snooping); +static DEVICE_ATTR_RW(multicast_snooping); -static ssize_t show_multicast_query_use_ifaddr(struct device *d, - struct device_attribute *attr, - char *buf) +static ssize_t multicast_query_use_ifaddr_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); @@ -390,17 +382,15 @@ static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) } static ssize_t -store_multicast_query_use_ifaddr(struct device *d, +multicast_query_use_ifaddr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_use_ifaddr); } -static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, - show_multicast_query_use_ifaddr, - store_multicast_query_use_ifaddr); +static DEVICE_ATTR_RW(multicast_query_use_ifaddr); -static ssize_t show_multicast_querier(struct device *d, +static ssize_t multicast_querier_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -408,16 +398,15 @@ static ssize_t show_multicast_querier(struct device *d, return sprintf(buf, "%d\n", br->multicast_querier); } -static ssize_t store_multicast_querier(struct device *d, +static ssize_t multicast_querier_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_querier); } -static DEVICE_ATTR(multicast_querier, S_IRUGO | S_IWUSR, - show_multicast_querier, store_multicast_querier); +static DEVICE_ATTR_RW(multicast_querier); -static ssize_t show_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -430,31 +419,29 @@ static int set_elasticity(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_elasticity); } -static DEVICE_ATTR(hash_elasticity, S_IRUGO | S_IWUSR, show_hash_elasticity, - store_hash_elasticity); +static DEVICE_ATTR_RW(hash_elasticity); -static ssize_t show_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->hash_max); } -static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); } -static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, - store_hash_max); +static DEVICE_ATTR_RW(hash_max); -static ssize_t show_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -468,17 +455,15 @@ static int set_last_member_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_count); } -static DEVICE_ATTR(multicast_last_member_count, S_IRUGO | S_IWUSR, - show_multicast_last_member_count, - store_multicast_last_member_count); +static DEVICE_ATTR_RW(multicast_last_member_count); -static ssize_t show_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -491,17 +476,15 @@ static int set_startup_query_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_count); } -static DEVICE_ATTR(multicast_startup_query_count, S_IRUGO | S_IWUSR, - show_multicast_startup_query_count, - store_multicast_startup_query_count); +static DEVICE_ATTR_RW(multicast_startup_query_count); -static ssize_t show_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -515,17 +498,15 @@ static int set_last_member_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_interval); } -static DEVICE_ATTR(multicast_last_member_interval, S_IRUGO | S_IWUSR, - show_multicast_last_member_interval, - store_multicast_last_member_interval); +static DEVICE_ATTR_RW(multicast_last_member_interval); -static ssize_t show_multicast_membership_interval( +static ssize_t multicast_membership_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -539,17 +520,15 @@ static int set_membership_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_membership_interval( +static ssize_t multicast_membership_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_membership_interval); } -static DEVICE_ATTR(multicast_membership_interval, S_IRUGO | S_IWUSR, - show_multicast_membership_interval, - store_multicast_membership_interval); +static DEVICE_ATTR_RW(multicast_membership_interval); -static ssize_t show_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -564,17 +543,15 @@ static int set_querier_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_querier_interval); } -static DEVICE_ATTR(multicast_querier_interval, S_IRUGO | S_IWUSR, - show_multicast_querier_interval, - store_multicast_querier_interval); +static DEVICE_ATTR_RW(multicast_querier_interval); -static ssize_t show_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -589,17 +566,15 @@ static int set_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_interval); } -static DEVICE_ATTR(multicast_query_interval, S_IRUGO | S_IWUSR, - show_multicast_query_interval, - store_multicast_query_interval); +static DEVICE_ATTR_RW(multicast_query_interval); -static ssize_t show_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -614,17 +589,15 @@ static int set_query_response_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_response_interval); } -static DEVICE_ATTR(multicast_query_response_interval, S_IRUGO | S_IWUSR, - show_multicast_query_response_interval, - store_multicast_query_response_interval); +static DEVICE_ATTR_RW(multicast_query_response_interval); -static ssize_t show_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -639,18 +612,16 @@ static int set_startup_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_interval); } -static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, - show_multicast_startup_query_interval, - store_multicast_startup_query_interval); +static DEVICE_ATTR_RW(multicast_startup_query_interval); #endif #ifdef CONFIG_BRIDGE_NETFILTER -static ssize_t show_nf_call_iptables( +static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -663,16 +634,15 @@ static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_iptables( +static ssize_t nf_call_iptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_iptables); } -static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, - show_nf_call_iptables, store_nf_call_iptables); +static DEVICE_ATTR_RW(nf_call_iptables); -static ssize_t show_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -685,16 +655,15 @@ static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); } -static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, - show_nf_call_ip6tables, store_nf_call_ip6tables); +static DEVICE_ATTR_RW(nf_call_ip6tables); -static ssize_t show_nf_call_arptables( +static ssize_t nf_call_arptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -707,17 +676,16 @@ static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_arptables( +static ssize_t nf_call_arptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_arptables); } -static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, - show_nf_call_arptables, store_nf_call_arptables); +static DEVICE_ATTR_RW(nf_call_arptables); #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING -static ssize_t show_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -725,14 +693,13 @@ static ssize_t show_vlan_filtering(struct device *d, return sprintf(buf, "%d\n", br->vlan_enabled); } -static ssize_t store_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } -static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, - show_vlan_filtering, store_vlan_filtering); +static DEVICE_ATTR_RW(vlan_filtering); #endif static struct attribute *bridge_attrs[] = { diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index cf54b22818c..5bcc0d8b31f 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -14,10 +14,30 @@ #include <linux/netfilter_bridge.h> #include <net/netfilter/nf_tables.h> +static unsigned int +nft_do_chain_bridge(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + static struct nft_af_info nft_af_bridge __read_mostly = { .family = NFPROTO_BRIDGE, .nhooks = NF_BR_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + }, }; static int nf_tables_bridge_init_net(struct net *net) @@ -48,32 +68,14 @@ static struct pernet_operations nf_tables_bridge_net_ops = { .exit = nf_tables_bridge_exit_net, }; -static unsigned int -nft_do_chain_bridge(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, ops, skb, in, out); - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_bridge = { - .family = NFPROTO_BRIDGE, +static const struct nf_chain_type filter_bridge = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_BRIDGE, + .owner = THIS_MODULE, .hook_mask = (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | (1 << NF_BR_LOCAL_OUT), - .fn = { - [NF_BR_LOCAL_IN] = nft_do_chain_bridge, - [NF_BR_FORWARD] = nft_do_chain_bridge, - [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, - }, }; static int __init nf_tables_bridge_init(void) diff --git a/net/can/gw.c b/net/can/gw.c index 3f9b0f3a281..88c8a39c173 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -844,8 +844,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (!gwj->src.dev) goto out; - /* check for CAN netdev not using header_ops - see gw_rcv() */ - if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops) + if (gwj->src.dev->type != ARPHRD_CAN) goto put_src_out; gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx); @@ -853,8 +852,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (!gwj->dst.dev) goto put_src_out; - /* check for CAN netdev not using header_ops - see gw_rcv() */ - if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) + if (gwj->dst.dev->type != ARPHRD_CAN) goto put_src_dst_out; gwj->limit_hops = limhops; diff --git a/net/core/Makefile b/net/core/Makefile index b33b996f5dd..9628c20acff 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o -obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/dev.c b/net/core/dev.c index c482fe8abf8..87312dcf0aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -480,7 +480,7 @@ EXPORT_SYMBOL(dev_add_offload); * and must not be freed until after all the CPU's have gone * through a quiescent state. */ -void __dev_remove_offload(struct packet_offload *po) +static void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &offload_base; struct packet_offload *po1; @@ -498,7 +498,6 @@ void __dev_remove_offload(struct packet_offload *po) out: spin_unlock(&offload_lock); } -EXPORT_SYMBOL(__dev_remove_offload); /** * dev_remove_offload - remove packet offload handler @@ -1566,14 +1565,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info) +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info) { ASSERT_RTNL(); netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } -EXPORT_SYMBOL(call_netdevice_notifiers_info); /** * call_netdevice_notifiers - call all network notifier blocks @@ -2454,13 +2453,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) { struct dev_gso_cb *cb; - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); + kfree_skb_list(skb->next); + skb->next = NULL; cb = DEV_GSO_CB(skb); if (cb->destructor) @@ -2747,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); @@ -3852,6 +3846,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); + NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3887,10 +3882,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) + if (NAPI_GRO_CB(skb)->flush) goto normal; - napi->gro_count++; + if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { + struct sk_buff *nskb = napi->gro_list; + + /* locate the end of the list to select the 'oldest' flow */ + while (nskb->next) { + pp = &nskb->next; + nskb = *pp; + } + *pp = NULL; + nskb->next = NULL; + napi_gro_complete(nskb); + } else { + napi->gro_count++; + } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); @@ -3928,6 +3936,31 @@ normal: goto pull; } +struct packet_offload *gro_find_receive_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + return ptype; + } + return NULL; +} + +struct packet_offload *gro_find_complete_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + return ptype; + } + return NULL; +} static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { @@ -4035,7 +4068,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) EXPORT_SYMBOL(napi_gro_frags); /* - * net_rps_action sends any pending IPI's for rps. + * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. */ static void net_rps_action_and_irq_enable(struct softnet_data *sd) @@ -4240,17 +4273,10 @@ EXPORT_SYMBOL(netif_napi_add); void netif_napi_del(struct napi_struct *napi) { - struct sk_buff *skb, *next; - list_del_init(&napi->dev_list); napi_free_frags(napi); - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - kfree_skb(skb); - } - + kfree_skb_list(napi->gro_list); napi->gro_list = NULL; napi->gro_count = 0; } @@ -4367,19 +4393,6 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *adj_list) -{ - struct netdev_adjacent *adj; - - list_for_each_entry_rcu(adj, adj_list, list) { - if (adj->dev == adj_dev) - return adj; - } - return NULL; -} - static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, struct net_device *adj_dev, struct list_head *adj_list) @@ -4418,13 +4431,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -bool netdev_has_any_upper_dev(struct net_device *dev) +static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->all_adj_list.upper); } -EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -4473,7 +4485,7 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -4656,9 +4668,9 @@ free_adj: return ret; } -void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) +static void __netdev_adjacent_dev_remove(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; char linkname[IFNAMSIZ+7]; @@ -4696,11 +4708,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, kfree_rcu(adj, rcu); } -int __netdev_adjacent_dev_link_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list, - void *private, bool master) +static int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; @@ -4719,8 +4731,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev, return 0; } -int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) +static int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->all_adj_list.upper, @@ -4728,26 +4740,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev, NULL, false); } -void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list) +static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list) { __netdev_adjacent_dev_remove(dev, upper_dev, up_list); __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } -void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink_lists(dev, upper_dev, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } -int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *upper_dev, - void *private, bool master) +static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) { int ret = __netdev_adjacent_dev_link(dev, upper_dev); @@ -4766,8 +4778,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, return 0; } -void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink(dev, upper_dev); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, @@ -4956,21 +4968,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev) -{ - struct netdev_adjacent *lower; - - if (!lower_dev) - return NULL; - lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); - if (!lower) - return NULL; - - return lower->private; -} -EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); - void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) { @@ -5303,6 +5300,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags) } EXPORT_SYMBOL(dev_change_flags); +static int __dev_set_mtu(struct net_device *dev, int new_mtu) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_change_mtu) + return ops->ndo_change_mtu(dev, new_mtu); + + dev->mtu = new_mtu; + return 0; +} + /** * dev_set_mtu - Change maximum transfer unit * @dev: device @@ -5312,8 +5320,7 @@ EXPORT_SYMBOL(dev_change_flags); */ int dev_set_mtu(struct net_device *dev, int new_mtu) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; + int err, orig_mtu; if (new_mtu == dev->mtu) return 0; @@ -5325,14 +5332,20 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (!netif_device_present(dev)) return -ENODEV; - err = 0; - if (ops->ndo_change_mtu) - err = ops->ndo_change_mtu(dev, new_mtu); - else - dev->mtu = new_mtu; + orig_mtu = dev->mtu; + err = __dev_set_mtu(dev, new_mtu); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + if (!err) { + err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. + */ + __dev_set_mtu(dev, orig_mtu); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + } + } return err; } EXPORT_SYMBOL(dev_set_mtu); @@ -5825,13 +5838,8 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; - if (dev->features & NETIF_F_ALL_CSUM) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - dev->features |= NETIF_F_NOCACHE_COPY; - } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a666740051d..ea97361f0e9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1298,7 +1298,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && - dev->header_ops->rebuild(skb)) + dev_rebuild_header(skb)) return 0; return dev_queue_xmit(skb); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1a7b7b1df0d..49843bf7e43 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1358,7 +1358,7 @@ void netdev_class_remove_file_ns(struct class_attribute *class_attr, } EXPORT_SYMBOL(netdev_class_remove_file_ns); -int netdev_kobject_init(void) +int __init netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); return class_register(&net_class); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index bd7751ec1c4..2745a1b51e0 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -1,7 +1,7 @@ #ifndef __NET_SYSFS_H__ #define __NET_SYSFS_H__ -int netdev_kobject_init(void); +int __init netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c new file mode 100644 index 00000000000..719efd54166 --- /dev/null +++ b/net/core/netclassid_cgroup.c @@ -0,0 +1,120 @@ +/* + * net/core/netclassid_cgroup.c Classid Cgroupfs Handling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/cgroup.h> +#include <linux/fdtable.h> +#include <net/cls_cgroup.h> +#include <net/sock.h> + +static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct cgroup_cls_state, css) : NULL; +} + +struct cgroup_cls_state *task_cls_state(struct task_struct *p) +{ + return css_cls_state(task_css(p, net_cls_subsys_id)); +} +EXPORT_SYMBOL_GPL(task_cls_state); + +static struct cgroup_subsys_state * +cgrp_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct cgroup_cls_state *cs; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + return &cs->css; +} + +static int cgrp_css_online(struct cgroup_subsys_state *css) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + + if (parent) + cs->classid = parent->classid; + + return 0; +} + +static void cgrp_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_cls_state(css)); +} + +static int update_classid(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + + if (sock) + sock->sk->sk_classid = (u32)(unsigned long)v; + + return 0; +} + +static void cgrp_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; + struct task_struct *p; + + cgroup_taskset_for_each(p, css, tset) { + task_lock(p); + iterate_fd(p->files, 0, update_classid, v); + task_unlock(p); + } +} + +static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return css_cls_state(css)->classid; +} + +static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, + u64 value) +{ + css_cls_state(css)->classid = (u32) value; + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, + { } /* terminate */ +}; + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .css_alloc = cgrp_css_alloc, + .css_online = cgrp_css_online, + .css_free = cgrp_css_free, + .attach = cgrp_attach, + .subsys_id = net_cls_subsys_id, + .base_cftypes = ss_files, + .module = THIS_MODULE, +}; + +static int __init init_netclassid_cgroup(void) +{ + return cgroup_load_subsys(&net_cls_subsys); +} +__initcall(init_netclassid_cgroup); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8f971990677..30309787463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, !vlan_hw_offload_capable(netif_skb_features(skb), skb->vlan_proto)) { skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - break; + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code at the end of this + * function to try and re-queue a NULL skb. + */ + status = NETDEV_TX_OK; + goto unlock_txq; + } skb->vlan_tci = 0; } @@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (status == NETDEV_TX_OK) txq_trans_update(txq); } + unlock_txq: __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf67144d3e3..e6e7d582f90 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -403,34 +403,16 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) } /** - * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. - * @ops: struct rtnl_af_ops * to register - * - * The caller must hold the rtnl_mutex. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_af_register(struct rtnl_af_ops *ops) -{ - list_add_tail(&ops->list, &rtnl_af_ops); - return 0; -} -EXPORT_SYMBOL_GPL(__rtnl_af_register); - -/** * rtnl_af_register - Register rtnl_af_ops with rtnetlink. * @ops: struct rtnl_af_ops * to register * * Returns 0 on success or a negative error code. */ -int rtnl_af_register(struct rtnl_af_ops *ops) +void rtnl_af_register(struct rtnl_af_ops *ops) { - int err; - rtnl_lock(); - err = __rtnl_af_register(ops); + list_add_tail(&ops->list, &rtnl_af_ops); rtnl_unlock(); - return err; } EXPORT_SYMBOL_GPL(rtnl_af_register); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2b6b863f51f..1d641e781f8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2121,6 +2121,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); + /** + * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() + * @from: source buffer + * + * Calculates the amount of linear headroom needed in the 'to' skb passed + * into skb_zerocopy(). + */ +unsigned int +skb_zerocopy_headlen(const struct sk_buff *from) +{ + unsigned int hlen = 0; + + if (!from->head_frag || + skb_headlen(from) < L1_CACHE_BYTES || + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(from); + + if (skb_has_frag_list(from)) + hlen = from->len; + + return hlen; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); + +/** + * skb_zerocopy - Zero copy skb to skb + * @to: destination buffer + * @source: source buffer + * @len: number of bytes to copy from source buffer + * @hlen: size of linear headroom in destination buffer + * + * Copies up to `len` bytes from `from` to `to` by creating references + * to the frags in the source buffer. + * + * The `hlen` as calculated by skb_zerocopy_headlen() specifies the + * headroom in the `to` buffer. + */ +void +skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + BUG_ON(!from->head_frag && !hlen); + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} +EXPORT_SYMBOL_GPL(skb_zerocopy); + void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; @@ -2981,10 +3066,7 @@ perform_csum_check: return segs; err: - while ((skb = segs)) { - segs = skb->next; - kfree_skb(skb); - } + kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); diff --git a/net/core/sock.c b/net/core/sock.c index 5393b4b719d..85ad6f0d389 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -925,8 +925,8 @@ set_rcvbuf: EXPORT_SYMBOL(sock_setsockopt); -void cred_to_ucred(struct pid *pid, const struct cred *cred, - struct ucred *ucred) +static void cred_to_ucred(struct pid *pid, const struct cred *cred, + struct ucred *ucred) { ucred->pid = pid_vnr(pid); ucred->uid = ucred->gid = -1; @@ -937,7 +937,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, ucred->gid = from_kgid_munged(current_ns, cred->egid); } } -EXPORT_SYMBOL_GPL(cred_to_ucred); int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) @@ -1308,19 +1307,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) -{ - u32 classid; - - classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; -} -EXPORT_SYMBOL(sock_update_classid); -#endif - -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) @@ -1666,22 +1653,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, EXPORT_SYMBOL(sock_wmalloc); /* - * Allocate a skb from the socket's receive buffer. - */ -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - gfp_t priority) -{ - if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { - struct sk_buff *skb = alloc_skb(size, priority); - if (skb) { - skb_set_owner_r(skb, sk); - return skb; - } - } - return NULL; -} - -/* * Allocate a memory block from the socket's option memory buffer. */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 62b5828acde..c073b81a1f3 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -8,7 +8,7 @@ #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -bool tfrc_debug; +static bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 40ee7d62b65..a3d8f7c76ae 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,7 +21,6 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 30948784dd5..c67816647cc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -479,7 +479,6 @@ void dccp_feat_list_purge(struct list_head *fn_list); int dccp_insert_options(struct sock *sk, struct sk_buff *skb); int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *); -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); u32 dccp_timestamp(void); void dccp_timestamping_init(void); int dccp_insert_option(struct sk_buff *skb, unsigned char option, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 88299c29101..22b5d818b20 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -989,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = { .err_handler = dccp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; static const struct proto_ops inet_dccp_ops = { diff --git a/net/dccp/options.c b/net/dccp/options.c index a58e0b63405..9bce31886bd 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -343,38 +343,6 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } -/* FIXME: This function is currently not used anywhere */ -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) -{ - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 2 + elapsed_time_len; - unsigned char *to; - - if (elapsed_time_len == 0) - return 0; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ELAPSED_TIME; - *to++ = len; - - if (elapsed_time_len == 2) { - const __be16 var16 = htons((u16)elapsed_time); - memcpy(to, &var16, 2); - } else { - const __be32 var32 = htonl(elapsed_time); - memcpy(to, &var32, 4); - } - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); - static int dccp_insert_option_timestamp(struct sk_buff *skb) { __be32 now = htonl(dccp_timestamp()); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 4c6bdf97a65..595ddf0459d 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -152,17 +152,6 @@ static const struct file_operations dccpprobe_fops = { .llseek = noop_llseek, }; -static __init int setup_jprobe(void) -{ - int ret = register_jprobe(&dccp_send_probe); - - if (ret) { - request_module("dccp"); - ret = register_jprobe(&dccp_send_probe); - } - return ret; -} - static __init int dccpprobe_init(void) { int ret = -ENOMEM; @@ -174,7 +163,13 @@ static __init int dccpprobe_init(void) if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; - ret = setup_jprobe(); + ret = register_jprobe(&dccp_send_probe); + if (ret) { + ret = request_module("dccp"); + if (!ret) + ret = register_jprobe(&dccp_send_probe); + } + if (ret) goto err1; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 459e200c08a..48b25c0af4d 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -62,9 +62,6 @@ #include "6lowpan.h" -/* TTL uncompression values */ -static const u8 lowpan_ttl_values[] = {0, 1, 64, 255}; - static LIST_HEAD(lowpan_devices); /* private device info */ @@ -104,378 +101,14 @@ static inline void lowpan_address_flip(u8 *src, u8 *dest) (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; } -/* list of all 6lowpan devices, uses for package delivering */ -/* print data in line */ -static inline void lowpan_raw_dump_inline(const char *caller, char *msg, - unsigned char *buf, int len) -{ -#ifdef DEBUG - if (msg) - pr_debug("(%s) %s: ", caller, msg); - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, - 16, 1, buf, len, false); -#endif /* DEBUG */ -} - -/* - * print data in a table format: - * - * addr: xx xx xx xx xx xx - * addr: xx xx xx xx xx xx - * ... - */ -static inline void lowpan_raw_dump_table(const char *caller, char *msg, - unsigned char *buf, int len) -{ -#ifdef DEBUG - if (msg) - pr_debug("(%s) %s:\n", caller, msg); - print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, - 16, 1, buf, len, false); -#endif /* DEBUG */ -} - -static u8 -lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr, - const unsigned char *lladdr) -{ - u8 val = 0; - - if (is_addr_mac_addr_based(ipaddr, lladdr)) - val = 3; /* 0-bits */ - else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { - /* compress IID to 16 bits xxxx::XXXX */ - memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); - *hc06_ptr += 2; - val = 2; /* 16-bits */ - } else { - /* do not compress IID => xxxx::IID */ - memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); - *hc06_ptr += 8; - val = 1; /* 64-bits */ - } - - return rol8(val, shift); -} - -/* - * Uncompress address function for source and - * destination address(non-multicast). - * - * address_mode is sam value or dam value. - */ -static int -lowpan_uncompress_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 address_mode, - const struct ieee802154_addr *lladdr) -{ - bool fail; - - switch (address_mode) { - case LOWPAN_IPHC_ADDR_00: - /* for global link addresses */ - fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); - break; - case LOWPAN_IPHC_ADDR_01: - /* fe:80::XXXX:XXXX:XXXX:XXXX */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); - break; - case LOWPAN_IPHC_ADDR_02: - /* fe:80::ff:fe00:XXXX */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - ipaddr->s6_addr[11] = 0xFF; - ipaddr->s6_addr[12] = 0xFE; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); - break; - case LOWPAN_IPHC_ADDR_03: - fail = false; - switch (lladdr->addr_type) { - case IEEE802154_ADDR_LONG: - /* fe:80::XXXX:XXXX:XXXX:XXXX - * \_________________/ - * hwaddr - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr, - IEEE802154_ADDR_LEN); - /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - ipaddr->s6_addr[8] ^= 0x02; - break; - case IEEE802154_ADDR_SHORT: - /* fe:80::ff:fe00:XXXX - * \__/ - * short_addr - * - * Universe/Local bit is zero. - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - ipaddr->s6_addr[11] = 0xFF; - ipaddr->s6_addr[12] = 0xFE; - ipaddr->s6_addr16[7] = htons(lladdr->short_addr); - break; - default: - pr_debug("Invalid addr_type set\n"); - return -EINVAL; - } - break; - default: - pr_debug("Invalid address mode value: 0x%x\n", address_mode); - return -EINVAL; - } - - if (fail) { - pr_debug("Failed to fetch skb data\n"); - return -EIO; - } - - lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n", - ipaddr->s6_addr, 16); - - return 0; -} - -/* Uncompress address function for source context - * based address(non-multicast). - */ -static int -lowpan_uncompress_context_based_src_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 sam) -{ - switch (sam) { - case LOWPAN_IPHC_ADDR_00: - /* unspec address :: - * Do nothing, address is already :: - */ - break; - case LOWPAN_IPHC_ADDR_01: - /* TODO */ - case LOWPAN_IPHC_ADDR_02: - /* TODO */ - case LOWPAN_IPHC_ADDR_03: - /* TODO */ - netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); - return -EINVAL; - default: - pr_debug("Invalid sam value: 0x%x\n", sam); - return -EINVAL; - } - - lowpan_raw_dump_inline(NULL, - "Reconstructed context based ipv6 src addr is:\n", - ipaddr->s6_addr, 16); - - return 0; -} - -/* Uncompress function for multicast destination address, - * when M bit is set. - */ -static int -lowpan_uncompress_multicast_daddr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 dam) -{ - bool fail; - - switch (dam) { - case LOWPAN_IPHC_DAM_00: - /* 00: 128 bits. The full address - * is carried in-line. - */ - fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); - break; - case LOWPAN_IPHC_DAM_01: - /* 01: 48 bits. The address takes - * the form ffXX::00XX:XXXX:XXXX. - */ - ipaddr->s6_addr[0] = 0xFF; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); - fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5); - break; - case LOWPAN_IPHC_DAM_10: - /* 10: 32 bits. The address takes - * the form ffXX::00XX:XXXX. - */ - ipaddr->s6_addr[0] = 0xFF; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); - fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3); - break; - case LOWPAN_IPHC_DAM_11: - /* 11: 8 bits. The address takes - * the form ff02::00XX. - */ - ipaddr->s6_addr[0] = 0xFF; - ipaddr->s6_addr[1] = 0x02; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); - break; - default: - pr_debug("DAM value has a wrong value: 0x%x\n", dam); - return -EINVAL; - } - - if (fail) { - pr_debug("Failed to fetch skb data\n"); - return -EIO; - } - - lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n", - ipaddr->s6_addr, 16); - - return 0; -} - -static void -lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) -{ - struct udphdr *uh = udp_hdr(skb); - - if (((uh->source & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT) && - ((uh->dest & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT)) { - pr_debug("UDP header: both ports compression to 4 bits\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_11; - **(hc06_ptr + 1) = /* subtraction is faster */ - (u8)((uh->dest - LOWPAN_NHC_UDP_4BIT_PORT) + - ((uh->source & LOWPAN_NHC_UDP_4BIT_PORT) << 4)); - *hc06_ptr += 2; - } else if ((uh->dest & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of dest\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_01; - memcpy(*hc06_ptr + 1, &uh->source, 2); - **(hc06_ptr + 3) = (u8)(uh->dest - LOWPAN_NHC_UDP_8BIT_PORT); - *hc06_ptr += 4; - } else if ((uh->source & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of source\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_10; - memcpy(*hc06_ptr + 1, &uh->dest, 2); - **(hc06_ptr + 3) = (u8)(uh->source - LOWPAN_NHC_UDP_8BIT_PORT); - *hc06_ptr += 4; - } else { - pr_debug("UDP header: can't compress\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_00; - memcpy(*hc06_ptr + 1, &uh->source, 2); - memcpy(*hc06_ptr + 3, &uh->dest, 2); - *hc06_ptr += 5; - } - - /* checksum is always inline */ - memcpy(*hc06_ptr, &uh->check, 2); - *hc06_ptr += 2; - - /* skip the UDP header */ - skb_pull(skb, sizeof(struct udphdr)); -} - -static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) -{ - if (unlikely(!pskb_may_pull(skb, 1))) - return -EINVAL; - - *val = skb->data[0]; - skb_pull(skb, 1); - - return 0; -} - -static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) -{ - if (unlikely(!pskb_may_pull(skb, 2))) - return -EINVAL; - - *val = (skb->data[0] << 8) | skb->data[1]; - skb_pull(skb, 2); - - return 0; -} - -static int -lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) -{ - u8 tmp; - - if (!uh) - goto err; - - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto err; - - if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { - pr_debug("UDP header uncompression\n"); - switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { - case LOWPAN_NHC_UDP_CS_P_00: - memcpy(&uh->source, &skb->data[0], 2); - memcpy(&uh->dest, &skb->data[2], 2); - skb_pull(skb, 4); - break; - case LOWPAN_NHC_UDP_CS_P_01: - memcpy(&uh->source, &skb->data[0], 2); - uh->dest = - skb->data[2] + LOWPAN_NHC_UDP_8BIT_PORT; - skb_pull(skb, 3); - break; - case LOWPAN_NHC_UDP_CS_P_10: - uh->source = skb->data[0] + LOWPAN_NHC_UDP_8BIT_PORT; - memcpy(&uh->dest, &skb->data[1], 2); - skb_pull(skb, 3); - break; - case LOWPAN_NHC_UDP_CS_P_11: - uh->source = - LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] >> 4); - uh->dest = - LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] & 0x0f); - skb_pull(skb, 1); - break; - default: - pr_debug("ERROR: unknown UDP format\n"); - goto err; - } - - pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", - uh->source, uh->dest); - - /* copy checksum */ - memcpy(&uh->check, &skb->data[0], 2); - skb_pull(skb, 2); - - /* - * UDP lenght needs to be infered from the lower layers - * here, we obtain the hint from the remaining size of the - * frame - */ - uh->len = htons(skb->len + sizeof(struct udphdr)); - pr_debug("uncompressed UDP length: src = %d", uh->len); - } else { - pr_debug("ERROR: unsupported NH format\n"); - goto err; - } - - return 0; -err: - return -EINVAL; -} - static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { - u8 tmp, iphc0, iphc1, *hc06_ptr; struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; - u8 head[100]; struct ieee802154_addr sa, da; /* TODO: @@ -485,181 +118,14 @@ static int lowpan_header_create(struct sk_buff *skb, return 0; hdr = ipv6_hdr(skb); - hc06_ptr = head + 2; - - pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" - "\tnexthdr = 0x%02x\n\thop_lim = %d\n", hdr->version, - ntohs(hdr->payload_len), hdr->nexthdr, hdr->hop_limit); - - lowpan_raw_dump_table(__func__, "raw skb network header dump", - skb_network_header(skb), sizeof(struct ipv6hdr)); if (!saddr) saddr = dev->dev_addr; - lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - - /* - * As we copy some bit-length fields, in the IPHC encoding bytes, - * we sometimes use |= - * If the field is 0, and the current bit value in memory is 1, - * this does not work. We therefore reset the IPHC encoding here - */ - iphc0 = LOWPAN_DISPATCH_IPHC; - iphc1 = 0; - - /* TODO: context lookup */ + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - /* - * Traffic class, flow label - * If flow label is 0, compress it. If traffic class is 0, compress it - * We have to process both in the same time as the offset of traffic - * class depends on the presence of version and flow label - */ - - /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ - tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); - tmp = ((tmp & 0x03) << 6) | (tmp >> 2); - - if (((hdr->flow_lbl[0] & 0x0F) == 0) && - (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { - /* flow label can be compressed */ - iphc0 |= LOWPAN_IPHC_FL_C; - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress (elide) all */ - iphc0 |= LOWPAN_IPHC_TC_C; - } else { - /* compress only the flow label */ - *hc06_ptr = tmp; - hc06_ptr += 1; - } - } else { - /* Flow label cannot be compressed */ - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress only traffic class */ - iphc0 |= LOWPAN_IPHC_TC_C; - *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); - memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); - hc06_ptr += 3; - } else { - /* compress nothing */ - memcpy(hc06_ptr, &hdr, 4); - /* replace the top byte with new ECN | DSCP format */ - *hc06_ptr = tmp; - hc06_ptr += 4; - } - } - - /* NOTE: payload length is always compressed */ - - /* Next Header is compress if UDP */ - if (hdr->nexthdr == UIP_PROTO_UDP) - iphc0 |= LOWPAN_IPHC_NH_C; - - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { - *hc06_ptr = hdr->nexthdr; - hc06_ptr += 1; - } - - /* - * Hop limit - * if 1: compress, encoding is 01 - * if 64: compress, encoding is 10 - * if 255: compress, encoding is 11 - * else do not compress - */ - switch (hdr->hop_limit) { - case 1: - iphc0 |= LOWPAN_IPHC_TTL_1; - break; - case 64: - iphc0 |= LOWPAN_IPHC_TTL_64; - break; - case 255: - iphc0 |= LOWPAN_IPHC_TTL_255; - break; - default: - *hc06_ptr = hdr->hop_limit; - hc06_ptr += 1; - break; - } - - /* source address compression */ - if (is_addr_unspecified(&hdr->saddr)) { - pr_debug("source address is unspecified, setting SAC\n"); - iphc1 |= LOWPAN_IPHC_SAC; - /* TODO: context lookup */ - } else if (is_addr_link_local(&hdr->saddr)) { - pr_debug("source address is link-local\n"); - iphc1 |= lowpan_compress_addr_64(&hc06_ptr, - LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr); - } else { - pr_debug("send the full source address\n"); - memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); - hc06_ptr += 16; - } - - /* destination address compression */ - if (is_addr_mcast(&hdr->daddr)) { - pr_debug("destination address is multicast: "); - iphc1 |= LOWPAN_IPHC_M; - if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { - pr_debug("compressed to 1 octet\n"); - iphc1 |= LOWPAN_IPHC_DAM_11; - /* use last byte */ - *hc06_ptr = hdr->daddr.s6_addr[15]; - hc06_ptr += 1; - } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { - pr_debug("compressed to 4 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_10; - /* second byte + the last three */ - *hc06_ptr = hdr->daddr.s6_addr[1]; - memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); - hc06_ptr += 4; - } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { - pr_debug("compressed to 6 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_01; - /* second byte + the last five */ - *hc06_ptr = hdr->daddr.s6_addr[1]; - memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); - hc06_ptr += 6; - } else { - pr_debug("using full address\n"); - iphc1 |= LOWPAN_IPHC_DAM_00; - memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); - hc06_ptr += 16; - } - } else { - /* TODO: context lookup */ - if (is_addr_link_local(&hdr->daddr)) { - pr_debug("dest address is unicast and link-local\n"); - iphc1 |= lowpan_compress_addr_64(&hc06_ptr, - LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr); - } else { - pr_debug("dest address is unicast: using full one\n"); - memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); - hc06_ptr += 16; - } - } - - /* UDP header compression */ - if (hdr->nexthdr == UIP_PROTO_UDP) - lowpan_compress_udp_header(&hc06_ptr, skb); - - head[0] = iphc0; - head[1] = iphc1; - - skb_pull(skb, sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); - memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); - skb_reset_network_header(skb); - - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, - skb->len); + lowpan_header_compress(skb, dev, type, daddr, saddr, len); /* * NOTE1: I'm still unsure about the fact that compression and WPAN @@ -671,39 +137,38 @@ static int lowpan_header_create(struct sk_buff *skb, * from MAC subif of the 'dev' and 'real_dev' network devices, but * this isn't implemented in mainline yet, so currently we assign 0xff */ - { - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); - /* prepare wpan address data */ - sa.addr_type = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + /* prepare wpan address data */ + sa.addr_type = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - memcpy(&(sa.hwaddr), saddr, 8); - /* intra-PAN communications */ - da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + memcpy(&(sa.hwaddr), saddr, 8); + /* intra-PAN communications */ + da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - /* - * if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast(daddr)) { - da.addr_type = IEEE802154_ADDR_SHORT; - da.short_addr = IEEE802154_ADDR_BROADCAST; - } else { - da.addr_type = IEEE802154_ADDR_LONG; - memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); - - /* request acknowledgment */ - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; - } + /* + * if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast(daddr)) { + da.addr_type = IEEE802154_ADDR_SHORT; + da.short_addr = IEEE802154_ADDR_BROADCAST; + } else { + da.addr_type = IEEE802154_ADDR_LONG; + memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - type, (void *)&da, (void *)&sa, skb->len); + /* request acknowledgment */ + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + type, (void *)&da, (void *)&sa, skb->len); } -static int lowpan_give_skb_to_devices(struct sk_buff *skb) +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) { struct lowpan_dev_record *entry; struct sk_buff *skb_cp; @@ -726,31 +191,6 @@ static int lowpan_give_skb_to_devices(struct sk_buff *skb) return stat; } -static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) -{ - struct sk_buff *new; - int stat = NET_RX_SUCCESS; - - new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), - GFP_ATOMIC); - kfree_skb(skb); - - if (!new) - return -ENOMEM; - - skb_push(new, sizeof(struct ipv6hdr)); - skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); - - new->protocol = htons(ETH_P_IPV6); - new->pkt_type = PACKET_HOST; - - stat = lowpan_give_skb_to_devices(new); - - kfree_skb(new); - - return stat; -} - static void lowpan_fragment_timer_expired(unsigned long entry_addr) { struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr; @@ -814,16 +254,12 @@ frame_err: return NULL; } -static int -lowpan_process_data(struct sk_buff *skb) +static int process_data(struct sk_buff *skb) { - struct ipv6hdr hdr = {}; - u8 tmp, iphc0, iphc1, num_context = 0; + u8 iphc0, iphc1; const struct ieee802154_addr *_saddr, *_daddr; - int err; - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, - skb->len); + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); /* at least two bytes will be used for the encoding */ if (skb->len < 2) goto drop; @@ -925,162 +361,11 @@ lowpan_process_data(struct sk_buff *skb) _saddr = &mac_cb(skb)->sa; _daddr = &mac_cb(skb)->da; - pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1); - - /* another if the CID flag is set */ - if (iphc1 & LOWPAN_IPHC_CID) { - pr_debug("CID flag is set, increase header with one\n"); - if (lowpan_fetch_skb_u8(skb, &num_context)) - goto drop; - } - - hdr.version = 6; - - /* Traffic Class and Flow Label */ - switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { - /* - * Traffic Class and FLow Label carried in-line - * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) - */ - case 0: /* 00b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto drop; - - memcpy(&hdr.flow_lbl, &skb->data[0], 3); - skb_pull(skb, 3); - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | - (hdr.flow_lbl[0] & 0x0f); - break; - /* - * Traffic class carried in-line - * ECN + DSCP (1 byte), Flow Label is elided - */ - case 2: /* 10b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto drop; - - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); - break; - /* - * Flow Label carried in-line - * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided - */ - case 1: /* 01b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto drop; - - hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); - memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); - skb_pull(skb, 2); - break; - /* Traffic Class and Flow Label are elided */ - case 3: /* 11b */ - break; - default: - break; - } - - /* Next Header */ - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { - /* Next header is carried inline */ - if (lowpan_fetch_skb_u8(skb, &(hdr.nexthdr))) - goto drop; - - pr_debug("NH flag is set, next header carried inline: %02x\n", - hdr.nexthdr); - } - - /* Hop Limit */ - if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) - hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; - else { - if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit))) - goto drop; - } - - /* Extract SAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; - - if (iphc1 & LOWPAN_IPHC_SAC) { - /* Source address context based uncompression */ - pr_debug("SAC bit is set. Handle context based source address.\n"); - err = lowpan_uncompress_context_based_src_addr( - skb, &hdr.saddr, tmp); - } else { - /* Source address uncompression */ - pr_debug("source address stateless compression\n"); - err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr); - } - - /* Check on error of previous branch */ - if (err) - goto drop; - - /* Extract DAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; - - /* check for Multicast Compression */ - if (iphc1 & LOWPAN_IPHC_M) { - if (iphc1 & LOWPAN_IPHC_DAC) { - pr_debug("dest: context-based mcast compression\n"); - /* TODO: implement this */ - } else { - err = lowpan_uncompress_multicast_daddr( - skb, &hdr.daddr, tmp); - if (err) - goto drop; - } - } else { - pr_debug("dest: stateless compression\n"); - err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr); - if (err) - goto drop; - } - - /* UDP data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) { - struct udphdr uh; - struct sk_buff *new; - if (lowpan_uncompress_udp_header(skb, &uh)) - goto drop; - - /* - * replace the compressed UDP head by the uncompressed UDP - * header - */ - new = skb_copy_expand(skb, sizeof(struct udphdr), - skb_tailroom(skb), GFP_ATOMIC); - kfree_skb(skb); - - if (!new) - return -ENOMEM; - - skb = new; - - skb_push(skb, sizeof(struct udphdr)); - skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); - - lowpan_raw_dump_table(__func__, "raw UDP header dump", - (u8 *)&uh, sizeof(uh)); - - hdr.nexthdr = UIP_PROTO_UDP; - } - - /* Not fragmented package */ - hdr.payload_len = htons(skb->len); - - pr_debug("skb headroom size = %d, data length = %d\n", - skb_headroom(skb), skb->len); - - pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" - "nexthdr = 0x%02x\n\thop_lim = %d\n", hdr.version, - ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit); - - lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, - sizeof(hdr)); - return lowpan_skb_deliver(skb, &hdr); + return lowpan_process_data(skb, skb->dev, (u8 *)_saddr->hwaddr, + _saddr->addr_type, IEEE802154_ADDR_LEN, + (u8 *)_daddr->hwaddr, _daddr->addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1, + lowpan_give_skb_to_devices); unlock_and_drop: spin_unlock_bh(&flist_lock); @@ -1112,7 +397,7 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, hlen = (type == LOWPAN_DISPATCH_FRAG1) ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; - lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); + raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); frag = netdev_alloc_skb(skb->dev, hlen + mlen + plen + IEEE802154_MFR_SIZE); @@ -1132,8 +417,7 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, skb_copy_to_linear_data_offset(frag, mlen + hlen, skb_network_header(skb) + offset, plen); - lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data, - frag->len); + raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len); return dev_queue_xmit(frag); } @@ -1316,7 +600,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, /* Pull off the 1-byte of 6lowpan header. */ skb_pull(local_skb, 1); - lowpan_give_skb_to_devices(local_skb); + lowpan_give_skb_to_devices(local_skb, NULL); kfree_skb(local_skb); kfree_skb(skb); @@ -1328,7 +612,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, local_skb = skb_clone(skb, GFP_ATOMIC); if (!local_skb) goto drop; - lowpan_process_data(local_skb); + process_data(local_skb); kfree_skb(skb); break; diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index 2869c0526da..2b835db3bda 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -231,6 +231,61 @@ #define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline, dest = 16 bit inline */ #define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ +#define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */ + +#ifdef DEBUG +/* print data in line */ +static inline void raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) +{ + if (msg) + pr_debug("%s():%s: ", caller, msg); + + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, buf, len, false); +} + +/* print data in a table format: + * + * addr: xx xx xx xx xx xx + * addr: xx xx xx xx xx xx + * ... + */ +static inline void raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) +{ + if (msg) + pr_debug("%s():%s:\n", caller, msg); + + print_hex_dump_debug("\t", DUMP_PREFIX_OFFSET, 16, 1, buf, len, false); +} +#else +static inline void raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) { } +static inline void raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) { } +#endif + +static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) +{ + if (unlikely(!pskb_may_pull(skb, 1))) + return -EINVAL; + + *val = skb->data[0]; + skb_pull(skb, 1); + + return 0; +} + +static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) +{ + if (unlikely(!pskb_may_pull(skb, 2))) + return -EINVAL; + + *val = (skb->data[0] << 8) | skb->data[1]; + skb_pull(skb, 2); + + return 0; +} static inline bool lowpan_fetch_skb(struct sk_buff *skb, void *data, const unsigned int len) @@ -244,4 +299,21 @@ static inline bool lowpan_fetch_skb(struct sk_buff *skb, return false; } +static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, + const size_t len) +{ + memcpy(*hc_ptr, data, len); + *hc_ptr += len; +} + +typedef int (*skb_delivery_cb)(struct sk_buff *skb, struct net_device *dev); + +int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, + const u8 *saddr, const u8 saddr_type, const u8 saddr_len, + const u8 *daddr, const u8 daddr_type, const u8 daddr_len, + u8 iphc0, u8 iphc1, skb_delivery_cb skb_deliver); +int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len); + #endif /* __6LOWPAN_H__ */ diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c new file mode 100644 index 00000000000..11840f9e46d --- /dev/null +++ b/net/ieee802154/6lowpan_iphc.c @@ -0,0 +1,799 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <linux/bitops.h> +#include <linux/if_arp.h> +#include <linux/netdevice.h> +#include <net/ipv6.h> +#include <net/af_ieee802154.h> + +#include "6lowpan.h" + +/* + * Uncompress address function for source and + * destination address(non-multicast). + * + * address_mode is sam value or dam value. + */ +static int uncompress_addr(struct sk_buff *skb, + struct in6_addr *ipaddr, const u8 address_mode, + const u8 *lladdr, const u8 addr_type, + const u8 addr_len) +{ + bool fail; + + switch (address_mode) { + case LOWPAN_IPHC_ADDR_00: + /* for global link addresses */ + fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); + break; + case LOWPAN_IPHC_ADDR_01: + /* fe:80::XXXX:XXXX:XXXX:XXXX */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); + break; + case LOWPAN_IPHC_ADDR_02: + /* fe:80::ff:fe00:XXXX */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); + break; + case LOWPAN_IPHC_ADDR_03: + fail = false; + switch (addr_type) { + case IEEE802154_ADDR_LONG: + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, addr_len); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + ipaddr->s6_addr[8] ^= 0x02; + break; + case IEEE802154_ADDR_SHORT: + /* fe:80::ff:fe00:XXXX + * \__/ + * short_addr + * + * Universe/Local bit is zero. + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + ipaddr->s6_addr16[7] = htons(*((u16 *)lladdr)); + break; + default: + pr_debug("Invalid addr_type set\n"); + return -EINVAL; + } + break; + default: + pr_debug("Invalid address mode value: 0x%x\n", address_mode); + return -EINVAL; + } + + if (fail) { + pr_debug("Failed to fetch skb data\n"); + return -EIO; + } + + raw_dump_inline(NULL, "Reconstructed ipv6 addr is", + ipaddr->s6_addr, 16); + + return 0; +} + +/* + * Uncompress address function for source context + * based address(non-multicast). + */ +static int uncompress_context_based_src_addr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 sam) +{ + switch (sam) { + case LOWPAN_IPHC_ADDR_00: + /* unspec address :: + * Do nothing, address is already :: + */ + break; + case LOWPAN_IPHC_ADDR_01: + /* TODO */ + case LOWPAN_IPHC_ADDR_02: + /* TODO */ + case LOWPAN_IPHC_ADDR_03: + /* TODO */ + netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); + return -EINVAL; + default: + pr_debug("Invalid sam value: 0x%x\n", sam); + return -EINVAL; + } + + raw_dump_inline(NULL, + "Reconstructed context based ipv6 src addr is", + ipaddr->s6_addr, 16); + + return 0; +} + +static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, + struct net_device *dev, skb_delivery_cb deliver_skb) +{ + struct sk_buff *new; + int stat; + + new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), + GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb_push(new, sizeof(struct ipv6hdr)); + skb_reset_network_header(new); + skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); + + new->protocol = htons(ETH_P_IPV6); + new->pkt_type = PACKET_HOST; + new->dev = dev; + + raw_dump_table(__func__, "raw skb data dump before receiving", + new->data, new->len); + + stat = deliver_skb(new, dev); + + kfree_skb(new); + + return stat; +} + +/* Uncompress function for multicast destination address, + * when M bit is set. + */ +static int +lowpan_uncompress_multicast_daddr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 dam) +{ + bool fail; + + switch (dam) { + case LOWPAN_IPHC_DAM_00: + /* 00: 128 bits. The full address + * is carried in-line. + */ + fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); + break; + case LOWPAN_IPHC_DAM_01: + /* 01: 48 bits. The address takes + * the form ffXX::00XX:XXXX:XXXX. + */ + ipaddr->s6_addr[0] = 0xFF; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); + fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5); + break; + case LOWPAN_IPHC_DAM_10: + /* 10: 32 bits. The address takes + * the form ffXX::00XX:XXXX. + */ + ipaddr->s6_addr[0] = 0xFF; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); + fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3); + break; + case LOWPAN_IPHC_DAM_11: + /* 11: 8 bits. The address takes + * the form ff02::00XX. + */ + ipaddr->s6_addr[0] = 0xFF; + ipaddr->s6_addr[1] = 0x02; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); + break; + default: + pr_debug("DAM value has a wrong value: 0x%x\n", dam); + return -EINVAL; + } + + if (fail) { + pr_debug("Failed to fetch skb data\n"); + return -EIO; + } + + raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is", + ipaddr->s6_addr, 16); + + return 0; +} + +static int +uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) +{ + bool fail; + u8 tmp = 0, val = 0; + + if (!uh) + goto err; + + fail = lowpan_fetch_skb(skb, &tmp, 1); + + if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { + pr_debug("UDP header uncompression\n"); + switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { + case LOWPAN_NHC_UDP_CS_P_00: + fail |= lowpan_fetch_skb(skb, &uh->source, 2); + fail |= lowpan_fetch_skb(skb, &uh->dest, 2); + break; + case LOWPAN_NHC_UDP_CS_P_01: + fail |= lowpan_fetch_skb(skb, &uh->source, 2); + fail |= lowpan_fetch_skb(skb, &val, 1); + uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + break; + case LOWPAN_NHC_UDP_CS_P_10: + fail |= lowpan_fetch_skb(skb, &val, 1); + uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + fail |= lowpan_fetch_skb(skb, &uh->dest, 2); + break; + case LOWPAN_NHC_UDP_CS_P_11: + fail |= lowpan_fetch_skb(skb, &val, 1); + uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT + + (val >> 4)); + uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + + (val & 0x0f)); + break; + default: + pr_debug("ERROR: unknown UDP format\n"); + goto err; + break; + } + + pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", + ntohs(uh->source), ntohs(uh->dest)); + + /* checksum */ + if (tmp & LOWPAN_NHC_UDP_CS_C) { + pr_debug_ratelimited("checksum elided currently not supported\n"); + goto err; + } else { + fail |= lowpan_fetch_skb(skb, &uh->check, 2); + } + + /* + * UDP lenght needs to be infered from the lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh->len = htons(skb->len + sizeof(struct udphdr)); + pr_debug("uncompressed UDP length: src = %d", ntohs(uh->len)); + } else { + pr_debug("ERROR: unsupported NH format\n"); + goto err; + } + + if (fail) + goto err; + + return 0; +err: + return -EINVAL; +} + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; + +int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, + const u8 *saddr, const u8 saddr_type, const u8 saddr_len, + const u8 *daddr, const u8 daddr_type, const u8 daddr_len, + u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb) +{ + struct ipv6hdr hdr = {}; + u8 tmp, num_context = 0; + int err; + + raw_dump_table(__func__, "raw skb data dump uncompressed", + skb->data, skb->len); + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) { + pr_debug("CID flag is set, increase header with one\n"); + if (lowpan_fetch_skb_u8(skb, &num_context)) + goto drop; + } + + hdr.version = 6; + + /* Traffic Class and Flow Label */ + switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { + /* + * Traffic Class and FLow Label carried in-line + * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + */ + case 0: /* 00b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + + memcpy(&hdr.flow_lbl, &skb->data[0], 3); + skb_pull(skb, 3); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | + (hdr.flow_lbl[0] & 0x0f); + break; + /* + * Traffic class carried in-line + * ECN + DSCP (1 byte), Flow Label is elided + */ + case 2: /* 10b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + break; + /* + * Flow Label carried in-line + * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + */ + case 1: /* 01b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + + hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); + memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); + skb_pull(skb, 2); + break; + /* Traffic Class and Flow Label are elided */ + case 3: /* 11b */ + break; + default: + break; + } + + /* Next Header */ + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + /* Next header is carried inline */ + if (lowpan_fetch_skb_u8(skb, &(hdr.nexthdr))) + goto drop; + + pr_debug("NH flag is set, next header carried inline: %02x\n", + hdr.nexthdr); + } + + /* Hop Limit */ + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + else { + if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit))) + goto drop; + } + + /* Extract SAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; + + if (iphc1 & LOWPAN_IPHC_SAC) { + /* Source address context based uncompression */ + pr_debug("SAC bit is set. Handle context based source address.\n"); + err = uncompress_context_based_src_addr( + skb, &hdr.saddr, tmp); + } else { + /* Source address uncompression */ + pr_debug("source address stateless compression\n"); + err = uncompress_addr(skb, &hdr.saddr, tmp, saddr, + saddr_type, saddr_len); + } + + /* Check on error of previous branch */ + if (err) + goto drop; + + /* Extract DAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; + + /* check for Multicast Compression */ + if (iphc1 & LOWPAN_IPHC_M) { + if (iphc1 & LOWPAN_IPHC_DAC) { + pr_debug("dest: context-based mcast compression\n"); + /* TODO: implement this */ + } else { + err = lowpan_uncompress_multicast_daddr( + skb, &hdr.daddr, tmp); + if (err) + goto drop; + } + } else { + err = uncompress_addr(skb, &hdr.daddr, tmp, daddr, + daddr_type, daddr_len); + pr_debug("dest: stateless compression mode %d dest %pI6c\n", + tmp, &hdr.daddr); + if (err) + goto drop; + } + + /* UDP data uncompression */ + if (iphc0 & LOWPAN_IPHC_NH_C) { + struct udphdr uh; + struct sk_buff *new; + if (uncompress_udp_header(skb, &uh)) + goto drop; + + /* + * replace the compressed UDP head by the uncompressed UDP + * header + */ + new = skb_copy_expand(skb, sizeof(struct udphdr), + skb_tailroom(skb), GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb = new; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + raw_dump_table(__func__, "raw UDP header dump", + (u8 *)&uh, sizeof(uh)); + + hdr.nexthdr = UIP_PROTO_UDP; + } + + hdr.payload_len = htons(skb->len); + + pr_debug("skb headroom size = %d, data length = %d\n", + skb_headroom(skb), skb->len); + + pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" + "nexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n", + hdr.version, ntohs(hdr.payload_len), hdr.nexthdr, + hdr.hop_limit, &hdr.daddr); + + raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, + sizeof(hdr)); + + return skb_deliver(skb, &hdr, dev, deliver_skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(lowpan_process_data); + +static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, + const struct in6_addr *ipaddr, + const unsigned char *lladdr) +{ + u8 val = 0; + + if (is_addr_mac_addr_based(ipaddr, lladdr)) { + val = 3; /* 0-bits */ + pr_debug("address compression 0 bits\n"); + } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { + /* compress IID to 16 bits xxxx::XXXX */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); + *hc06_ptr += 2; + val = 2; /* 16-bits */ + raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)", + *hc06_ptr - 2, 2); + } else { + /* do not compress IID => xxxx::IID */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); + *hc06_ptr += 8; + val = 1; /* 64-bits */ + raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)", + *hc06_ptr - 8, 8); + } + + return rol8(val, shift); +} + +static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + u8 tmp; + + if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT) && + ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT)) { + pr_debug("UDP header: both ports compression to 4 bits\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_11; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source and destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + + ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of dest\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_01; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of source\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_10; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source port */ + tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* destination port */ + lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest)); + } else { + pr_debug("UDP header: can't compress\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_00; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest)); + } + + /* checksum is always inline */ + lowpan_push_hc_data(hc06_ptr, &uh->check, sizeof(uh->check)); + + /* skip the UDP header */ + skb_pull(skb, sizeof(struct udphdr)); +} + +int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + u8 tmp, iphc0, iphc1, *hc06_ptr; + struct ipv6hdr *hdr; + u8 head[100] = {}; + + if (type != ETH_P_IPV6) + return -EINVAL; + + hdr = ipv6_hdr(skb); + hc06_ptr = head + 2; + + pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" + "\tnexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n", + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit, &hdr->daddr); + + raw_dump_table(__func__, "raw skb network header dump", + skb_network_header(skb), sizeof(struct ipv6hdr)); + + /* + * As we copy some bit-length fields, in the IPHC encoding bytes, + * we sometimes use |= + * If the field is 0, and the current bit value in memory is 1, + * this does not work. We therefore reset the IPHC encoding here + */ + iphc0 = LOWPAN_DISPATCH_IPHC; + iphc1 = 0; + + /* TODO: context lookup */ + + raw_dump_inline(__func__, "saddr", + (unsigned char *)_saddr, IEEE802154_ADDR_LEN); + raw_dump_inline(__func__, "daddr", + (unsigned char *)_daddr, IEEE802154_ADDR_LEN); + + raw_dump_table(__func__, + "sending raw skb network uncompressed packet", + skb->data, skb->len); + + /* + * Traffic class, flow label + * If flow label is 0, compress it. If traffic class is 0, compress it + * We have to process both in the same time as the offset of traffic + * class depends on the presence of version and flow label + */ + + /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ + tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); + tmp = ((tmp & 0x03) << 6) | (tmp >> 2); + + if (((hdr->flow_lbl[0] & 0x0F) == 0) && + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + /* flow label can be compressed */ + iphc0 |= LOWPAN_IPHC_FL_C; + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress (elide) all */ + iphc0 |= LOWPAN_IPHC_TC_C; + } else { + /* compress only the flow label */ + *hc06_ptr = tmp; + hc06_ptr += 1; + } + } else { + /* Flow label cannot be compressed */ + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress only traffic class */ + iphc0 |= LOWPAN_IPHC_TC_C; + *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); + memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); + hc06_ptr += 3; + } else { + /* compress nothing */ + memcpy(hc06_ptr, &hdr, 4); + /* replace the top byte with new ECN | DSCP format */ + *hc06_ptr = tmp; + hc06_ptr += 4; + } + } + + /* NOTE: payload length is always compressed */ + + /* Next Header is compress if UDP */ + if (hdr->nexthdr == UIP_PROTO_UDP) + iphc0 |= LOWPAN_IPHC_NH_C; + + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + *hc06_ptr = hdr->nexthdr; + hc06_ptr += 1; + } + + /* + * Hop limit + * if 1: compress, encoding is 01 + * if 64: compress, encoding is 10 + * if 255: compress, encoding is 11 + * else do not compress + */ + switch (hdr->hop_limit) { + case 1: + iphc0 |= LOWPAN_IPHC_TTL_1; + break; + case 64: + iphc0 |= LOWPAN_IPHC_TTL_64; + break; + case 255: + iphc0 |= LOWPAN_IPHC_TTL_255; + break; + default: + *hc06_ptr = hdr->hop_limit; + hc06_ptr += 1; + break; + } + + /* source address compression */ + if (is_addr_unspecified(&hdr->saddr)) { + pr_debug("source address is unspecified, setting SAC\n"); + iphc1 |= LOWPAN_IPHC_SAC; + /* TODO: context lookup */ + } else if (is_addr_link_local(&hdr->saddr)) { + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_SAM_BIT, &hdr->saddr, _saddr); + pr_debug("source address unicast link-local %pI6c " + "iphc1 0x%02x\n", &hdr->saddr, iphc1); + } else { + pr_debug("send the full source address\n"); + memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + + /* destination address compression */ + if (is_addr_mcast(&hdr->daddr)) { + pr_debug("destination address is multicast: "); + iphc1 |= LOWPAN_IPHC_M; + if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { + pr_debug("compressed to 1 octet\n"); + iphc1 |= LOWPAN_IPHC_DAM_11; + /* use last byte */ + *hc06_ptr = hdr->daddr.s6_addr[15]; + hc06_ptr += 1; + } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { + pr_debug("compressed to 4 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_10; + /* second byte + the last three */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); + hc06_ptr += 4; + } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { + pr_debug("compressed to 6 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_01; + /* second byte + the last five */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); + hc06_ptr += 6; + } else { + pr_debug("using full address\n"); + iphc1 |= LOWPAN_IPHC_DAM_00; + memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); + hc06_ptr += 16; + } + } else { + /* TODO: context lookup */ + if (is_addr_link_local(&hdr->daddr)) { + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr); + pr_debug("dest address unicast link-local %pI6c " + "iphc1 0x%02x\n", &hdr->daddr, iphc1); + } else { + pr_debug("dest address unicast %pI6c\n", &hdr->daddr); + memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + } + + /* UDP header compression */ + if (hdr->nexthdr == UIP_PROTO_UDP) + compress_udp_header(&hc06_ptr, skb); + + head[0] = iphc0; + head[1] = iphc1; + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); + memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + skb_reset_network_header(skb); + + pr_debug("header len %d skb %u\n", (int)(hc06_ptr - head), skb->len); + + raw_dump_table(__func__, "raw skb data dump compressed", + skb->data, skb->len); + return 0; +} +EXPORT_SYMBOL_GPL(lowpan_header_compress); diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index d7716d64c6b..951a83ee8af 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o 6lowpan_iphc.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index ef56ab5b35f..4dd37615a74 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -46,7 +46,7 @@ MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", ((signed char) (phy->transmit_power << 2)) >> 2, - (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1 ); + (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4b81e91c80f..f8c49ce5b28 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o ip_tunnel_core.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o @@ -19,7 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o -gre-y := gre_demux.o gre_offload.o +gre-y := gre_demux.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b8bc1a3d5cf..ecd2c3f245c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1391,9 +1391,15 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | - (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); + /* Save the IP ID check to be included later when we get to + * the transport layer so only the inner most IP ID is checked. + * This is because some GSO/TSO implementations do not + * correctly increment the IP ID for the outer hdrs. + */ + NAPI_GRO_CB(p)->flush_id = + ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; } @@ -1539,6 +1545,7 @@ static const struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; static const struct net_protocol udp_protocol = { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b67cf805910..1a9b99e0446 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -732,6 +732,7 @@ static int arp_process(struct sk_buff *skb) int addr_type; struct neighbour *n; struct net *net = dev_net(dev); + bool is_garp = false; /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. @@ -898,10 +899,12 @@ static int arp_process(struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ + is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && + inet_addr_type(net, sip) == RTN_UNICAST; + if (n == NULL && - (arp->ar_op == htons(ARPOP_REPLY) || - (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) && - inet_addr_type(net, sip) == RTN_UNICAST) + ((arp->ar_op == htons(ARPOP_REPLY) && + inet_addr_type(net, sip) == RTN_UNICAST) || is_garp)) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -914,8 +917,10 @@ static int arp_process(struct sk_buff *skb) agents are active. Taking the first reply prevents arp trashing and chooses the fastest router. */ - override = time_after(jiffies, n->updated + - NEIGH_VAR(n->parms, LOCKTIME)); + override = time_after(jiffies, + n->updated + + NEIGH_VAR(n->parms, LOCKTIME)) || + is_garp; /* Broadcast replies and request packets do not assert neighbour reachability. @@ -1112,7 +1117,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -int arp_invalidate(struct net_device *dev, __be32 ip) +static int arp_invalidate(struct net_device *dev, __be32 ip) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; @@ -1127,7 +1132,6 @@ int arp_invalidate(struct net_device *dev, __be32 ip) return err; } -EXPORT_SYMBOL(arp_invalidate); static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 4b59b6e488d..69e77c8ff28 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1335,8 +1335,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; @@ -1431,8 +1430,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; @@ -1526,8 +1524,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index de03fe7002d..9809f7b6972 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1385,6 +1385,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); inet_insert_ifa(ifa); } } @@ -1696,7 +1698,7 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); - if (type == -1 || type == NETCONFA_PROXY_ARP) + if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); return size; @@ -1734,8 +1736,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_MC_FORWARDING, IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; - if ((type == -1 || type == NETCONFA_PROXY_ARP) && - nla_put_s32(skb, NETCONFA_PROXY_ARP, + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; @@ -1775,7 +1777,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, - [NETCONFA_PROXY_ARP] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -2002,7 +2004,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, if (i == IPV4_DEVCONF_PROXY_ARP - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_PROXY_ARP, + inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, ifindex, cnf); } } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 388d113fd28..1e4f6600b31 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -33,8 +33,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); -int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int dflt); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e63f47a4e65..b53f0bf84dc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -426,8 +426,9 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) return NULL; } -int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int dflt) +static int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, int *last_idx, + int dflt) { struct neighbour *n; int state = NUD_NONE; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 5893e99e829..1863422fb7d 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -355,14 +355,7 @@ static int __init gre_init(void) goto err_gre; } - if (gre_offload_init()) { - pr_err("can't add protocol offload\n"); - goto err_gso; - } - return 0; -err_gso: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); err_gre: inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); err: @@ -371,8 +364,6 @@ err: static void __exit gre_exit(void) { - gre_offload_exit(); - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e5d43618846..29512e3e7e7 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -26,8 +26,9 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); netdev_features_t enc_features; - int ghl = GRE_HEADER_SECTION; + int ghl; struct gre_base_hdr *greh; + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; __be16 protocol = skb->protocol; int tnl_hlen; @@ -48,23 +49,19 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); - if (greh->flags & GRE_KEY) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_SEQ) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_CSUM) { - ghl += GRE_HEADER_SECTION; - csum = true; - } else - csum = false; + ghl = skb_inner_network_header(skb) - skb_transport_header(skb); + if (unlikely(ghl < sizeof(*greh))) + goto out; - /* setup inner skb. */ - skb->protocol = greh->protocol; - skb->encapsulation = 0; + csum = !!(greh->flags & GRE_CSUM); if (unlikely(!pskb_may_pull(skb, ghl))) goto out; + /* setup inner skb. */ + skb->protocol = greh->protocol; + skb->encapsulation = 0; + __skb_pull(skb, ghl); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); @@ -73,8 +70,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; + } skb = segs; tnl_hlen = skb_tnl_header_len(skb); @@ -113,19 +112,182 @@ out: return segs; } +/* Compute the whole skb csum in s/w and store it, then verify GRO csum + * starting from gro_offset. + */ +static __sum16 gro_skb_checksum(struct sk_buff *skb) +{ + __sum16 sum; + + skb->csum = skb_checksum(skb, 0, skb->len, 0); + NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, + csum_partial(skb->data, skb_gro_offset(skb), 0)); + sum = csum_fold(NAPI_GRO_CB(skb)->csum); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { + if (unlikely(!sum)) + netdev_rx_csum_fault(skb->dev); + } else + skb->ip_summed = CHECKSUM_COMPLETE; + + return sum; +} + +static struct sk_buff **gre_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + const struct gre_base_hdr *greh; + unsigned int hlen, grehlen; + unsigned int off; + int flush = 1; + struct packet_offload *ptype; + __be16 type; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*greh); + greh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + greh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!greh)) + goto out; + } + + /* Only support version 0 and K (key), C (csum) flags. Note that + * although the support for the S (seq#) flag can be added easily + * for GRO, this is problematic for GSO hence can not be enabled + * here because a GRO pkt may end up in the forwarding path, thus + * requiring GSO support to break it up correctly. + */ + if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) + goto out; + + type = greh->protocol; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) + goto out_unlock; + + grehlen = GRE_HEADER_SECTION; + + if (greh->flags & GRE_KEY) + grehlen += GRE_HEADER_SECTION; + + if (greh->flags & GRE_CSUM) + grehlen += GRE_HEADER_SECTION; + + hlen = off + grehlen; + if (skb_gro_header_hard(skb, hlen)) { + greh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!greh)) + goto out_unlock; + } + if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ + __sum16 csum = 0; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum = csum_fold(NAPI_GRO_CB(skb)->csum); + /* Don't trust csum error calculated/reported by h/w */ + if (skb->ip_summed == CHECKSUM_NONE || csum != 0) + csum = gro_skb_checksum(skb); + + /* GRE CSUM is the 1's complement of the 1's complement sum + * of the GRE hdr plus payload so it should add up to 0xffff + * (and 0 after csum_fold()) just like the IPv4 hdr csum. + */ + if (csum) + goto out_unlock; + } + flush = 0; + + for (p = *head; p; p = p->next) { + const struct gre_base_hdr *greh2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + /* The following checks are needed to ensure only pkts + * from the same tunnel are considered for aggregation. + * The criteria for "the same tunnel" includes: + * 1) same version (we only support version 0 here) + * 2) same protocol (we only support ETH_P_IP for now) + * 3) same set of flags + * 4) same key if the key field is present. + */ + greh2 = (struct gre_base_hdr *)(p->data + off); + + if (greh2->flags != greh->flags || + greh2->protocol != greh->protocol) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + if (greh->flags & GRE_KEY) { + /* compare keys */ + if (*(__be32 *)(greh2+1) != *(__be32 *)(greh+1)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + } + + skb_gro_pull(skb, grehlen); + + /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ + skb_gro_postpull_rcsum(skb, greh, grehlen); + + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int gre_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff); + struct packet_offload *ptype; + unsigned int grehlen = sizeof(*greh); + int err = -ENOENT; + __be16 type; + + type = greh->protocol; + if (greh->flags & GRE_KEY) + grehlen += GRE_HEADER_SECTION; + + if (greh->flags & GRE_CSUM) + grehlen += GRE_HEADER_SECTION; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); + + rcu_read_unlock(); + return err; +} + static const struct net_offload gre_offload = { .callbacks = { .gso_send_check = gre_gso_send_check, .gso_segment = gre_gso_segment, + .gro_receive = gre_gro_receive, + .gro_complete = gre_gro_complete, }, }; -int __init gre_offload_init(void) +static int __init gre_offload_init(void) { return inet_add_offload(&gre_offload, IPPROTO_GRE); } -void __exit gre_offload_exit(void) +static void __exit gre_offload_exit(void) { inet_del_offload(&gre_offload, IPPROTO_GRE); } + +module_init(gre_offload_init); +module_exit(gre_offload_exit); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fb3c5637199..0134663fdbc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -668,6 +668,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static bool icmp_tag_validation(int proto) +{ + bool ok; + + rcu_read_lock(); + ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation; + rcu_read_unlock(); + return ok; +} + /* * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and * ICMP_PARAMETERPROB. @@ -705,12 +715,22 @@ static void icmp_unreach(struct sk_buff *skb) case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (net->ipv4.sysctl_ip_no_pmtu_disc == 2) { - goto out; - } else if (net->ipv4.sysctl_ip_no_pmtu_disc) { + /* for documentation of the ip_no_pmtu_disc + * values please see + * Documentation/networking/ip-sysctl.txt + */ + switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + default: LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); - } else { + break; + case 2: + goto out; + case 3: + if (!icmp_tag_validation(iph->protocol)) + goto out; + /* fall through */ + case 0: info = ntohs(icmph->un.frag.mtu); if (!info) goto out; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7defdc9ba16..84c4329cbd3 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -310,7 +310,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) struct ip_sf_list *psf; int scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -463,7 +463,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, } first = 1; psf_prev = NULL; - for (psf=*psf_list; psf; psf=psf_next) { + for (psf = *psf_list; psf; psf = psf_next) { __be32 *psrc; psf_next = psf->sf_next; @@ -520,7 +520,7 @@ empty_source: return skb; if (pmc->crcount || isquery) { /* make sure we have room for group header */ - if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) { + if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) { igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } @@ -576,7 +576,7 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) struct ip_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf=*ppsf; psf; psf = psf_next) { + for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) @@ -600,7 +600,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) /* deleted MCA's */ pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->sfmode == MCAST_INCLUDE) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -764,7 +764,7 @@ static void igmp_ifc_event(struct in_device *in_dev) static void igmp_timer_expire(unsigned long data) { - struct ip_mc_list *im=(struct ip_mc_list *)data; + struct ip_mc_list *im = (struct ip_mc_list *)data; struct in_device *in_dev = im->interface; spin_lock(&im->lock); @@ -794,10 +794,10 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) int i, scount; scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != @@ -825,10 +825,10 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) /* mark INCLUDE-mode sources */ scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) + for (i = 0; i < nsrcs; i++) if (srcs[i] == psf->sf_inaddr) { psf->sf_gsresp = 1; scount++; @@ -1103,7 +1103,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->tomb = im->tomb; pmc->sources = im->sources; im->tomb = im->sources = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->crcount; } spin_unlock_bh(&im->lock); @@ -1121,7 +1121,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) { if (pmc->multiaddr == multiaddr) break; pmc_prev = pmc; @@ -1134,7 +1134,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) } spin_unlock_bh(&in_dev->mc_tomb_lock); if (pmc) { - for (psf=pmc->tomb; psf; psf=psf_next) { + for (psf = pmc->tomb; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1167,7 +1167,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); - for (; psf; psf=psf_next) { + for (; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1557,7 +1557,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1630,7 +1630,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; } err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; @@ -1650,7 +1650,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); } else if (sf_setstate(pmc) || changerec) { @@ -1671,7 +1671,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, struct ip_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1699,7 +1699,7 @@ static void sf_markstate(struct ip_mc_list *pmc) struct ip_sf_list *psf; int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) if (pmc->sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && @@ -1716,7 +1716,7 @@ static int sf_setstate(struct ip_mc_list *pmc) int new_in, rv; rv = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (pmc->sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -1726,7 +1726,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (!psf->sf_oldin) { struct ip_sf_list *prev = NULL; - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) { + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) { if (dpsf->sf_inaddr == psf->sf_inaddr) break; prev = dpsf; @@ -1748,7 +1748,7 @@ static int sf_setstate(struct ip_mc_list *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { @@ -1800,7 +1800,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]++; err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; @@ -1810,7 +1810,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]--; - for (j=0; j<i; j++) + for (j = 0; j < i; j++) (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST @@ -1829,7 +1829,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); } else if (sf_setstate(pmc)) { @@ -1844,12 +1844,12 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) { struct ip_sf_list *psf, *nextpsf; - for (psf=pmc->tomb; psf; psf=nextpsf) { + for (psf = pmc->tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } pmc->tomb = NULL; - for (psf=pmc->sources; psf; psf=nextpsf) { + for (psf = pmc->sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } @@ -2043,7 +2043,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) @@ -2062,7 +2062,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); - for (j=i+1; j<psl->sl_count; j++) + for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; @@ -2088,7 +2088,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct newpsl->sl_max = count; newpsl->sl_count = count - IP_SFBLOCK; if (psl) { - for (i=0; i<psl->sl_count; i++) + for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); @@ -2098,7 +2098,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) @@ -2106,7 +2106,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } if (rv == 0) /* address already there is an error */ goto done; - for (j=psl->sl_count-1; j>=i; j--) + for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = mreqs->imr_sourceaddr; psl->sl_count++; @@ -2305,7 +2305,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { return -EFAULT; } - for (i=0; i<copycount; i++) { + for (i = 0; i < copycount; i++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; @@ -2350,7 +2350,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) if (!psl) goto unlock; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } @@ -2423,7 +2423,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u rv = 1; } else if (im) { if (src_addr) { - for (psf=im->sources; psf; psf=psf->sf_next) { + for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 56a964a553d..a0f52dac894 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; @@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; sock_diag_save_cookie(tw, r->id.idiag_cookie); + r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = tw->tw_rcv_saddr; r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = jiffies_to_msecs(tmo); @@ -726,8 +737,13 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->ir_rmt_port; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = ireq->ir_loc_addr; r->id.idiag_dst[0] = ireq->ir_rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 1975f52933c..f17ea49b28f 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -230,29 +230,6 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, lro_desc->last_skb = skb; } -static void lro_add_frags(struct net_lro_desc *lro_desc, - int len, int hlen, int truesize, - struct skb_frag_struct *skb_frags, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct sk_buff *skb = lro_desc->parent; - int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - lro_add_common(lro_desc, iph, tcph, tcp_data_len); - - skb->truesize += truesize; - - skb_frags[0].page_offset += hlen; - skb_frag_size_sub(&skb_frags[0], hlen); - - while (tcp_data_len > 0) { - *(lro_desc->next_frag) = *skb_frags; - tcp_data_len -= skb_frag_size(skb_frags); - lro_desc->next_frag++; - skb_frags++; - skb_shinfo(skb)->nr_frags++; - } -} static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, struct iphdr *iph, @@ -371,128 +348,6 @@ out: return 1; } - -static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - void *mac_hdr, - int hlen, __wsum sum, - u32 ip_summed) -{ - struct sk_buff *skb; - struct skb_frag_struct *skb_frags; - int data_len = len; - int hdr_len = min(len, hlen); - - skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad); - if (!skb) - return NULL; - - skb_reserve(skb, lro_mgr->frag_align_pad); - skb->len = len; - skb->data_len = len - hdr_len; - skb->truesize += true_size; - skb->tail += hdr_len; - - memcpy(skb->data, mac_hdr, hdr_len); - - skb_frags = skb_shinfo(skb)->frags; - while (data_len > 0) { - *skb_frags = *frags; - data_len -= skb_frag_size(frags); - skb_frags++; - frags++; - skb_shinfo(skb)->nr_frags++; - } - - skb_shinfo(skb)->frags[0].page_offset += hdr_len; - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len); - - skb->ip_summed = ip_summed; - skb->csum = sum; - skb->protocol = eth_type_trans(skb, lro_mgr->dev); - return skb; -} - -static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - void *priv, __wsum sum) -{ - struct net_lro_desc *lro_desc; - struct iphdr *iph; - struct tcphdr *tcph; - struct sk_buff *skb; - u64 flags; - void *mac_hdr; - int mac_hdr_len; - int hdr_len = LRO_MAX_PG_HLEN; - int vlan_hdr_len = 0; - - if (!lro_mgr->get_frag_header || - lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, - (void *)&tcph, &flags, priv)) { - mac_hdr = skb_frag_address(frags); - goto out1; - } - - if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) - goto out1; - - hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); - mac_hdr_len = (int)((void *)(iph) - mac_hdr); - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (!lro_desc) - goto out1; - - if (!lro_desc->active) { /* start new lro session */ - if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) - goto out1; - - skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, - hdr_len, 0, lro_mgr->ip_summed_aggr); - if (!skb) - goto out; - - if ((skb->protocol == htons(ETH_P_8021Q)) && - !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) - vlan_hdr_len = VLAN_HLEN; - - iph = (void *)(skb->data + vlan_hdr_len); - tcph = (void *)((u8 *)skb->data + vlan_hdr_len - + IP_HDR_LEN(iph)); - - lro_init_desc(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - return NULL; - } - - if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) - goto out2; - - if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) - goto out2; - - lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - - if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || - lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) - lro_flush(lro_mgr, lro_desc); - - return NULL; - -out2: /* send aggregated packets to the stack */ - lro_flush(lro_mgr, lro_desc); - -out1: /* Original packet has to be posted to the stack */ - skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, - hdr_len, sum, lro_mgr->ip_summed); -out: - return skb; -} - void lro_receive_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, void *priv) @@ -506,23 +361,6 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, } EXPORT_SYMBOL(lro_receive_skb); -void lro_receive_frags(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, void *priv, __wsum sum) -{ - struct sk_buff *skb; - - skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum); - if (!skb) - return; - - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(skb); - else - netif_rx(skb); -} -EXPORT_SYMBOL(lro_receive_frags); - void lro_flush_all(struct net_lro_mgr *lro_mgr) { int i; @@ -534,14 +372,3 @@ void lro_flush_all(struct net_lro_mgr *lro_mgr) } } EXPORT_SYMBOL(lro_flush_all); - -void lro_flush_pkt(struct net_lro_mgr *lro_mgr, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct net_lro_desc *lro_desc; - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (lro_desc->active) - lro_flush(lro_mgr, lro_desc); -} -EXPORT_SYMBOL(lro_flush_pkt); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 33d5537881e..48f42446511 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -109,13 +109,6 @@ static inline void flush_check(struct inet_peer_base *base, int family) } } -void inetpeer_invalidate_family(int family) -{ - atomic_t *fp = inetpeer_seq_ptr(family); - - atomic_inc(fp); -} - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -227,7 +220,7 @@ static int addr_compare(const struct inetpeer_addr *a, stackptr = _stack; \ *stackptr++ = &_base->root; \ for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty; ) { \ + u != peer_avl_empty;) { \ int cmp = addr_compare(_daddr, &u->daddr); \ if (cmp == 0) \ break; \ @@ -282,7 +275,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu; ) { \ + u->avl_right != peer_avl_empty_rcu;) { \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_deref_locked(*v, base); \ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 694de3b7aeb..e9f1217a8af 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -54,6 +54,7 @@ static int ip_forward_finish(struct sk_buff *skb) int ip_forward(struct sk_buff *skb) { + u32 mtu; struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); @@ -88,11 +89,13 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && + IPCB(skb)->flags |= IPSKB_FORWARDED; + mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); + if (unlikely(skb->len > mtu && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(&rt->dst))); + htonl(mtu)); goto drop; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7aea4c5b94..e560ef34cf4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -217,6 +217,7 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { + skb_pop_mac_header(skb); ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); return PACKET_RCVD; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec7264514a8..f4ab72e19af 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -167,7 +167,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) soffset -= 4; if (soffset > 3) { memcpy(&faddr, &start[soffset-1], 4); - for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4) + for (soffset -= 4, doffset = 4; soffset > 3; soffset -= 4, doffset += 4) memcpy(&dptr[doffset-1], &start[soffset-1], 4); /* * RFC1812 requires to fix illegal source routes. @@ -227,7 +227,7 @@ void ip_options_fragment(struct sk_buff *skb) continue; } optlen = optptr[1]; - if (optlen<2 || optlen>l) + if (optlen < 2 || optlen > l) return; if (!IPOPT_COPIED(*optptr)) memset(optptr, IPOPT_NOOP, optlen); @@ -275,27 +275,27 @@ int ip_options_compile(struct net *net, for (l = opt->optlen; l > 0; ) { switch (*optptr) { - case IPOPT_END: - for (optptr++, l--; l>0; optptr++, l--) { + case IPOPT_END: + for (optptr++, l--; l > 0; optptr++, l--) { if (*optptr != IPOPT_END) { *optptr = IPOPT_END; opt->is_changed = 1; } } goto eol; - case IPOPT_NOOP: + case IPOPT_NOOP: l--; optptr++; continue; } optlen = optptr[1]; - if (optlen<2 || optlen>l) { + if (optlen < 2 || optlen > l) { pp_ptr = optptr; goto error; } switch (*optptr) { - case IPOPT_SSRR: - case IPOPT_LSRR: + case IPOPT_SSRR: + case IPOPT_LSRR: if (optlen < 3) { pp_ptr = optptr + 1; goto error; @@ -321,7 +321,7 @@ int ip_options_compile(struct net *net, opt->is_strictroute = (optptr[0] == IPOPT_SSRR); opt->srr = optptr - iph; break; - case IPOPT_RR: + case IPOPT_RR: if (opt->rr) { pp_ptr = optptr; goto error; @@ -349,7 +349,7 @@ int ip_options_compile(struct net *net, } opt->rr = optptr - iph; break; - case IPOPT_TIMESTAMP: + case IPOPT_TIMESTAMP: if (opt->ts) { pp_ptr = optptr; goto error; @@ -369,13 +369,13 @@ int ip_options_compile(struct net *net, goto error; } switch (optptr[3]&0xF) { - case IPOPT_TS_TSONLY: + case IPOPT_TS_TSONLY: if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; - case IPOPT_TS_TSANDADDR: + case IPOPT_TS_TSANDADDR: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -389,7 +389,7 @@ int ip_options_compile(struct net *net, opt->ts_needtime = 1; optptr[2] += 8; break; - case IPOPT_TS_PRESPEC: + case IPOPT_TS_PRESPEC: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -405,7 +405,7 @@ int ip_options_compile(struct net *net, opt->ts_needtime = 1; optptr[2] += 8; break; - default: + default: if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) { pp_ptr = optptr + 3; goto error; @@ -433,7 +433,7 @@ int ip_options_compile(struct net *net, } opt->ts = optptr - iph; break; - case IPOPT_RA: + case IPOPT_RA: if (optlen < 4) { pp_ptr = optptr + 1; goto error; @@ -441,7 +441,7 @@ int ip_options_compile(struct net *net, if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; - case IPOPT_CIPSO: + case IPOPT_CIPSO: if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) { pp_ptr = optptr; goto error; @@ -452,9 +452,9 @@ int ip_options_compile(struct net *net, goto error; } break; - case IPOPT_SEC: - case IPOPT_SID: - default: + case IPOPT_SEC: + case IPOPT_SID: + default: if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) { pp_ptr = optptr; goto error; @@ -572,7 +572,7 @@ void ip_forward_options(struct sk_buff *skb) optptr = raw + opt->srr; - for ( srrptr=optptr[2], srrspace = optptr[1]; + for ( srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4 ) { @@ -628,7 +628,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) if (rt->rt_type != RTN_LOCAL) return -EINVAL; - for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { + for (srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { if (srrptr + 3 > srrspace) { icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24)); return -EINVAL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 912402752f2..9a78804cfe9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -449,6 +449,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; + bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; dev = rt->dst.dev; @@ -458,12 +459,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); + mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || (IPCB(skb)->frag_max_size && - IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { + IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(ip_skb_dst_mtu(skb))); + htonl(mtu)); kfree_skb(skb); return -EMSGSIZE; } @@ -473,7 +475,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ hlen = iph->ihl * 4; - mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ + mtu = mtu - hlen; /* Size of data space */ #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); @@ -828,7 +830,7 @@ static int __ip_append_data(struct sock *sk, if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - mtu-exthdrlen); + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } @@ -1151,7 +1153,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, mtu : 0xFFFF; if (cork->length + size > maxnonfragsize - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); + ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 90ff9570d7d..d3929a69f00 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -68,6 +68,63 @@ static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, IP_TNL_HASH_BITS); } +static inline void __tunnel_dst_set(struct ip_tunnel_dst *idst, + struct dst_entry *dst) +{ + struct dst_entry *old_dst; + + if (dst && (dst->flags & DST_NOCACHE)) + dst = NULL; + + spin_lock_bh(&idst->lock); + old_dst = rcu_dereference(idst->dst); + rcu_assign_pointer(idst->dst, dst); + dst_release(old_dst); + spin_unlock_bh(&idst->lock); +} + +static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +{ + __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); +} + +static inline void tunnel_dst_reset(struct ip_tunnel *t) +{ + tunnel_dst_set(t, NULL); +} + +static void tunnel_dst_reset_all(struct ip_tunnel *t) +{ + int i; + + for_each_possible_cpu(i) + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); +} + +static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) +{ + struct dst_entry *dst; + + rcu_read_lock(); + dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + if (dst) + dst_hold(dst); + rcu_read_unlock(); + return dst; +} + +static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) +{ + struct dst_entry *dst = tunnel_dst_get(t); + + if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + tunnel_dst_reset(t); + return NULL; + } + + return dst; +} + /* Often modified stats are per cpu, other are shared (netdev->stats) */ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) @@ -75,7 +132,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, int i; for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); u64 rx_packets, rx_bytes, tx_packets, tx_bytes; unsigned int start; @@ -318,11 +376,10 @@ failed: return ERR_PTR(err); } -static inline struct rtable *ip_route_output_tunnel(struct net *net, - struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif) +static inline void init_tunnel_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -331,7 +388,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net, fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; - return ip_route_output_key(net, fl4); } static int ip_tunnel_bind_dev(struct net_device *dev) @@ -350,14 +406,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(tunnel->net, &fl4, - tunnel->parms.iph.protocol, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); + init_tunnel_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link); + rt = ip_route_output_key(tunnel->net, &fl4); + if (!IS_ERR(rt)) { tdev = rt->dst.dev; + tunnel_dst_set(tunnel, dst_clone(&rt->dst)); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -405,7 +461,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, bool log_ecn_error) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; const struct iphdr *iph = ip_hdr(skb); int err; @@ -528,10 +584,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4; u8 tos, ttl; __be16 df; - struct rtable *rt; /* Route to the other host */ + struct rtable *rt = NULL; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; + bool connected = true; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -581,27 +638,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, #endif else goto tx_error; + + connected = false; } tos = tnl_params->tos; if (tos & 0x1) { tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + connected = false; + } else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + connected = false; + } } - rt = ip_route_output_tunnel(tunnel->net, &fl4, - protocol, - dst, tnl_params->saddr, - tunnel->parms.o_key, - RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + + if (connected) + rt = (struct rtable *)tunnel_dst_check(tunnel, 0); + + if (!rt) { + rt = ip_route_output_key(tunnel->net, &fl4); + + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (connected) + tunnel_dst_set(tunnel, dst_clone(&rt->dst)); } + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; @@ -696,6 +765,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } + tunnel_dst_reset_all(t); netdev_state_change(dev); } @@ -811,6 +881,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); gro_cells_destroy(&tunnel->gro_cells); + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -979,18 +1050,31 @@ int ip_tunnel_init(struct net_device *dev) int i, err; dev->destructor = ip_tunnel_dev_free; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipt_stats; + struct pcpu_sw_netstats *ipt_stats; ipt_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipt_stats->syncp); } + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + + for_each_possible_cpu(i) { + struct ip_tunnel_dst *idst = per_cpu_ptr(tunnel->dst_cache, i); + idst-> dst = NULL; + spin_lock_init(&idst->lock); + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); return err; } @@ -1015,6 +1099,8 @@ void ip_tunnel_uninit(struct net_device *dev) /* fb_tunnel_dev will be unregisted in net-exit call. */ if (itn->fb_tunnel_dev != dev) ip_tunnel_del(netdev_priv(dev)); + + tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 52b802a0cd8..0783200ad8d 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -60,7 +60,7 @@ static int vti_rcv(struct sk_buff *skb) tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; u32 oldmark = skb->mark; int ret; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40d56073cd1..81c6910cfa9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT config NF_TABLES_IPV4 depends on NF_TABLES tristate "IPv4 nf_tables support" - -config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 - tristate "nf_tables IPv4 reject support" + help + This option enables the IPv4 support for nf_tables. config NFT_CHAIN_ROUTE_IPV4 depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" + help + This option enables the "route" chain for IPv4 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, type of service and + the packet mark. config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv4 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" + help + This option enables the ARP support for nf_tables. config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19df72b7ba8..c16be9d5842 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o -obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b969131ad1c..5b6e0df4ccf 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -17,10 +17,6 @@ #include <linux/udp.h> #include <linux/icmp.h> #include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> -#include <net/route.h> -#include <net/dst.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> @@ -28,128 +24,12 @@ #include <linux/netfilter_bridge.h> #endif +#include <net/netfilter/ipv4/nf_reject.h> + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); -/* Send RST reply */ -static void send_reset(struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; - const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; - - /* IP header checks: fragment. */ - if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; - - oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); - if (oth == NULL) - return; - - /* No RST for RST. */ - if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; - - /* Check checksum */ - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); - - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; - - skb_reserve(nskb, LL_MAX_HEADER); - - skb_reset_network_header(nskb); - niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); - niph->version = 4; - niph->ihl = sizeof(struct iphdr) / 4; - niph->tos = 0; - niph->id = 0; - niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; - niph->check = 0; - niph->saddr = oiph->daddr; - niph->daddr = oiph->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - memset(tcph, 0, sizeof(*tcph)); - tcph->source = oth->dest; - tcph->dest = oth->source; - tcph->doff = sizeof(struct tcphdr) / 4; - - if (oth->ack) - tcph->seq = oth->ack_seq; - else { - tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + - oldskb->len - ip_hdrlen(oldskb) - - (oth->doff << 2)); - tcph->ack = 1; - } - - tcph->rst = 1; - tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, - niph->daddr, 0); - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (unsigned char *)tcph - nskb->head; - nskb->csum_offset = offsetof(struct tcphdr, check); - - /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set_noref(nskb, skb_dst(oldskb)); - - nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) - goto free_nskb; - - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - - /* "Never happens" */ - if (nskb->len > dst_mtu(skb_dst(nskb))) - goto free_nskb; - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - goto free_nskb; - dev_queue_xmit(nskb); - } else -#endif - ip_local_out(nskb); - - return; - - free_nskb: - kfree_skb(nskb); -} - -static inline void send_unreach(struct sk_buff *skb_in, int code) -{ - icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); -} - static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, par->hooknum); + nf_send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ecd8bec411c..8127dc80286 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); - -void need_ipv4_conntrack(void) -{ - return; -} -EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 61a942265e8..d551e31b416 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -461,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, } if (subid < 40) { - optr [0] = 0; - optr [1] = subid; + optr[0] = 0; + optr[1] = subid; } else if (subid < 80) { - optr [0] = 1; - optr [1] = subid - 40; + optr[0] = 1; + optr[1] = subid - 40; } else { - optr [0] = 2; - optr [1] = subid - 80; + optr[0] = 2; + optr[1] = subid - 80; } *len = 2; diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 3e67ef1c676..19412a4063f 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -14,10 +14,30 @@ #include <linux/netfilter_arp.h> #include <net/netfilter/nf_tables.h> +static unsigned int +nft_do_chain_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + static struct nft_af_info nft_af_arp __read_mostly = { .family = NFPROTO_ARP, .nhooks = NF_ARP_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + [NF_ARP_FORWARD] = nft_do_chain_arp, + }, }; static int nf_tables_arp_init_net(struct net *net) @@ -48,32 +68,14 @@ static struct pernet_operations nf_tables_arp_net_ops = { .exit = nf_tables_arp_exit_net, }; -static unsigned int -nft_do_chain_arp(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, ops, skb, in, out); - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_arp = { - .family = NFPROTO_ARP, +static const struct nf_chain_type filter_arp = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_ARP, + .owner = THIS_MODULE, .hook_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD), - .fn = { - [NF_ARP_IN] = nft_do_chain_arp, - [NF_ARP_OUT] = nft_do_chain_arp, - [NF_ARP_FORWARD] = nft_do_chain_arp, - }, }; static int __init nf_tables_arp_init(void) diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 0f4cbfeb19b..6820c8c4084 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -18,14 +18,25 @@ #include <net/ip.h> #include <net/netfilter/nf_tables_ipv4.h> +static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nft_pktinfo pkt; - if (unlikely(skb->len < sizeof(struct iphdr) || ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { if (net_ratelimit()) @@ -33,19 +44,24 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, "packet\n"); return NF_ACCEPT; } - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - return nft_do_chain_pktinfo(&pkt, ops); + return nft_do_chain_ipv4(ops, skb, in, out, okfn); } -static struct nft_af_info nft_af_ipv4 __read_mostly = { +struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; +EXPORT_SYMBOL_GPL(nft_af_ipv4); static int nf_tables_ipv4_init_net(struct net *net) { @@ -75,42 +91,28 @@ static struct pernet_operations nf_tables_ipv4_net_ops = { .exit = nf_tables_ipv4_exit_net, }; -static unsigned int -nft_do_chain_ipv4(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_ipv4 = { - .family = NFPROTO_IPV4, +static const struct nf_chain_type filter_ipv4 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), - .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_ipv4_output, - [NF_INET_FORWARD] = nft_do_chain_ipv4, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, - }, }; static int __init nf_tables_ipv4_init(void) { + int ret; + nft_register_chain_type(&filter_ipv4); - return register_pernet_subsys(&nf_tables_ipv4_net_ops); + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv4); + + return ret; } static void __exit nf_tables_ipv4_exit(void) diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index cf2c792cd97..b5b256d45e6 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -75,7 +75,7 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { @@ -164,21 +164,21 @@ static unsigned int nf_nat_output(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_nat_ipv4 = { - .family = NFPROTO_IPV4, +static const struct nf_chain_type nft_chain_nat_ipv4 = { .name = "nat", .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .fn = { + .hooks = { [NF_INET_PRE_ROUTING] = nf_nat_prerouting, [NF_INET_POST_ROUTING] = nf_nat_postrouting, [NF_INET_LOCAL_OUT] = nf_nat_output, [NF_INET_LOCAL_IN] = nf_nat_fn, }, - .me = THIS_MODULE, }; static int __init nft_chain_nat_init(void) diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 4e6bf9a3d7a..125b66766c0 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -47,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -61,15 +61,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_route_ipv4 = { - .family = NFPROTO_IPV4, +static const struct nf_chain_type nft_chain_route_ipv4 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), - .fn = { + .hooks = { [NF_INET_LOCAL_OUT] = nf_route_table_hook, }, - .me = THIS_MODULE, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 242e7f4ed6f..cae5262a337 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -53,8 +53,12 @@ #include <net/transp_v6.h> #endif +struct ping_table { + struct hlist_nulls_head hash[PING_HTABLE_SIZE]; + rwlock_t lock; +}; -struct ping_table ping_table; +static struct ping_table ping_table; struct pingv6_ops pingv6_ops; EXPORT_SYMBOL_GPL(pingv6_ops); @@ -668,8 +672,8 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8ecd7ad959b..a6c8a80ec9d 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -333,22 +333,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_printf(seq, " OutMsgs OutErrors"); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f8da2827801..25071b48921 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,9 +112,6 @@ #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) -/* IPv4 datagram length is stored into 16bit field (tot_len) */ -#define IP_MAX_MTU 0xFFFF - #define RT_GC_TIMEOUT (300*HZ) static int ip_rt_max_size; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b95331e6c07..f2ed13c2125 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -121,7 +121,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ - diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); + diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d7b63a61445..44eba052b43 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -700,7 +700,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { + { .procname = "tcp_thin_dupack", .data = &sysctl_tcp_thin_dupack, .maxlen = sizeof(int), @@ -831,6 +831,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_forward_use_pmtu", + .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0ca87547bec..e2a40515e66 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -381,7 +381,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - skb_queue_head_init(&tp->out_of_order_queue); + __skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -622,19 +622,21 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) } /* If a not yet filled skb is pushed, do not send it if - * we have packets in Qdisc or NIC queues : + * we have data packets in Qdisc or NIC queues : * Because TX completion will happen shortly, it gives a chance * to coalesce future sendmsg() payload into this skb, without * need for a timer, and with no latency trade off. * As packets containing data payload have a bigger truesize - * than pure acks (dataless) packets, the last check prevents - * autocorking if we only have an ACK in Qdisc/NIC queues. + * than pure acks (dataless) packets, the last checks prevent + * autocorking if we only have an ACK in Qdisc/NIC queues, + * or if TX completion was delayed after we processed ACK packet. */ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && sysctl_tcp_autocorking && + skb != tcp_write_queue_head(sk) && atomic_read(&sk->sk_wmem_alloc) > skb->truesize; } @@ -660,7 +662,11 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &tp->tsq_flags); } - return; + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED. + */ + if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + return; } if (flags & MSG_MORE) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c53b7f35c51..65cf90e063d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -766,7 +766,7 @@ static void tcp_update_pacing_rate(struct sock *sk) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -void tcp_set_rto(struct sock *sk) +static void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) @@ -3686,7 +3686,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int opcode = *ptr++; int opsize; - switch(opcode) { + switch (opcode) { case TCPOPT_EOL: return NULL; case TCPOPT_NOP: @@ -4046,7 +4046,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ - for (i=this_sack+1; i < num_sacks; i++) + for (i = this_sack+1; i < num_sacks; i++) tp->selective_acks[i-1] = tp->selective_acks[i]; num_sacks--; continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bbaf8cb45eb..7297b56c28c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -827,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; int err = -1; - struct sk_buff * skb; + struct sk_buff *skb; /* First, grab a route. */ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 06493736fbc..699a42faab9 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -31,7 +31,8 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; - struct inetpeer_addr tcpm_addr; + struct inetpeer_addr tcpm_saddr; + struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; u32 tcpm_ts; u32 tcpm_ts_stamp; @@ -131,7 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, - struct inetpeer_addr *addr, + struct inetpeer_addr *saddr, + struct inetpeer_addr *daddr, unsigned int hash, bool reclaim) { @@ -155,7 +157,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } - tm->tcpm_addr = *addr; + tm->tcpm_saddr = *saddr; + tm->tcpm_daddr = *daddr; tcpm_suck_dst(tm, dst, true); @@ -189,7 +192,8 @@ static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, in return NULL; } -static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, + const struct inetpeer_addr *daddr, struct net *net, unsigned int hash) { struct tcp_metrics_block *tm; @@ -197,7 +201,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *a for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, addr)) + if (addr_same(&tm->tcpm_saddr, saddr) && + addr_same(&tm->tcpm_daddr, daddr)) break; depth++; } @@ -208,19 +213,22 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; - addr.family = req->rsk_ops->family; - switch (addr.family) { + saddr.family = req->rsk_ops->family; + daddr.family = req->rsk_ops->family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = inet_rsk(req)->ir_rmt_addr; - hash = (__force unsigned int) addr.addr.a4; + saddr.addr.a4 = inet_rsk(req)->ir_loc_addr; + daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; + *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -233,7 +241,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) + if (addr_same(&tm->tcpm_saddr, &saddr) && + addr_same(&tm->tcpm_daddr, &daddr)) break; } tcpm_check_stamp(tm, dst); @@ -243,19 +252,22 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; - addr.family = tw->tw_family; - switch (addr.family) { + saddr.family = tw->tw_family; + daddr.family = tw->tw_family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = tw->tw_daddr; - hash = (__force unsigned int) addr.addr.a4; + saddr.addr.a4 = tw->tw_rcv_saddr; + daddr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; + *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; + *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; hash = ipv6_addr_hash(&tw->tw_v6_daddr); break; #endif @@ -268,7 +280,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) + if (addr_same(&tm->tcpm_saddr, &saddr) && + addr_same(&tm->tcpm_daddr, &daddr)) break; } return tm; @@ -279,20 +292,23 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, bool create) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; bool reclaim; - addr.family = sk->sk_family; - switch (addr.family) { + saddr.family = sk->sk_family; + daddr.family = sk->sk_family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = inet_sk(sk)->inet_daddr; - hash = (__force unsigned int) addr.addr.a4; + saddr.addr.a4 = inet_sk(sk)->inet_saddr; + daddr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; + *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; + *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; hash = ipv6_addr_hash(&sk->sk_v6_daddr); break; #endif @@ -303,14 +319,14 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, net = dev_net(dst->dev); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - tm = __tcp_get_metrics(&addr, net, hash); + tm = __tcp_get_metrics(&saddr, &daddr, net, hash); reclaim = false; if (tm == TCP_METRICS_RECLAIM_PTR) { reclaim = true; tm = NULL; } if (!tm && create) - tm = tcpm_new(dst, &addr, hash, reclaim); + tm = tcpm_new(dst, &saddr, &daddr, hash, reclaim); else tcpm_check_stamp(tm, dst); @@ -724,15 +740,21 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, struct nlattr *nest; int i; - switch (tm->tcpm_addr.family) { + switch (tm->tcpm_daddr.family) { case AF_INET: if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_addr.addr.a4) < 0) + tm->tcpm_daddr.addr.a4) < 0) + goto nla_put_failure; + if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_addr.addr.a6) < 0) + tm->tcpm_daddr.addr.a6) < 0) + goto nla_put_failure; + if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, + tm->tcpm_saddr.addr.a6) < 0) goto nla_put_failure; break; default: @@ -855,44 +877,66 @@ done: return skb->len; } -static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, - unsigned int *hash, int optional) +static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional, int v4, int v6) { struct nlattr *a; - a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; + a = info->attrs[v4]; if (a) { addr->family = AF_INET; addr->addr.a4 = nla_get_be32(a); - *hash = (__force unsigned int) addr->addr.a4; + if (hash) + *hash = (__force unsigned int) addr->addr.a4; return 0; } - a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; + a = info->attrs[v6]; if (a) { if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; addr->family = AF_INET6; memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); - *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + if (hash) + *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); return 0; } return optional ? 1 : -EAFNOSUPPORT; } +static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional) +{ + return __parse_nl_addr(info, addr, hash, optional, + TCP_METRICS_ATTR_ADDR_IPV4, + TCP_METRICS_ATTR_ADDR_IPV6); +} + +static int parse_nl_saddr(struct genl_info *info, struct inetpeer_addr *addr) +{ + return __parse_nl_addr(info, addr, NULL, 0, + TCP_METRICS_ATTR_SADDR_IPV4, + TCP_METRICS_ATTR_SADDR_IPV6); +} + static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct sk_buff *msg; struct net *net = genl_info_net(info); void *reply; int ret; + bool src = true; - ret = parse_nl_addr(info, &addr, &hash, 0); + ret = parse_nl_addr(info, &daddr, &hash, 0); if (ret < 0) return ret; + ret = parse_nl_saddr(info, &saddr); + if (ret < 0) + src = false; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -907,7 +951,8 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) { + if (addr_same(&tm->tcpm_daddr, &daddr) && + (!src || addr_same(&tm->tcpm_saddr, &saddr))) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -960,34 +1005,44 @@ static int tcp_metrics_flush_all(struct net *net) static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct tcpm_hash_bucket *hb; - struct tcp_metrics_block *tm; + struct tcp_metrics_block *tm, *tmlist = NULL; struct tcp_metrics_block __rcu **pp; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net = genl_info_net(info); int ret; + bool src = true; - ret = parse_nl_addr(info, &addr, &hash, 1); + ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; if (ret > 0) return tcp_metrics_flush_all(net); + ret = parse_nl_saddr(info, &saddr); + if (ret < 0) + src = false; hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); hb = net->ipv4.tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); - for (tm = deref_locked_genl(*pp); tm; - pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { - if (addr_same(&tm->tcpm_addr, &addr)) { + for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { + if (addr_same(&tm->tcpm_daddr, &daddr) && + (!src || addr_same(&tm->tcpm_saddr, &saddr))) { *pp = tm->tcpm_next; - break; + tm->tcpm_next = tmlist; + tmlist = tm; + } else { + pp = &tm->tcpm_next; } } spin_unlock_bh(&tcp_metrics_lock); - if (!tm) + if (!tmlist) return -ESRCH; - kfree_rcu(tm, rcu_head); + for (tm = tmlist; tm; tm = tmlist) { + tmlist = tm->tcpm_next; + kfree_rcu(tm, rcu_head); + } return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 97b68415986..3aa9e324799 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -425,7 +425,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); - skb_queue_head_init(&newtp->out_of_order_queue); + __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2658a27f540..771a3950d87 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -197,7 +197,8 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) goto out_check_final; found: - flush = NAPI_GRO_CB(p)->flush; + /* Include the IP ID check below from the inner most IP hdr */ + flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -230,7 +231,7 @@ out_check_final: pp = head; out: - NAPI_GRO_CB(skb)->flush |= flush; + NAPI_GRO_CB(skb)->flush |= (flush != 0); return pp; } @@ -280,7 +281,7 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9e7aec7ee67..03d26b85eab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2777,7 +2777,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ -void tcp_connect_init(struct sock *sk) +static void tcp_connect_init(struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 8b97d71e193..1f2d37613c9 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -38,7 +38,7 @@ MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.1"); -static int port __read_mostly = 0; +static int port __read_mostly; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); @@ -46,7 +46,7 @@ static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, uint, 0); -static unsigned int fwmark __read_mostly = 0; +static unsigned int fwmark __read_mostly; MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); module_param(fwmark, uint, 0); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a347a078ee0..1a8d271f994 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -3,7 +3,7 @@ * YeAH TCP * * For further details look at: - * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf + * https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf * */ #include <linux/mm.h> @@ -15,13 +15,13 @@ #include "tcp_vegas.h" -#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck -#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt -#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss -#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion -#define TCP_YEAH_PHY 8 //lin maximum delta from base -#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss -#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count +#define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */ +#define TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */ +#define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */ +#define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */ +#define TCP_YEAH_PHY 8 /* maximum delta from base */ +#define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */ +#define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */ #define TCP_SCALABLE_AI_CNT 100U @@ -214,9 +214,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { if (yeah->doing_reno_now < TCP_YEAH_RHO) { reduction = yeah->lastQ; - reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) ); + reduction = min(reduction, max(tp->snd_cwnd>>1, 2U)); - reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); + reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); } else reduction = max(tp->snd_cwnd>>1, 2U); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d5d24ecde6a..80f649fbee6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2478,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; @@ -2497,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; + } outer_hlen = skb_tnl_header_len(skb); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 83206de2bc7..79c62bdcd3c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -41,6 +41,14 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; + int offset; + __wsum csum; + + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + segs = skb_udp_tunnel_segment(skb, features); + goto out; + } mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -63,27 +71,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } + segs = skb_segment(skb, features); out: return segs; } diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index e3db3f91511..71acd0014f2 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -48,7 +48,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); skb_set_network_header(skb, -x->props.header_len - - hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); if (x->sel.family != AF_INET6) skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3c3425ee6a2..a9fa6c1feed 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -442,6 +442,8 @@ static int inet6_netconf_msgsize_devconf(int type) if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); #endif + if (type == -1 || type == NETCONFA_PROXY_NEIGH) + size += nla_total_size(4); return size; } @@ -475,6 +477,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->mc_forwarding) < 0) goto nla_put_failure; #endif + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -509,6 +515,7 @@ errout: static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, @@ -988,12 +995,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) * --yoshfuji */ if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { - struct in6_addr prefix; struct rt6_info *rt; - ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - - rt = addrconf_get_prefix_route(&prefix, + rt = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, 0, RTF_GATEWAY | RTF_DEFAULT); @@ -1673,7 +1677,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1684,7 +1688,7 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1818,6 +1822,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); + case ARPHRD_6LOWPAN: case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: @@ -2673,7 +2678,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE802154) && (dev->type != ARPHRD_IEEE1394) && - (dev->type != ARPHRD_TUNNEL6)) { + (dev->type != ARPHRD_TUNNEL6) && + (dev->type != ARPHRD_6LOWPAN)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -3472,7 +3478,12 @@ restart: &inet6_addr_lst[i], addr_lst) { unsigned long age; - if (ifp->flags & IFA_F_PERMANENT) + /* When setting preferred_lft to a value not zero or + * infinity, while valid_lft is infinity + * IFA_F_PERMANENT has a non-infinity life time. + */ + if ((ifp->flags & IFA_F_PERMANENT) && + (ifp->prefered_lft == INFINITY_LIFE_TIME)) continue; spin_lock(&ifp->lock); @@ -3497,7 +3508,8 @@ restart: ifp->flags |= IFA_F_DEPRECATED; } - if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) + if ((ifp->valid_lft != INFINITY_LIFE_TIME) && + (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))) next = ifp->tstamp + ifp->valid_lft * HZ; spin_unlock(&ifp->lock); @@ -3797,7 +3809,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); - if (!(ifa->flags&IFA_F_PERMANENT)) { + if (!((ifa->flags&IFA_F_PERMANENT) && + (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { @@ -4728,6 +4741,46 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = ctl->data; + int ret; + int old, new; + + old = *valp; + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + new = *valp; + + if (write && old != new) { + struct net *net = ctl->extra2; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (valp == &net->ipv6.devconf_dflt->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + else if (valp == &net->ipv6.devconf_all->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + else { + struct inet6_dev *idev = ctl->extra1; + + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_unlock(); + } + + return ret; +} + + static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; @@ -4914,7 +4967,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_proxy_ndp, }, { .procname = "accept_source_route", @@ -5163,9 +5216,7 @@ int __init addrconf_init(void) addrconf_verify(0); - err = rtnl_af_register(&inet6_ops); - if (err < 0) - goto errout_af; + rtnl_af_register(&inet6_ops); err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, NULL); @@ -5189,7 +5240,6 @@ int __init addrconf_init(void) return 0; errout: rtnl_af_unregister(&inet6_ops); -errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eef8d945b36..9a809a4b3d8 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -315,8 +315,10 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +static struct dst_entry *icmpv6_route_lookup(struct net *net, + struct sk_buff *skb, + struct sock *sk, + struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -554,7 +556,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) saddr = &ipv6_hdr(skb)->daddr; - if (!ipv6_unicast_destination(skb)) + if (!ipv6_unicast_destination(skb) && + !(net->ipv6.anycast_src_echo_reply && + ipv6_anycast_destination(skb))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -984,7 +988,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -struct ctl_table ipv6_icmp_table_template[] = { +static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5550a8113a6..075602fc6b6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1530,7 +1530,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) + void *arg) { struct fib6_table *table; struct hlist_head *head; @@ -1542,7 +1542,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); + func, 0, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1636,7 +1636,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); - fib6_clean_all(net, fib6_age, 0, NULL); + fib6_clean_all(net, fib6_age, NULL); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e27fb78c61f..e7a440dd5c0 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -498,7 +498,7 @@ static int ip6gre_rcv(struct sk_buff *skb) &ipv6h->saddr, &ipv6h->daddr, key, gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; @@ -1265,12 +1265,12 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tunnel_stats; + struct pcpu_sw_netstats *ip6gre_tunnel_stats; ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6gre_tunnel_stats->syncp); } @@ -1466,12 +1466,12 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tap_stats; + struct pcpu_sw_netstats *ip6gre_tap_stats; ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6gre_tap_stats->syncp); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 6fb4162fa78..1e8683b135b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -190,7 +190,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int nlen; unsigned int hlen; unsigned int off; - int flush = 1; + u16 flush = 1; int proto; __wsum csum; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 788c01a5359..ef02b26ccf8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -321,6 +321,27 @@ static inline int ip6_forward_finish(struct sk_buff *skb) return dst_output(skb); } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ + unsigned int mtu; + struct inet6_dev *idev; + + if (dst_metric_locked(dst, RTAX_MTU)) { + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + } + + mtu = IPV6_MIN_MTU; + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -441,7 +462,7 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = dst_mtu(dst); + mtu = ip6_dst_mtu_forward(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -1188,11 +1209,35 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->tot_len : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? + mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1217,12 +1262,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8d7c9867a44..1e5e2404f1a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -29,7 +29,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -102,16 +101,26 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + unsigned int start; + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; } dev->stats.rx_packets = sum.rx_packets; dev->stats.rx_bytes = sum.rx_bytes; @@ -784,7 +793,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != ipproto && t->parms.proto != 0) { rcu_read_unlock(); @@ -823,8 +832,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -1497,12 +1508,12 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6_tnl_stats; + struct pcpu_sw_netstats *ip6_tnl_stats; ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6_tnl_stats->syncp); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ed94ba61dda..b50acd5e75d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -24,7 +24,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -75,26 +74,6 @@ struct vti6_net { struct ip6_tnl __rcu **tnls[2]; }; -static struct net_device_stats *vti6_get_stats(struct net_device *dev) -{ - struct pcpu_tstats sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -312,7 +291,7 @@ static int vti6_rcv(struct sk_buff *skb) if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); @@ -331,8 +310,10 @@ static int vti6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); skb->mark = 0; secpath_reset(skb); @@ -716,7 +697,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, - .ndo_get_stats = vti6_get_stats, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; /** @@ -753,7 +734,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; return 0; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7702f9e90a0..35750df744d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6 config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" + help + This option enables the IPv6 support for nf_tables. config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" + help + This option enables the "route" chain for IPv6 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, flowlabel, hop-limit and + the packet mark. config NFT_CHAIN_NAT_IPV6 depends on NF_TABLES_IPV6 depends on NF_NAT_IPV6 && NFT_NAT tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index da00a2ecde5..544b0a9da1b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -23,181 +23,18 @@ #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netdevice.h> -#include <net/ipv6.h> -#include <net/tcp.h> #include <net/icmp.h> -#include <net/ip6_checksum.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> #include <net/flow.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> +#include <net/netfilter/ipv6/nf_reject.h> + MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); -/* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; - const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; - u8 proto; - __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } - - proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); - - if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - pr_debug("Cannot get TCP header.\n"); - return; - } - - otcplen = oldskb->len - tcphoff; - - /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; - } - - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); - - /* No RST for RST. */ - if (otcph.rst) { - pr_debug("RST is set\n"); - return; - } - - /* Check checksum. */ - if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { - pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; - } - - skb_dst_set(nskb, dst); - - skb_reserve(nskb, hh_len + dst->header_len); - - skb_put(nskb, sizeof(struct ipv6hdr)); - skb_reset_network_header(nskb); - ip6h = ipv6_hdr(nskb); - ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; - ip6h->saddr = oip6h->daddr; - ip6h->daddr = oip6h->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - /* Truncate to length (no data) */ - tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; - - if (otcph.ack) { - needs_ack = 0; - tcph->seq = otcph.ack_seq; - tcph->ack_seq = 0; - } else { - needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); - tcph->seq = 0; - } - - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; - tcph->rst = 1; - tcph->ack = needs_ack; - tcph->window = 0; - tcph->urg_ptr = 0; - tcph->check = 0; - - /* Adjust TCP checksum */ - tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, - &ipv6_hdr(nskb)->daddr, - sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip6_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - nskb->protocol = htons(ETH_P_IPV6); - ip6h->payload_len = htons(sizeof(struct tcphdr)); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - return; - dev_queue_xmit(nskb); - } else -#endif - ip6_local_out(nskb); -} - -static inline void -send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, - unsigned int hooknum) -{ - if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = net->loopback_dev; - - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); -} static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) @@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("%s: medium point\n", __func__); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb, par->hooknum); + nf_send_reset6(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index d77db8a1350..0d812b31277 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,34 +16,51 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_ipv6.h> +static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain(&pkt, ops); +} + static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nft_pktinfo pkt; - if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " "packet\n"); return NF_ACCEPT; } - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) - return NF_DROP; - return nft_do_chain_pktinfo(&pkt, ops); + return nft_do_chain_ipv6(ops, skb, in, out, okfn); } -static struct nft_af_info nft_af_ipv6 __read_mostly = { +struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; +EXPORT_SYMBOL_GPL(nft_af_ipv6); static int nf_tables_ipv6_init_net(struct net *net) { @@ -73,44 +90,28 @@ static struct pernet_operations nf_tables_ipv6_net_ops = { .exit = nf_tables_ipv6_exit_net, }; -static unsigned int -nft_do_chain_ipv6(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) - return NF_DROP; - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), - .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_ipv6_output, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, }; static int __init nf_tables_ipv6_init(void) { + int ret; + nft_register_chain_type(&filter_ipv6); - return register_pernet_subsys(&nf_tables_ipv6_net_ops); + ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv6); + + return ret; } static void __exit nf_tables_ipv6_exit(void) diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index e86dcd70dc7..9c3297a768f 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -79,7 +79,7 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { @@ -170,21 +170,21 @@ static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_nat_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type nft_chain_nat_ipv6 = { .name = "nat", .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .fn = { + .hooks = { [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, }, - .me = THIS_MODULE, }; static int __init nft_chain_nat_ipv6_init(void) diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 3fe40f0456a..42031299585 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -47,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || @@ -59,15 +59,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_route_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type nft_chain_route_ipv6 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), - .fn = { + .hooks = { [NF_INET_LOCAL_OUT] = nf_route_table_hook, }, - .me = THIS_MODULE, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 89b2735cecf..11dac21e658 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -104,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif +static void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer_base *base; + struct inet_peer *peer; + + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + } +} + +static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +{ + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); + + rt6_bind_peer(rt, create); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); +} + +static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *) dst; @@ -312,22 +342,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) { @@ -1909,9 +1923,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, else rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2244,7 +2256,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) .net = net, .addr = &ifp->addr, }; - fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); + fib6_clean_all(net, fib6_remove_prefsrc, &adni); } struct arg_dev_net { @@ -2271,7 +2283,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) .net = net, }; - fib6_clean_all(net, fib6_ifdown, 0, &adn); + fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); } @@ -2326,7 +2338,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) .mtu = mtu, }; - fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 366fbba3359..3dfbcf1dcb1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -671,7 +671,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) @@ -702,8 +702,10 @@ static int ipip6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -924,7 +926,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -966,8 +968,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { + ip_rt_put(rt); goto out; + } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); @@ -1361,12 +1365,12 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_tunnel_stats; + struct pcpu_sw_netstats *ipip6_tunnel_stats; ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipip6_tunnel_stats->syncp); } @@ -1391,12 +1395,12 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_fb_stats; + struct pcpu_sw_netstats *ipip6_fb_stats; ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipip6_fb_stats->syncp); } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 107b2f1d90a..6b6a2c83027 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -24,6 +24,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "anycast_src_echo_reply", + .data = &init_net.ipv6.anycast_src_echo_reply, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -51,6 +58,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if (!ipv6_table) goto out; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; + ipv6_table[1].data = &net->ipv6.anycast_src_echo_reply; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2bb87b85212..ffd5fa8bdb1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -164,12 +164,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if(ipv6_addr_any(&usin->sin6_addr)) + if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 0x1; addr_type = ipv6_addr_type(&usin->sin6_addr); - if(addr_type & IPV6_ADDR_MULTICAST) + if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { @@ -336,7 +336,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk) static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; @@ -469,7 +469,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); - struct sk_buff * skb; + struct sk_buff *skb; int err = -ENOMEM; /* First, grab a route. */ @@ -912,7 +912,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, } -static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) +static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); @@ -1085,9 +1085,9 @@ drop: return 0; /* don't send reset */ } -static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); @@ -1382,7 +1382,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. */ - if(nsk != sk) { + if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; @@ -1742,7 +1742,7 @@ static void get_openreq6(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, - 0,0, /* could print option size, but that is af dependent. */ + 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, @@ -1801,7 +1801,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) atomic_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh ); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 9af77d9c0ec..735d0f60c83 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -176,7 +176,7 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) * owned by userspace. A struct sock returned from this function must be * released using l2tp_tunnel_sock_put once you're done with it. */ -struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) +static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) { int err = 0; struct socket *sock = NULL; @@ -202,10 +202,9 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) out: return sk; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup); /* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */ -void l2tp_tunnel_sock_put(struct sock *sk) +static void l2tp_tunnel_sock_put(struct sock *sk) { struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); if (tunnel) { @@ -217,7 +216,6 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } -EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); /* Lookup a session by id in the global session list */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 1ee9f6965d6..1f01ba3435b 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -238,8 +238,6 @@ out: return tunnel; } -struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); -void l2tp_tunnel_sock_put(struct sock *sk); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7b01b9f5846..c71b699eb55 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -715,7 +715,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; - u32 *seq; + u32 *seq, skb_len; unsigned long used; int target; /* Read at least this many bytes */ long timeo; @@ -812,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } continue; found_ok_skb: + skb_len = skb->len; /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) @@ -844,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } /* Partial read */ - if (used + offset < skb->len) + if (used + offset < skb_len) continue; } while (len > 0); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index cd872417796..42dc2e45c92 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -753,7 +753,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) * * Sends received pdus to the connection state machine. */ -static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb) +static int llc_conn_rcv(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -891,7 +891,7 @@ out_kfree_skb: * * Initializes a socket with default llc values. */ -static void llc_sk_init(struct sock* sk) +static void llc_sk_init(struct sock *sk) { struct llc_sock *llc = llc_sk(sk); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 2bb0ddff8c0..842851cef69 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -23,7 +23,7 @@ #include <net/llc.h> LIST_HEAD(llc_sap_list); -DEFINE_SPINLOCK(llc_sap_list_lock); +static DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. @@ -48,7 +48,7 @@ static struct llc_sap *llc_sap_alloc(void) static struct llc_sap *__llc_sap_find(unsigned char sap_value) { - struct llc_sap* sap; + struct llc_sap *sap; list_for_each_entry(sap, &llc_sap_list, node) if (sap->laddr.lsap == sap_value) @@ -159,7 +159,6 @@ module_init(llc_init); module_exit(llc_exit); EXPORT_SYMBOL(llc_sap_list); -EXPORT_SYMBOL(llc_sap_list_lock); EXPORT_SYMBOL(llc_sap_find); EXPORT_SYMBOL(llc_sap_open); EXPORT_SYMBOL(llc_sap_close); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e5850699098..06033f6c845 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -66,7 +66,7 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, return skb; } -void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) +void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; @@ -114,7 +114,7 @@ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) * failure. */ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, - struct sk_buff* skb) + struct sk_buff *skb) { int i = 0; struct llc_sap_state_trans *rc = NULL; diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 537488cbf94..9b9009f9955 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -111,7 +111,7 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, } -struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]) +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]) { struct crypto_cipher *tfm; diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 20785a64725..0ce6487af79 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,7 +11,7 @@ #include <linux/crypto.h> -struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]); +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f80e8c4c6bc..09d2e58a2ba 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -301,9 +301,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, if (!sta) goto out; - if (pairwise) + if (pairwise && key_idx < NUM_DEFAULT_KEYS) key = rcu_dereference(sta->ptk[key_idx]); - else if (key_idx < NUM_DEFAULT_KEYS) + else if (!pairwise && + key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = rcu_dereference(sta->gtk[key_idx]); } else key = rcu_dereference(sdata->keys[key_idx]); @@ -827,6 +828,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) return 0; + mutex_lock(&local->mtx); mutex_lock(&local->iflist_mtx); if (local->use_chanctx) { sdata = rcu_dereference_protected( @@ -845,6 +847,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (ret == 0) local->monitor_chandef = *chandef; mutex_unlock(&local->iflist_mtx); + mutex_unlock(&local->mtx); return ret; } @@ -873,8 +876,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params) +static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_beacon_data *params) { struct beacon_data *new, *old; int new_head_len, new_tail_len; @@ -950,6 +953,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; u32 changed = BSS_CHANGED_BEACON_INT | @@ -968,8 +972,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; sdata->radar_required = params->radar_required; + mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); + mutex_unlock(&local->mtx); if (err) return err; ieee80211_vif_copy_chanctx_to_vlans(sdata, false); @@ -1097,17 +1103,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - sta_info_flush_defer(vlan); - sta_info_flush_defer(sdata); - synchronize_net(); - rcu_barrier(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { - sta_info_flush_cleanup(vlan); - ieee80211_free_keys(vlan); - } - sta_info_flush_cleanup(sdata); - ieee80211_free_keys(sdata); + __sta_info_flush(sdata, true); + ieee80211_free_keys(sdata, true); sdata->vif.bss_conf.enable_beacon = false; sdata->vif.bss_conf.ssid_len = 0; @@ -1129,7 +1126,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) skb_queue_purge(&sdata->u.ap.ps.bc_buf); ieee80211_vif_copy_chanctx_to_vlans(sdata, true); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); return 0; } @@ -1952,8 +1951,10 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; + mutex_lock(&sdata->local->mtx); err = ieee80211_vif_use_channel(sdata, &setup->chandef, IEEE80211_CHANCTX_SHARED); + mutex_unlock(&sdata->local->mtx); if (err) return err; @@ -1965,7 +1966,9 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_stop_mesh(sdata); + mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&sdata->local->mtx); return 0; } @@ -2587,8 +2590,8 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; - memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs, - sizeof(mask->control[i].mcs)); + memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, + sizeof(mask->control[i].ht_mcs)); sdata->rc_has_mcs_mask[i] = false; if (!sband) @@ -2903,26 +2906,29 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, unsigned long timeout; int err; - if (!list_empty(&local->roc_list) || local->scanning) - return -EBUSY; + mutex_lock(&local->mtx); + if (!list_empty(&local->roc_list) || local->scanning) { + err = -EBUSY; + goto out_unlock; + } /* whatever, but channel contexts should not complain about that one */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; sdata->radar_required = true; - mutex_lock(&local->iflist_mtx); err = ieee80211_vif_use_channel(sdata, chandef, IEEE80211_CHANCTX_SHARED); - mutex_unlock(&local->iflist_mtx); if (err) - return err; + goto out_unlock; timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); ieee80211_queue_delayed_work(&sdata->local->hw, &sdata->dfs_cac_timer_work, timeout); - return 0; + out_unlock: + mutex_unlock(&local->mtx); + return err; } static struct cfg80211_beacon_data * @@ -2998,7 +3004,9 @@ void ieee80211_csa_finalize_work(struct work_struct *work) goto unlock; sdata->radar_required = sdata->csa_radar_required; + mutex_lock(&local->mtx); err = ieee80211_vif_change_channel(sdata, &changed); + mutex_unlock(&local->mtx); if (WARN_ON(err < 0)) goto unlock; @@ -3047,8 +3055,8 @@ unlock: sdata_unlock(sdata); } -static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -3829,6 +3837,31 @@ static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) } #endif +static int ieee80211_set_qos_map(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_qos_map *qos_map) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct mac80211_qos_map *new_qos_map, *old_qos_map; + + if (qos_map) { + new_qos_map = kzalloc(sizeof(*new_qos_map), GFP_KERNEL); + if (!new_qos_map) + return -ENOMEM; + memcpy(&new_qos_map->qos_map, qos_map, sizeof(*qos_map)); + } else { + /* A NULL qos_map was passed to disable QoS mapping */ + new_qos_map = NULL; + } + + old_qos_map = rtnl_dereference(sdata->qos_map); + rcu_assign_pointer(sdata->qos_map, new_qos_map); + if (old_qos_map) + kfree_rcu(old_qos_map, rcu_head); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3908,4 +3941,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .channel_switch = ieee80211_channel_switch, + .set_qos_map = ieee80211_set_qos_map, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index a57d5d9466b..f43613a97dd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -232,8 +232,8 @@ ieee80211_new_chanctx(struct ieee80211_local *local, if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; - /* acquire mutex to prevent idle from changing */ - mutex_lock(&local->mtx); + /* we hold the mutex to prevent idle from changing */ + lockdep_assert_held(&local->mtx); /* turn idle off *before* setting channel -- some drivers need that */ changed = ieee80211_idle_off(local); if (changed) @@ -246,19 +246,14 @@ ieee80211_new_chanctx(struct ieee80211_local *local, err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - ctx = ERR_PTR(err); - ieee80211_recalc_idle(local); - goto out; + return ERR_PTR(err); } } /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); - out: - mutex_unlock(&local->mtx); - return ctx; } @@ -294,9 +289,7 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, /* throw a warning if this wasn't the only channel context. */ WARN_ON(check_single_channel && !list_empty(&local->chanctx_list)); - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -358,6 +351,31 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, ieee80211_change_chanctx(local, ctx, compat); } +static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx) +{ + bool radar_enabled; + + lockdep_assert_held(&local->chanctx_mtx); + /* for setting local->radar_detect_enabled */ + lockdep_assert_held(&local->mtx); + + radar_enabled = ieee80211_is_radar_required(local); + + if (radar_enabled == chanctx->conf.radar_enabled) + return; + + chanctx->conf.radar_enabled = radar_enabled; + local->radar_detect_enabled = chanctx->conf.radar_enabled; + + if (!local->use_chanctx) { + local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } + + drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); +} + static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx) { @@ -404,29 +422,6 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ieee80211_free_chanctx(local, ctx); } -void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *chanctx) -{ - bool radar_enabled; - - lockdep_assert_held(&local->chanctx_mtx); - - radar_enabled = ieee80211_is_radar_required(local); - - if (radar_enabled == chanctx->conf.radar_enabled) - return; - - chanctx->conf.radar_enabled = radar_enabled; - local->radar_detect_enabled = chanctx->conf.radar_enabled; - - if (!local->use_chanctx) { - local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } - - drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); -} - void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx) { @@ -518,6 +513,8 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx; int ret; + lockdep_assert_held(&local->mtx); + WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); mutex_lock(&local->chanctx_mtx); @@ -558,6 +555,8 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, int ret; u32 chanctx_changed = 0; + lockdep_assert_held(&local->mtx); + /* should never be called if not performing a channel switch. */ if (WARN_ON(!sdata->vif.csa_active)) return -EINVAL; @@ -655,6 +654,8 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) { WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); + lockdep_assert_held(&sdata->local->mtx); + mutex_lock(&sdata->local->chanctx_mtx); __ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->chanctx_mtx); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d03c47c0a4..ef8b385eff0 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -242,22 +242,6 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } -static inline void drv_set_multicast_list(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct netdev_hw_addr_list *mc_list) -{ - bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; - - trace_drv_set_multicast_list(local, sdata, mc_list->count); - - check_sdata_in_driver(sdata); - - if (local->ops->set_multicast_list) - local->ops->set_multicast_list(&local->hw, &sdata->vif, - allmulti, mc_list); - trace_drv_return_void(local); -} - static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, @@ -550,6 +534,22 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, } #endif +static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + + trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta); + if (local->ops->sta_pre_rcu_remove) + local->ops->sta_pre_rcu_remove(&local->hw, &sdata->vif, + &sta->sta); + trace_drv_return_void(local); +} + static inline __must_check int drv_sta_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 2eda7b13124..771080ec721 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -293,14 +293,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, radar_required = true; } + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); if (ieee80211_vif_use_channel(sdata, &chandef, ifibss->fixed_channel ? IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { sdata_info(sdata, "Failed to join IBSS, no channel context\n"); + mutex_unlock(&local->mtx); return; } + mutex_unlock(&local->mtx); memcpy(ifibss->bssid, bssid, ETH_ALEN); @@ -363,7 +366,9 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.ssid_len = 0; RCU_INIT_POINTER(ifibss->presp, NULL); kfree_rcu(presp, rcu_head); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n", err); return; @@ -522,7 +527,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, if (csa_settings) ieee80211_send_action_csa(sdata, csa_settings); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + return BSS_CHANGED_BEACON; out: return ret; } @@ -534,7 +539,8 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) int err; u16 capability; - sdata_lock(sdata); + sdata_assert_lock(sdata); + /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { capability = WLAN_CAPABILITY_IBSS; @@ -559,10 +565,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) /* generate the beacon */ err = ieee80211_ibss_csa_beacon(sdata, NULL); - sdata_unlock(sdata); if (err < 0) return err; + if (err) + ieee80211_bss_info_change_notify(sdata, err); + return 0; } @@ -744,7 +752,9 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); drv_leave_ibss(local, sdata); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); } static void ieee80211_csa_connection_drop_work(struct work_struct *work) @@ -753,12 +763,16 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.ibss.csa_connection_drop_work); + sdata_lock(sdata); + ieee80211_ibss_disconnect(sdata); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ ieee80211_queue_work(&sdata->local->hw, &sdata->work); + + sdata_unlock(sdata); } static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) @@ -784,18 +798,10 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_chanctx *chanctx; enum nl80211_channel_type ch_type; - int err, num_chanctx; + int err; u32 sta_flags; - if (sdata->vif.csa_active) - return true; - - if (!sdata->vif.bss_conf.ibss_joined) - return false; - sta_flags = IEEE80211_STA_DISABLE_VHT; switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_5: @@ -830,9 +836,6 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.count = csa_ie.count; params.chandef = csa_ie.chandef; - if (ifibss->chandef.chan->band != params.chandef.chan->band) - goto disconnect; - switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: @@ -888,28 +891,12 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.radar_required = true; } - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); - goto disconnect; - } - - /* don't handle for multi-VIF cases */ - chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { - rcu_read_unlock(); - goto disconnect; - } - num_chanctx = 0; - list_for_each_entry_rcu(chanctx, &sdata->local->chanctx_list, list) - num_chanctx++; - - if (num_chanctx > 1) { - rcu_read_unlock(); - goto disconnect; + if (cfg80211_chandef_identical(¶ms.chandef, + &sdata->vif.bss_conf.chandef)) { + ibss_dbg(sdata, + "received csa with an identical chandef, ignoring\n"); + return true; } - rcu_read_unlock(); /* all checks done, now perform the channel switch. */ ibss_dbg(sdata, @@ -918,19 +905,9 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.block_tx = !!csa_ie.mode; - ieee80211_ibss_csa_beacon(sdata, ¶ms); - sdata->csa_radar_required = params.radar_required; - - if (params.block_tx) - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); - - sdata->csa_chandef = params.chandef; - sdata->vif.csa_active = true; - - ieee80211_bss_info_change_notify(sdata, err); - drv_channel_switch_beacon(sdata, ¶ms.chandef); + if (ieee80211_channel_switch(sdata->local->hw.wiphy, sdata->dev, + ¶ms)) + goto disconnect; ieee80211_ibss_csa_mark_radar(sdata); @@ -966,7 +943,8 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, if (len < required_len) return; - ieee80211_ibss_process_chanswitch(sdata, elems, false); + if (!sdata->vif.csa_active) + ieee80211_ibss_process_chanswitch(sdata, elems, false); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, @@ -1147,7 +1125,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, goto put_bss; /* process channel switch */ - if (ieee80211_ibss_process_chanswitch(sdata, elems, true)) + if (sdata->vif.csa_active || + ieee80211_ibss_process_chanswitch(sdata, elems, true)) goto put_bss; /* same BSSID */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ed5bf8b4b5c..953b9e29454 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -232,6 +232,7 @@ struct ieee80211_rx_data { struct beacon_data { u8 *head, *tail; int head_len, tail_len; + struct ieee80211_meshconf_ie *meshconf; struct rcu_head rcu_head; }; @@ -245,7 +246,8 @@ struct ps_data { /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */ - u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)]; + u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)] + __aligned(__alignof__(unsigned long)); struct sk_buff_head bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; @@ -540,7 +542,10 @@ struct ieee80211_mesh_sync_ops { struct ieee80211_mgmt *mgmt, struct ieee802_11_elems *elems, struct ieee80211_rx_status *rx_status); - void (*adjust_tbtt)(struct ieee80211_sub_if_data *sdata); + + /* should be called with beacon_data under RCU read lock */ + void (*adjust_tbtt)(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon); /* add other framework functions here */ }; @@ -614,6 +619,9 @@ struct ieee80211_if_mesh { bool chsw_init; u8 chsw_ttl; u16 pre_value; + + /* offset from skb->data while building IE */ + int meshconf_offset; }; #ifdef CONFIG_MAC80211_MESH @@ -686,6 +694,11 @@ struct ieee80211_chanctx { struct ieee80211_chanctx_conf conf; }; +struct mac80211_qos_map { + struct cfg80211_qos_map qos_map; + struct rcu_head rcu_head; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -731,6 +744,7 @@ struct ieee80211_sub_if_data { int encrypt_headroom; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; + struct mac80211_qos_map __rcu *qos_map; struct work_struct csa_finalize_work; int csa_counter_offset_beacon; @@ -776,10 +790,6 @@ struct ieee80211_sub_if_data { u32 mntr_flags; } u; - spinlock_t cleanup_stations_lock; - struct list_head cleanup_stations; - struct work_struct cleanup_stations_wk; - #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *subdir_stations; @@ -1117,6 +1127,7 @@ struct ieee80211_local { struct work_struct sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; + struct cfg80211_sched_scan_request *sched_scan_req; unsigned long leave_oper_channel_time; enum mac80211_scan_state next_scan_state; @@ -1425,6 +1436,9 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* scheduled scan handling */ +int +__ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); @@ -1443,6 +1457,8 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* channel switch handling */ void ieee80211_csa_finalize_work(struct work_struct *work); +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params); /* interface handling */ int ieee80211_iface_init(void); @@ -1465,8 +1481,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { @@ -1768,8 +1782,6 @@ void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); -void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *chanctx); void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7aa9f9dea9d..b2c83c0f06d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -418,8 +418,10 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + mutex_lock(&local->mtx); ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); + mutex_unlock(&local->mtx); if (ret) { drv_remove_interface(local, sdata); kfree(sdata); @@ -456,7 +458,9 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) synchronize_net(); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); drv_remove_interface(local, sdata); @@ -786,10 +790,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * This is relevant only in WDS mode, in all other modes we've * already removed all stations when disconnecting or similar, * so warn otherwise. - * - * We call sta_info_flush_cleanup() later, to combine RCU waits. */ - flushed = sta_info_flush_defer(sdata); + flushed = sta_info_flush(sdata); WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); @@ -828,9 +830,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; WARN_ON(local->suspended); - mutex_lock(&local->iflist_mtx); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); - mutex_unlock(&local->iflist_mtx); + mutex_unlock(&local->mtx); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); @@ -891,23 +893,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. + * Free the remaining keys, if there are any + * (shouldn't be, except maybe in WDS mode?) * - * sta_info_flush_cleanup() requires rcu_barrier() - * first to wait for the station call_rcu() calls - * to complete, and we also need synchronize_rcu() - * to wait for the RX path in case it is using the - * interface and enqueuing frames at this very time on + * Force the key freeing to always synchronize_net() + * to wait for the RX path in case it is using this + * interface enqueuing frames * at this very time on * another CPU. */ - synchronize_rcu(); - rcu_barrier(); - sta_info_flush_cleanup(sdata); - - /* - * Free all remaining keys, there shouldn't be any, - * except maybe in WDS mode? - */ - ieee80211_free_keys(sdata); + ieee80211_free_keys(sdata, true); /* fall through */ case NL80211_IFTYPE_AP: @@ -1018,17 +1012,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } - - /* - * TODO: If somebody needs this on AP interfaces, - * it can be enabled easily but multicast - * addresses from VLANs need to be synced. - */ - if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_AP) - drv_set_multicast_list(local, sdata, &dev->mc); - spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); @@ -1044,7 +1027,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) int i; /* free extra data */ - ieee80211_free_keys(sdata); + ieee80211_free_keys(sdata, false); ieee80211_debugfs_remove_netdev(sdata); @@ -1497,8 +1480,8 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, bool used = false; list_for_each_entry(sdata, &local->interfaces, list) { - if (memcmp(local->hw.wiphy->addresses[i].addr, - sdata->vif.addr, ETH_ALEN) == 0) { + if (ether_addr_equal(local->hw.wiphy->addresses[i].addr, + sdata->vif.addr)) { used = true; break; } @@ -1558,8 +1541,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, val += inc; list_for_each_entry(sdata, &local->interfaces, list) { - if (memcmp(tmp_addr, sdata->vif.addr, - ETH_ALEN) == 0) { + if (ether_addr_equal(tmp_addr, sdata->vif.addr)) { used = true; break; } @@ -1579,15 +1561,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } -static void ieee80211_cleanup_sdata_stas_wk(struct work_struct *wk) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = container_of(wk, struct ieee80211_sub_if_data, cleanup_stations_wk); - - ieee80211_cleanup_sdata_stas(sdata); -} - int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) @@ -1660,9 +1633,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - spin_lock_init(&sdata->cleanup_stations_lock); - INIT_LIST_HEAD(&sdata->cleanup_stations); - INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, ieee80211_dfs_cac_timer_work); INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e568d98167d..6ff65a1ebaa 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -589,14 +589,10 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, + struct list_head *keys) { struct ieee80211_key *key, *tmp; - LIST_HEAD(keys); - - cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); - - mutex_lock(&sdata->local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt -= sdata->crypto_tx_tailroom_pending_dec; @@ -608,28 +604,51 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add_tail(&key->list, &keys); + list_add_tail(&key->list, keys); } ieee80211_debugfs_key_update_default(sdata); +} - if (!list_empty(&keys)) { - synchronize_net(); - list_for_each_entry_safe(key, tmp, &keys, list) - __ieee80211_key_destroy(key, false); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, + bool force_synchronize) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); + + cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); + + mutex_lock(&local->key_mtx); + + ieee80211_free_keys_iface(sdata, &keys); + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + ieee80211_free_keys_iface(vlan, &keys); } + if (!list_empty(&keys) || force_synchronize) + synchronize_net(); + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, false); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || + vlan->crypto_tx_tailroom_pending_dec); + } - mutex_unlock(&sdata->local->key_mtx); + mutex_unlock(&local->key_mtx); } void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta) { - struct ieee80211_key *key, *tmp; - LIST_HEAD(keys); + struct ieee80211_key *key; int i; mutex_lock(&local->key_mtx); @@ -640,7 +659,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add(&key->list, &keys); + __ieee80211_key_destroy(key, true); } for (i = 0; i < NUM_DEFAULT_KEYS; i++) { @@ -650,17 +669,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add(&key->list, &keys); - } - - /* - * NB: the station code relies on this being - * done even if there aren't any keys - */ - synchronize_net(); - - list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, true); + } mutex_unlock(&local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 0aebb889cab..19db68663d7 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -136,7 +136,8 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, + bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fa34cd2344b..2bd5b552b2f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -250,12 +250,8 @@ static void ieee80211_restart_work(struct work_struct *work) /* wait for scan work complete */ flush_workqueue(local->workqueue); - mutex_lock(&local->mtx); - WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) || - rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)), - "%s called with hardware scan in progress\n", __func__); - mutex_unlock(&local->mtx); + WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), + "%s called with hardware scan in progress\n", __func__); rtnl_lock(); ieee80211_scan_cancel(local); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 89df62b2b68..5a74b249ba3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -259,6 +259,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = meshconf_len; + /* save a pointer for quick updates in pre-tbtt */ + ifmsh->meshconf_offset = pos - skb->data; + /* Active path selection protocol ID */ *pos++ = ifmsh->mesh_pp_id; /* Active path selection metric ID */ @@ -723,6 +726,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) bcn->tail_len = skb->len; memcpy(bcn->tail, skb->data, bcn->tail_len); + bcn->meshconf = (struct ieee80211_meshconf_ie *) + (bcn->tail + ifmsh->meshconf_offset); dev_kfree_skb(skb); rcu_assign_pointer(ifmsh->beacon, bcn); diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index d1cf2d55349..2bc5dc25d5a 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -164,12 +164,15 @@ no_sync: rcu_read_unlock(); } -static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) +static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); BUG_ON(!rcu_read_lock_held()); + cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); @@ -194,6 +197,10 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) ifmsh->adjusting_tbtt = false; } spin_unlock_bh(&ifmsh->sync_offset_lock); + + beacon->meshconf->meshconf_cap = ifmsh->adjusting_tbtt ? + IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING | cap : + ~IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING & cap; } static const struct sync_method sync_methods[] = { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 900ead344f5..fc1d82465b3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -888,7 +888,9 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; + mutex_lock(&local->mtx); ret = ieee80211_vif_change_channel(sdata, &changed); + mutex_unlock(&local->mtx); if (ret) { sdata_info(sdata, "vif channel switch failed, disconnecting\n"); @@ -1401,10 +1403,14 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work) dfs_cac_timer_work); struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chandef; - ieee80211_vif_release_channel(sdata); - cfg80211_cac_event(sdata->dev, &chandef, - NL80211_RADAR_CAC_FINISHED, - GFP_KERNEL); + mutex_lock(&sdata->local->mtx); + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_FINISHED, + GFP_KERNEL); + } + mutex_unlock(&sdata->local->mtx); } /* MLME */ @@ -1698,7 +1704,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush_defer(sdata); + sta_info_flush(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1747,7 +1753,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->have_beacon = false; ifmgd->flags = 0; + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; } @@ -2070,7 +2078,9 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, memset(sdata->u.mgd.bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; + mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&sdata->local->mtx); } cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); @@ -2319,7 +2329,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, memset(sdata->u.mgd.bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; + mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&sdata->local->mtx); } kfree(assoc_data); @@ -3670,6 +3682,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* will change later if needed */ sdata->smps_mode = IEEE80211_SMPS_OFF; + mutex_lock(&local->mtx); /* * If this fails (possibly due to channel context sharing * on incompatible channels, e.g. 80+80 and 160 sharing the @@ -3681,13 +3694,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* don't downgrade for 5 and 10 MHz channels, though. */ if (chandef.width == NL80211_CHAN_WIDTH_5 || chandef.width == NL80211_CHAN_WIDTH_10) - return ret; + goto out; while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= ieee80211_chandef_downgrade(&chandef); ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); } + out: + mutex_unlock(&local->mtx); return ret; } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 34012620434..af64fb8e8ad 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -37,9 +37,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); - /* flush out all packets and station cleanup call_rcu()s */ + /* flush out all packets */ synchronize_net(); - rcu_barrier(); ieee80211_flush_queues(local, NULL); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index d2f19f7e709..f3d88b0c054 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -135,7 +135,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) u32 usecs; int i; - for (i=0; i < MAX_THR_RATES; i++) + for (i = 0; i < MAX_THR_RATES; i++) tmp_tp_rate[i] = 0; for (i = 0; i < mi->n_rates; i++) { @@ -190,7 +190,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * choose the maximum throughput rate as max_prob_rate * (2) if all success probabilities < 95%, the rate with * highest success probability is choosen as max_prob_rate */ - if (mr->probability >= MINSTREL_FRAC(95,100)) { + if (mr->probability >= MINSTREL_FRAC(95, 100)) { if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp) tmp_prob_rate = i; } else { @@ -220,7 +220,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) static void minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { struct minstrel_priv *mp = priv; @@ -260,7 +260,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, static inline unsigned int minstrel_get_retry_count(struct minstrel_rate *mr, - struct ieee80211_tx_info *info) + struct ieee80211_tx_info *info) { unsigned int retry = mr->adjusted_retry_count; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index d2ed18d82fe..c1b5b73c5b9 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -63,7 +63,7 @@ #define CCK_DURATION(_bitrate, _short, _len) \ (1000 * (10 /* SIFS */ + \ - (_short ? 72 + 24 : 144 + 48 ) + \ + (_short ? 72 + 24 : 144 + 48) + \ (8 * (_len + 4) * 10) / (_bitrate))) #define CCK_ACK_DURATION(_bitrate, _short) \ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2dfa7552273..5a2afe9583a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1963,20 +1963,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } - if (skb) { - int align __maybe_unused; - #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - /* - * 'align' will only take the values 0 or 2 here - * since all frames are required to be aligned - * to 2-byte boundaries when being passed to - * mac80211; the code here works just as well if - * that isn't true, but mac80211 assumes it can - * access fields as 2-byte aligned (e.g. for - * compare_ether_addr) + if (skb) { + /* 'align' will only take the values 0 or 2 here since all + * frames are required to be aligned to 2-byte boundaries + * when being passed to mac80211; the code here works just + * as well if that isn't true, but mac80211 assumes it can + * access fields as 2-byte aligned (e.g. for ether_addr_equal) */ - align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3; + int align; + + align = (unsigned long)(skb->data + sizeof(struct ethhdr)) & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); @@ -1989,14 +1986,14 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb_set_tail_pointer(skb, len); } } + } #endif - if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_receive_skb(skb); - } + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_receive_skb(skb); } if (xmit_skb) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4d73c46df86..88c81616f8f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -271,10 +271,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, - bool was_hw_scan) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); + bool hw_scan = local->ops->hw_scan; + bool was_scanning = local->scanning; lockdep_assert_held(&local->mtx); @@ -290,7 +291,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, if (WARN_ON(!local->scan_req)) return; - if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { + if (hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { int rc; rc = drv_hw_scan(local, @@ -316,7 +317,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, /* Set power back to normal operating levels. */ ieee80211_hw_config(local, 0); - if (!was_hw_scan) { + if (!hw_scan) { ieee80211_configure_filter(local); drv_sw_scan_complete(local); ieee80211_offchannel_return(local); @@ -327,7 +328,8 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); - ieee80211_start_next_roc(local); + if (was_scanning) + ieee80211_start_next_roc(local); } void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) @@ -747,7 +749,7 @@ void ieee80211_scan_work(struct work_struct *work) container_of(work, struct ieee80211_local, scan_work.work); struct ieee80211_sub_if_data *sdata; unsigned long next_delay = 0; - bool aborted, hw_scan; + bool aborted; mutex_lock(&local->mtx); @@ -786,14 +788,6 @@ void ieee80211_scan_work(struct work_struct *work) } /* - * Avoid re-scheduling when the sdata is going away. - */ - if (!ieee80211_sdata_running(sdata)) { - aborted = true; - goto out_complete; - } - - /* * as long as no delay is required advance immediately * without scheduling a new work */ @@ -834,8 +828,7 @@ void ieee80211_scan_work(struct work_struct *work) goto out; out_complete: - hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); - __ieee80211_scan_completed(&local->hw, aborted, hw_scan); + __ieee80211_scan_completed(&local->hw, aborted); out: mutex_unlock(&local->mtx); } @@ -973,13 +966,13 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) */ cancel_delayed_work(&local->scan_work); /* and clean up */ - __ieee80211_scan_completed(&local->hw, true, false); + __ieee80211_scan_completed(&local->hw, true); out: mutex_unlock(&local->mtx); } -int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, - struct cfg80211_sched_scan_request *req) +int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_sched_scan_ies sched_scan_ies = {}; @@ -989,17 +982,10 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, iebufsz = 2 + IEEE80211_MAX_SSID_LEN + local->scan_ies_len + req->ie_len; - mutex_lock(&local->mtx); - - if (rcu_access_pointer(local->sched_scan_sdata)) { - ret = -EBUSY; - goto out; - } + lockdep_assert_held(&local->mtx); - if (!local->ops->sched_scan_start) { - ret = -ENOTSUPP; - goto out; - } + if (!local->ops->sched_scan_start) + return -ENOTSUPP; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { if (!local->hw.wiphy->bands[i]) @@ -1020,13 +1006,39 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, } ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); - if (ret == 0) + if (ret == 0) { rcu_assign_pointer(local->sched_scan_sdata, sdata); + local->sched_scan_req = req; + } out_free: while (i > 0) kfree(sched_scan_ies.ie[--i]); -out: + + if (ret) { + /* Clean in case of failure after HW restart or upon resume. */ + rcu_assign_pointer(local->sched_scan_sdata, NULL); + local->sched_scan_req = NULL; + } + + return ret; +} + +int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req) +{ + struct ieee80211_local *local = sdata->local; + int ret; + + mutex_lock(&local->mtx); + + if (rcu_access_pointer(local->sched_scan_sdata)) { + mutex_unlock(&local->mtx); + return -EBUSY; + } + + ret = __ieee80211_request_sched_scan_start(sdata, req); + mutex_unlock(&local->mtx); return ret; } @@ -1043,6 +1055,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) goto out; } + /* We don't want to restart sched scan anymore. */ + local->sched_scan_req = NULL; + if (rcu_access_pointer(local->sched_scan_sdata)) drv_sched_scan_stop(local, sdata); @@ -1077,6 +1092,9 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) rcu_assign_pointer(local->sched_scan_sdata, NULL); + /* If sched scan was aborted by the driver. */ + local->sched_scan_req = NULL; + mutex_unlock(&local->mtx); cfg80211_sched_scan_stopped(local->hw.wiphy); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8ed97f76c3c..4576ba0ff22 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,23 +99,6 @@ static void cleanup_single_sta(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct ps_data *ps; - /* - * At this point, when being called as call_rcu callback, - * neither mac80211 nor the driver can reference this - * sta struct any more except by still existing timers - * associated with this station that we clean up below. - * - * Note though that this still uses the sdata and even - * calls the driver in AP and mesh mode, so interfaces - * of those types mush use call sta_info_flush_cleanup() - * (typically via sta_info_flush()) before deconfiguring - * the driver. - * - * In station mode, nothing happens here so it doesn't - * have to (and doesn't) do that, this is intentional to - * speed up roaming. - */ - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -160,37 +143,6 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } -void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata) -{ - struct sta_info *sta; - - spin_lock_bh(&sdata->cleanup_stations_lock); - while (!list_empty(&sdata->cleanup_stations)) { - sta = list_first_entry(&sdata->cleanup_stations, - struct sta_info, list); - list_del(&sta->list); - spin_unlock_bh(&sdata->cleanup_stations_lock); - - cleanup_single_sta(sta); - - spin_lock_bh(&sdata->cleanup_stations_lock); - } - - spin_unlock_bh(&sdata->cleanup_stations_lock); -} - -static void free_sta_rcu(struct rcu_head *h) -{ - struct sta_info *sta = container_of(h, struct sta_info, rcu_head); - struct ieee80211_sub_if_data *sdata = sta->sdata; - - spin_lock(&sdata->cleanup_stations_lock); - list_add_tail(&sta->list, &sdata->cleanup_stations); - spin_unlock(&sdata->cleanup_stations_lock); - - ieee80211_queue_work(&sdata->local->hw, &sdata->cleanup_stations_wk); -} - /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) @@ -842,7 +794,7 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, return have_buffered; } -int __must_check __sta_info_destroy(struct sta_info *sta) +static int __must_check __sta_info_destroy_part1(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -868,12 +820,35 @@ int __must_check __sta_info_destroy(struct sta_info *sta) ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); - if (ret) + if (WARN_ON(ret)) return ret; list_del_rcu(&sta->list); - /* this always calls synchronize_net() */ + drv_sta_pre_rcu_remove(local, sta->sdata, sta); + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + rcu_access_pointer(sdata->u.vlan.sta) == sta) + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); + + return 0; +} + +static void __sta_info_destroy_part2(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + int ret; + + /* + * NOTE: This assumes at least synchronize_net() was done + * after _part1 and before _part2! + */ + + might_sleep(); + lockdep_assert_held(&local->sta_mtx); + + /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); sta->dead = true; @@ -881,9 +856,6 @@ int __must_check __sta_info_destroy(struct sta_info *sta) local->num_sta--; local->sta_generation++; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); - while (sta->sta_state > IEEE80211_STA_NONE) { ret = sta_info_move_state(sta, sta->sta_state - 1); if (ret) { @@ -906,7 +878,19 @@ int __must_check __sta_info_destroy(struct sta_info *sta) ieee80211_sta_debugfs_remove(sta); ieee80211_recalc_min_chandef(sdata); - call_rcu(&sta->rcu_head, free_sta_rcu); + cleanup_single_sta(sta); +} + +int __must_check __sta_info_destroy(struct sta_info *sta) +{ + int err = __sta_info_destroy_part1(sta); + + if (err) + return err; + + synchronize_net(); + + __sta_info_destroy_part2(sta); return 0; } @@ -976,32 +960,38 @@ void sta_info_stop(struct ieee80211_local *local) } -int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; + LIST_HEAD(free_list); int ret = 0; might_sleep(); + WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); + WARN_ON(vlans && !sdata->bss); + mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (sdata == sta->sdata) { - WARN_ON(__sta_info_destroy(sta)); + if (sdata == sta->sdata || + (vlans && sdata->bss == sta->sdata->bss)) { + if (!WARN_ON(__sta_info_destroy_part1(sta))) + list_add(&sta->free_list, &free_list); ret++; } } + + if (!list_empty(&free_list)) { + synchronize_net(); + list_for_each_entry_safe(sta, tmp, &free_list, free_list) + __sta_info_destroy_part2(sta); + } mutex_unlock(&local->sta_mtx); return ret; } -void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) -{ - ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); -} - void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 0218caf5c14..d77ff709063 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -247,6 +247,7 @@ struct ieee80211_tx_latency_stat { * mac80211 is communicating with. * * @list: global linked list entry + * @free_list: list entry for keeping track of stations to free * @hnext: hash table linked list pointer * @local: pointer to the global information * @sdata: virtual interface this station belongs to @@ -329,7 +330,7 @@ struct ieee80211_tx_latency_stat { */ struct sta_info { /* General information, mostly static */ - struct list_head list; + struct list_head list, free_list; struct rcu_head rcu_head; struct sta_info __rcu *hnext; struct ieee80211_local *local; @@ -605,21 +606,6 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); - -/** - * sta_info_flush_cleanup - flush the sta_info cleanup queue - * @sdata: the interface - * - * Flushes the sta_info cleanup queue for a given interface; - * this is necessary before the interface is removed or, for - * AP/mesh interfaces, before it is deconfigured. - * - * Note an rcu_barrier() must precede the function, after all - * stations have been flushed/removed to ensure the call_rcu() - * calls that add stations to the cleanup queue have completed. - */ -void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); /** * sta_info_flush - flush matching STA entries from the STA table @@ -627,15 +613,13 @@ void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); * Returns the number of removed STA entries. * * @sdata: sdata to remove all stations from + * @vlans: if the given interface is an AP interface, also flush VLANs */ +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); + static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) { - int ret = sta_info_flush_defer(sdata); - - rcu_barrier(); - sta_info_flush_cleanup(sdata); - - return ret; + return __sta_info_flush(sdata, false); } void sta_set_rate_info_tx(struct sta_info *sta, @@ -651,6 +635,4 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); -void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata); - #endif /* STA_INFO_H */ diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 124b1fdc20d..0ae207771a5 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -186,7 +186,7 @@ void ieee80211_get_tkip_p1k_iv(struct ieee80211_key_conf *keyconf, EXPORT_SYMBOL(ieee80211_get_tkip_p1k_iv); void ieee80211_get_tkip_rx_p1k(struct ieee80211_key_conf *keyconf, - const u8 *ta, u32 iv32, u16 *p1k) + const u8 *ta, u32 iv32, u16 *p1k) { const u8 *tk = &keyconf->key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; struct tkip_ctx ctx; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e9ccf22f6dd..da9366632f3 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -443,30 +443,6 @@ TRACE_EVENT(drv_prepare_multicast, ) ); -TRACE_EVENT(drv_set_multicast_list, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, int mc_count), - - TP_ARGS(local, sdata, mc_count), - - TP_STRUCT__entry( - LOCAL_ENTRY - __field(bool, allmulti) - __field(int, mc_count) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; - __entry->mc_count = mc_count; - ), - - TP_printk( - LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", - LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti - ) -); - TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, @@ -577,7 +553,7 @@ TRACE_EVENT(drv_update_tkip_key, TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x", - LOCAL_PR_ARG,VIF_PR_ARG,STA_PR_ARG, __entry->iv32 + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->iv32 ) ); @@ -790,7 +766,7 @@ TRACE_EVENT(drv_sta_rc_update, ) ); -TRACE_EVENT(drv_sta_add, +DECLARE_EVENT_CLASS(sta_event, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), @@ -815,29 +791,25 @@ TRACE_EVENT(drv_sta_add, ) ); -TRACE_EVENT(drv_sta_remove, +DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); - TP_ARGS(local, sdata, sta), - - TP_STRUCT__entry( - LOCAL_ENTRY - VIF_ENTRY - STA_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - VIF_ASSIGN; - STA_ASSIGN; - ), +DEFINE_EVENT(sta_event, drv_sta_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); - TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG - ) +DEFINE_EVENT(sta_event, drv_sta_pre_rcu_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) ); TRACE_EVENT(drv_conf_tx, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6d59e21cdb9..377cf974d97 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2161,7 +2161,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (ieee80211_is_data_qos(fc)) { __le16 *qos_control; - qos_control = (__le16*) skb_push(skb, 2); + qos_control = (__le16 *) skb_push(skb, 2); memcpy(skb_push(skb, hdrlen - 2), &hdr, hdrlen - 2); /* * Maybe we could actually set some fields here, for now just @@ -2323,7 +2323,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, if (atomic_read(&ps->num_sta_ps) > 0) /* in the hope that this is faster than * checking byte-for-byte */ - have_bits = !bitmap_empty((unsigned long*)ps->tim, + have_bits = !bitmap_empty((unsigned long *)ps->tim, IEEE80211_MAX_AID+1); if (ps->dtim_count == 0) @@ -2549,7 +2549,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, */ skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + - beacon->tail_len + 256); + beacon->tail_len + 256 + + local->hw.extra_beacon_tailroom); if (!skb) goto out; @@ -2581,7 +2582,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, ieee80211_update_csa(sdata, presp); - skb = dev_alloc_skb(local->tx_headroom + presp->head_len); + skb = dev_alloc_skb(local->tx_headroom + presp->head_len + + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); @@ -2602,13 +2604,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, ieee80211_update_csa(sdata, bcn); if (ifmsh->sync_ops) - ifmsh->sync_ops->adjust_tbtt( - sdata); + ifmsh->sync_ops->adjust_tbtt(sdata, bcn); skb = dev_alloc_skb(local->tx_headroom + bcn->head_len + 256 + /* TIM IE */ - bcn->tail_len); + bcn->tail_len + + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 875e172c001..df00f1978a7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -76,7 +76,7 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, } if (ieee80211_is_ctl(fc)) { - if(ieee80211_is_pspoll(fc)) + if (ieee80211_is_pspoll(fc)) return hdr->addr1; if (ieee80211_is_back_req(fc)) { @@ -642,6 +642,17 @@ void ieee80211_iterate_active_interfaces_rtnl( } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl); +struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + if (!ieee80211_sdata_running(sdata) || + !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) + return NULL; + return &sdata->vif; +} +EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); + /* * Nothing should have been stuffed into the workqueue during * the suspend->resume cycle. If this WARN is seen then there @@ -1451,6 +1462,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct sta_info *sta; int res, i; bool reconfig_due_to_wowlan = false; + struct ieee80211_sub_if_data *sched_scan_sdata; + bool sched_scan_stopped = false; #ifdef CONFIG_PM if (local->suspended) @@ -1754,6 +1767,27 @@ int ieee80211_reconfig(struct ieee80211_local *local) #else WARN_ON(1); #endif + + /* + * Reconfigure sched scan if it was interrupted by FW restart or + * suspend. + */ + mutex_lock(&local->mtx); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata && local->sched_scan_req) + /* + * Sched scan stopped, but we don't want to report it. Instead, + * we're trying to reschedule. + */ + if (__ieee80211_request_sched_scan_start(sched_scan_sdata, + local->sched_scan_req)) + sched_scan_stopped = true; + mutex_unlock(&local->mtx); + + if (sched_scan_stopped) + cfg80211_sched_scan_stopped(local->hw.wiphy); + return 0; } @@ -2281,9 +2315,14 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; struct cfg80211_chan_def chandef; + mutex_lock(&local->mtx); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + /* it might be waiting for the local->mtx, but then + * by the time it gets it, sdata->wdev.cac_started + * will no longer be true + */ + cancel_delayed_work(&sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; @@ -2295,6 +2334,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) } } mutex_unlock(&local->iflist_mtx); + mutex_unlock(&local->mtx); } void ieee80211_dfs_radar_detected_work(struct work_struct *work) @@ -2554,3 +2594,143 @@ int ieee80211_cs_headroom(struct ieee80211_local *local, return headroom; } + +static bool +ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) +{ + s32 end = data->desc[i].start + data->desc[i].duration - (tsf + 1); + int skip; + + if (end > 0) + return false; + + /* End time is in the past, check for repetitions */ + skip = DIV_ROUND_UP(-end, data->desc[i].interval); + if (data->count[i] < 255) { + if (data->count[i] <= skip) { + data->count[i] = 0; + return false; + } + + data->count[i] -= skip; + } + + data->desc[i].start += skip * data->desc[i].interval; + + return true; +} + +static bool +ieee80211_extend_absent_time(struct ieee80211_noa_data *data, u32 tsf, + s32 *offset) +{ + bool ret = false; + int i; + + for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) { + s32 cur; + + if (!data->count[i]) + continue; + + if (ieee80211_extend_noa_desc(data, tsf + *offset, i)) + ret = true; + + cur = data->desc[i].start - tsf; + if (cur > *offset) + continue; + + cur = data->desc[i].start + data->desc[i].duration - tsf; + if (cur > *offset) + *offset = cur; + } + + return ret; +} + +static u32 +ieee80211_get_noa_absent_time(struct ieee80211_noa_data *data, u32 tsf) +{ + s32 offset = 0; + int tries = 0; + /* + * arbitrary limit, used to avoid infinite loops when combined NoA + * descriptors cover the full time period. + */ + int max_tries = 5; + + ieee80211_extend_absent_time(data, tsf, &offset); + do { + if (!ieee80211_extend_absent_time(data, tsf, &offset)) + break; + + tries++; + } while (tries < max_tries); + + return offset; +} + +void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf) +{ + u32 next_offset = BIT(31) - 1; + int i; + + data->absent = 0; + data->has_next_tsf = false; + for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) { + s32 start; + + if (!data->count[i]) + continue; + + ieee80211_extend_noa_desc(data, tsf, i); + start = data->desc[i].start - tsf; + if (start <= 0) + data->absent |= BIT(i); + + if (next_offset > start) + next_offset = start; + + data->has_next_tsf = true; + } + + if (data->absent) + next_offset = ieee80211_get_noa_absent_time(data, tsf); + + data->next_tsf = tsf + next_offset; +} +EXPORT_SYMBOL(ieee80211_update_p2p_noa); + +int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, + struct ieee80211_noa_data *data, u32 tsf) +{ + int ret = 0; + int i; + + memset(data, 0, sizeof(*data)); + + for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) { + const struct ieee80211_p2p_noa_desc *desc = &attr->desc[i]; + + if (!desc->count || !desc->duration) + continue; + + data->count[i] = desc->count; + data->desc[i].start = le32_to_cpu(desc->start_time); + data->desc[i].duration = le32_to_cpu(desc->duration); + data->desc[i].interval = le32_to_cpu(desc->interval); + + if (data->count[i] > 1 && + data->desc[i].interval < data->desc[i].duration) + continue; + + ieee80211_extend_noa_desc(data, tsf, i); + ret++; + } + + if (ret) + ieee80211_update_p2p_noa(data, tsf); + + return ret; +} +EXPORT_SYMBOL(ieee80211_parse_p2p_noa); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index afba19cb6f8..21211c60ca9 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -106,6 +106,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sta_info *sta = NULL; const u8 *ra = NULL; bool qos = false; + struct mac80211_qos_map *qos_map; if (local->hw.queues < IEEE80211_NUM_ACS || skb->len < 6) { skb->priority = 0; /* required for correct WPA/11i MIC */ @@ -155,7 +156,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, /* use the data classifier to determine what 802.1d tag the * data frame has */ - skb->priority = cfg80211_classify8021d(skb); + rcu_read_lock(); + qos_map = rcu_dereference(sdata->qos_map); + skb->priority = cfg80211_classify8021d(skb, qos_map ? + &qos_map->qos_map : NULL); + rcu_read_unlock(); return ieee80211_downgrade_queue(sdata, skb); } diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index e24bcf97729..372d8a222b9 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -444,8 +444,8 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) case IEEE802154_FC_TYPE_DATA: return mac802154_process_data(sdata->dev, skb); default: - pr_warning("ieee802154: bad frame received (type = %d)\n", - mac_cb_type(skb)); + pr_warn("ieee802154: bad frame received (type = %d)\n", + mac_cb_type(skb)); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3398cd99b9..afe50c0f526 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -414,47 +414,112 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK config NF_TABLES - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK tristate "Netfilter nf_tables support" + help + nftables is the new packet classification framework that intends to + replace the existing {ip,ip6,arp,eb}_tables infrastructure. It + provides a pseudo-state machine with an extensible instruction-set + (also known as expressions) that the userspace 'nft' utility + (http://www.netfilter.org/projects/nftables) uses to build the + rule-set. It also comes with the generic set infrastructure that + allows you to construct mappings between matchings and actions + for performance lookups. + + To compile it as a module, choose M here. + +config NF_TABLES_INET + depends on NF_TABLES + select NF_TABLES_IPV4 + select NF_TABLES_IPV6 + tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support" + help + This option enables support for a mixed IPv4/IPv6 "inet" table. config NFT_EXTHDR depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" + help + This option adds the "exthdr" expression that you can use to match + IPv6 extension headers. config NFT_META depends on NF_TABLES tristate "Netfilter nf_tables meta module" + help + This option adds the "meta" expression that you can use to match and + to set packet metainformation such as the packet mark. config NFT_CT depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" + help + This option adds the "meta" expression that you can use to match + connection tracking information such as the flow state. config NFT_RBTREE depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" + help + This option adds the "rbtree" set type (Red Black tree) that is used + to build interval-based sets. config NFT_HASH depends on NF_TABLES tristate "Netfilter nf_tables hash set module" + help + This option adds the "hash" set type that is used to build one-way + mappings between matchings and actions. config NFT_COUNTER depends on NF_TABLES tristate "Netfilter nf_tables counter module" + help + This option adds the "counter" expression that you can use to + include packet and byte counters in a rule. config NFT_LOG depends on NF_TABLES tristate "Netfilter nf_tables log module" + help + This option adds the "log" expression that you can use to log + packets matching some criteria. config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" + help + This option adds the "limit" expression that you can use to + ratelimit rule matchings. config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables nat module" + help + This option adds the "nat" expression that you can use to perform + typical Network Address Translation (NAT) packet transformations. + +config NFT_QUEUE + depends on NF_TABLES + depends on NETFILTER_XTABLES + depends on NETFILTER_NETLINK_QUEUE + tristate "Netfilter nf_tables queue module" + help + This is required if you intend to use the userspace queueing + infrastructure (also known as NFQUEUE) from nftables. + +config NFT_REJECT + depends on NF_TABLES + depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6 + default m if NETFILTER_ADVANCED=n + tristate "Netfilter nf_tables reject support" + help + This option adds the "reject" expression that you can use to + explicitly deny and notify via TCP reset/ICMP informational errors + unallowed traffic. config NFT_COMPAT depends on NF_TABLES @@ -858,6 +923,16 @@ config NETFILTER_XT_MATCH_BPF To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CGROUP + tristate '"control group" match support' + depends on NETFILTER_ADVANCED + depends on CGROUPS + select CGROUP_NET_CLASSID + ---help--- + Socket/process control group matching allows you to match locally + generated packets based on which net_cls control group processes + belong to. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -1035,6 +1110,15 @@ config NETFILTER_XT_MATCH_HL in the IPv6 header, or the time-to-live field in the IPv4 header of the packet. +config NETFILTER_XT_MATCH_IPCOMP + tristate '"ipcomp" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of CPIs(16 bits) + inside IPComp header of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED @@ -1055,6 +1139,16 @@ config NETFILTER_XT_MATCH_IPVS If unsure, say N. +config NETFILTER_XT_MATCH_L2TP + tristate '"l2tp" match support' + depends on NETFILTER_ADVANCED + default L2TP + ---help--- + This option adds an "L2TP" match, which allows you to match against + L2TP protocol header fields. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 394483b2c19..ee9c4de5f8e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,13 +70,15 @@ nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o -#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o @@ -133,8 +135,10 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o +obj-$(CONFIG_NETFILTER_XT_MATCH_L2TP) += xt_l2tp.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o @@ -142,6 +146,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bac7e01df67..de770ec39e5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -625,34 +625,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); */ /* - * Find set by name, reference it once. The reference makes sure the - * thing pointed to, does not go away under our feet. - * - * The nfnl mutex is used in the function. - */ -ip_set_id_t -ip_set_nfnl_get(struct net *net, const char *name) -{ - ip_set_id_t i, index = IPSET_INVALID_ID; - struct ip_set *s; - struct ip_set_net *inst = ip_set_pernet(net); - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); - if (s != NULL && STREQ(s->name, name)) { - __ip_set_get(s); - index = i; - break; - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); - - return index; -} -EXPORT_SYMBOL_GPL(ip_set_nfnl_get); - -/* * Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index d5f41514f57..5882bbfd198 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -62,6 +62,7 @@ #include <net/ip_vs.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -96,6 +97,11 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; + /* Applications may adjust TCP seqs */ + if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && + !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) + return; + /* * The connection is not yet in the hashtable, so we update it. * CIP->VIP will remain the same, so leave the tuple in diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f63c2388f38..db801263ee9 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data) continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { - int ret = __wait_event_interruptible(*sk_sleep(sk), + /* (Ab)use interruptible sleep to avoid increasing + * the load avg. + */ + __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || kthread_should_stop()); if (unlikely(kthread_should_stop())) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 43549eb7a7b..8824ed0ccc9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -361,15 +355,6 @@ begin: return NULL; } -struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) -{ - return ____nf_conntrack_find(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); -} -EXPORT_SYMBOL_GPL(__nf_conntrack_find); - /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * __nf_conntrack_find_get(struct net *net, u16 zone, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 08870b85904..bb322d0beb4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + if (cda[CTA_MARK]) { + u32 mask = 0, mark, newmark; + if (cda[CTA_MARK_MASK]) + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); + newmark = (ct->mark & mask) ^ mark; + if (newmark != ct->mark) + ct->mark = newmark; + } #endif return 0; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index ce3004156ee..b65d5864b6d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); -void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_put); - int nf_ct_l3proto_try_module_get(unsigned short l3proto) { diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a99b6c3427b..cb372f96f10 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -428,7 +428,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, const char *msg; u_int8_t state; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; @@ -457,7 +457,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, - NULL, msg); + NULL, "%s", msg); return false; } @@ -486,7 +486,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, u_int8_t type, old_state, new_state; enum ct_dccp_roles role; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); type = dh->dccph_type; @@ -577,7 +577,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, unsigned int cscov; const char *msg; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); if (dh == NULL) { msg = "nf_ct_dccp: short packet "; goto out_invalid; @@ -614,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 17c1bcb182c..b2d38da6782 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -36,6 +36,11 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, if (off == 0) return 0; + if (unlikely(!seqadj)) { + WARN(1, "Wrong seqadj usage, missing nfct_seqadj_ext_add()\n"); + return 0; + } + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 902fb0a6b38..7a394df0deb 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -97,7 +97,6 @@ int nf_conntrack_tstamp_pernet_init(struct net *net) void nf_conntrack_tstamp_pernet_fini(struct net *net) { nf_conntrack_tstamp_fini_sysctl(net); - nf_ct_extend_unregister(&tstamp_extend); } int nf_conntrack_tstamp_init(void) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 63a81540221..d3f5cd6dd96 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * manips not an issue. */ if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(l3proto, l4proto, orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (l4proto->in_range(tuple, maniptype, &range->min_proto, diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 9baaf734c14..83a72a235ca 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, range_size = ntohs(range->max_proto.all) - min + 1; } - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC ? tuple->dst.u.all : tuple->src.u.all); - else + } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { + off = prandom_u32(); + } else { off = *rover; + } for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) *rover = off; return; } - return; } EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f93b7d06f4b..36add31e08e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -124,37 +124,43 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } -static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; +static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; -static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +static const struct nf_chain_type * +__nf_tables_chain_type_lookup(int family, const struct nlattr *nla) { int i; - for (i=0; i<NFT_CHAIN_T_MAX; i++) { + for (i = 0; i < NFT_CHAIN_T_MAX; i++) { if (chain_type[family][i] != NULL && !nla_strcmp(nla, chain_type[family][i]->name)) - return i; + return chain_type[family][i]; } - return -1; + return NULL; } -static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, - const struct nlattr *nla, - bool autoload) +static const struct nf_chain_type * +nf_tables_chain_type_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) { - int type; + const struct nf_chain_type *type; type = __nf_tables_chain_type_lookup(afi->family, nla); + if (type != NULL) + return type; #ifdef CONFIG_MODULES - if (type < 0 && autoload) { + if (autoload) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-chain-%u-%*.s", afi->family, nla_len(nla)-1, (const char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); type = __nf_tables_chain_type_lookup(afi->family, nla); + if (type != NULL) + return ERR_PTR(-EAGAIN); } #endif - return type; + return ERR_PTR(-ENOENT); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { @@ -180,7 +186,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->res_id = 0; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -306,13 +313,17 @@ err: return err; } -static int nf_tables_table_enable(struct nft_table *table) +static int nf_tables_table_enable(const struct nft_af_info *afi, + struct nft_table *table) { struct nft_chain *chain; int err, i = 0; list_for_each_entry(chain, &table->chains, list) { - err = nf_register_hook(&nft_base_chain(chain)->ops); + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); if (err < 0) goto err; @@ -321,20 +332,27 @@ static int nf_tables_table_enable(struct nft_table *table) return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + if (i-- <= 0) break; - nf_unregister_hook(&nft_base_chain(chain)->ops); + nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); } return err; } -static int nf_tables_table_disable(struct nft_table *table) +static int nf_tables_table_disable(const struct nft_af_info *afi, + struct nft_table *table) { struct nft_chain *chain; - list_for_each_entry(chain, &table->chains, list) - nf_unregister_hook(&nft_base_chain(chain)->ops); + list_for_each_entry(chain, &table->chains, list) { + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hooks(nft_base_chain(chain)->ops, + afi->nops); + } return 0; } @@ -348,7 +366,7 @@ static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, int family = nfmsg->nfgen_family, ret = 0; if (nla[NFTA_TABLE_FLAGS]) { - __be32 flags; + u32 flags; flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); if (flags & ~NFT_TABLE_F_DORMANT) @@ -356,12 +374,12 @@ static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, if ((flags & NFT_TABLE_F_DORMANT) && !(table->flags & NFT_TABLE_F_DORMANT)) { - ret = nf_tables_table_disable(table); + ret = nf_tables_table_disable(afi, table); if (ret >= 0) table->flags |= NFT_TABLE_F_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(table); + ret = nf_tables_table_enable(afi, table); if (ret >= 0) table->flags &= ~NFT_TABLE_F_DORMANT; } @@ -384,6 +402,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u32 flags = 0; afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) @@ -405,25 +424,25 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); } + if (nla[NFTA_TABLE_FLAGS]) { + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + } + + if (!try_module_get(afi->owner)) + return -EAFNOSUPPORT; + table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); - if (table == NULL) + if (table == NULL) { + module_put(afi->owner); return -ENOMEM; + } nla_strlcpy(table->name, name, nla_len(name)); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); - - if (nla[NFTA_TABLE_FLAGS]) { - __be32 flags; - - flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); - if (flags & ~NFT_TABLE_F_DORMANT) { - kfree(table); - return -EINVAL; - } - - table->flags |= flags; - } + table->flags = flags; list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); @@ -448,16 +467,17 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - if (table->use) + if (!list_empty(&table->chains) || !list_empty(&table->sets)) return -EBUSY; list_del(&table->list); nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); kfree(table); + module_put(afi->owner); return 0; } -int nft_register_chain_type(struct nf_chain_type *ctype) +int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; @@ -466,10 +486,6 @@ int nft_register_chain_type(struct nf_chain_type *ctype) err = -EBUSY; goto out; } - - if (!try_module_get(ctype->me)) - goto out; - chain_type[ctype->family][ctype->type] = ctype; out: nfnl_unlock(NFNL_SUBSYS_NFTABLES); @@ -477,11 +493,10 @@ out: } EXPORT_SYMBOL_GPL(nft_register_chain_type); -void nft_unregister_chain_type(struct nf_chain_type *ctype) +void nft_unregister_chain_type(const struct nf_chain_type *ctype) { nfnl_lock(NFNL_SUBSYS_NFTABLES); chain_type[ctype->family][ctype->type] = NULL; - module_put(ctype->me); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_chain_type); @@ -589,7 +604,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, if (chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; struct nlattr *nest; nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); @@ -605,9 +620,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, htonl(basechain->policy))) goto nla_put_failure; - if (nla_put_string(skb, NFTA_CHAIN_TYPE, - chain_type[ops->pf][nft_base_chain(chain)->type]->name)) - goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) + goto nla_put_failure; if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) goto nla_put_failure; @@ -748,22 +762,6 @@ err: return err; } -static int -nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr) -{ - switch (ntohl(nla_get_be32(attr))) { - case NF_DROP: - chain->policy = NF_DROP; - break; - case NF_ACCEPT: - chain->policy = NF_ACCEPT; - break; - default: - return -EINVAL; - } - return 0; -} - static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, @@ -822,7 +820,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nlattr *ha[NFTA_HOOK_MAX + 1]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u8 policy = NF_ACCEPT; u64 handle = 0; + unsigned int i; int err; bool create; @@ -836,9 +836,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - if (table->use == UINT_MAX) - return -EOVERFLOW; - chain = NULL; name = nla[NFTA_CHAIN_NAME]; @@ -856,6 +853,22 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } } + if (nla[NFTA_CHAIN_POLICY]) { + if ((chain != NULL && + !(chain->flags & NFT_BASE_CHAIN)) || + nla[NFTA_CHAIN_HOOK] == NULL) + return -EOPNOTSUPP; + + policy = nla_get_be32(nla[NFTA_CHAIN_POLICY]); + switch (policy) { + case NF_DROP: + case NF_ACCEPT: + break; + default: + return -EINVAL; + } + } + if (chain != NULL) { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; @@ -866,16 +879,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) return -EEXIST; - if (nla[NFTA_CHAIN_POLICY]) { - if (!(chain->flags & NFT_BASE_CHAIN)) - return -EOPNOTSUPP; - - err = nf_tables_chain_policy(nft_base_chain(chain), - nla[NFTA_CHAIN_POLICY]); - if (err < 0) - return err; - } - if (nla[NFTA_CHAIN_COUNTERS]) { if (!(chain->flags & NFT_BASE_CHAIN)) return -EOPNOTSUPP; @@ -886,24 +889,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return err; } + if (nla[NFTA_CHAIN_POLICY]) + nft_base_chain(chain)->policy = policy; + if (nla[NFTA_CHAIN_HANDLE] && name) nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); goto notify; } + if (table->use == UINT_MAX) + return -EOVERFLOW; + if (nla[NFTA_CHAIN_HOOK]) { + const struct nf_chain_type *type; struct nf_hook_ops *ops; nf_hookfn *hookfn; - u32 hooknum; - int type = NFT_CHAIN_T_DEFAULT; + u32 hooknum, priority; + type = chain_type[family][NFT_CHAIN_T_DEFAULT]; if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], create); - if (type < 0) - return -ENOENT; + if (IS_ERR(type)) + return PTR_ERR(type); } err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], @@ -917,46 +927,23 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); if (hooknum >= afi->nhooks) return -EINVAL; + priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - hookfn = chain_type[family][type]->fn[hooknum]; - if (hookfn == NULL) + if (!(type->hook_mask & (1 << hooknum))) return -EOPNOTSUPP; + if (!try_module_get(type->owner)) + return -ENOENT; + hookfn = type->hooks[hooknum]; basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) return -ENOMEM; - basechain->type = type; - chain = &basechain->chain; - - ops = &basechain->ops; - ops->pf = family; - ops->owner = afi->owner; - ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - ops->priv = chain; - ops->hook = hookfn; - if (afi->hooks[ops->hooknum]) - ops->hook = afi->hooks[ops->hooknum]; - - chain->flags |= NFT_BASE_CHAIN; - - if (nla[NFTA_CHAIN_POLICY]) { - err = nf_tables_chain_policy(basechain, - nla[NFTA_CHAIN_POLICY]); - if (err < 0) { - free_percpu(basechain->stats); - kfree(basechain); - return err; - } - } else - basechain->policy = NF_ACCEPT; - if (nla[NFTA_CHAIN_COUNTERS]) { err = nf_tables_counters(basechain, nla[NFTA_CHAIN_COUNTERS]); if (err < 0) { - free_percpu(basechain->stats); + module_put(type->owner); kfree(basechain); return err; } @@ -964,12 +951,33 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_stats __percpu *newstats; newstats = alloc_percpu(struct nft_stats); - if (newstats == NULL) + if (newstats == NULL) { + module_put(type->owner); + kfree(basechain); return -ENOMEM; + } + rcu_assign_pointer(basechain->stats, newstats); + } + + basechain->type = type; + chain = &basechain->chain; - rcu_assign_pointer(nft_base_chain(chain)->stats, - newstats); + for (i = 0; i < afi->nops; i++) { + ops = &basechain->ops[i]; + ops->pf = family; + ops->owner = afi->owner; + ops->hooknum = hooknum; + ops->priority = priority; + ops->priv = chain; + ops->hook = afi->hooks[ops->hooknum]; + if (hookfn) + ops->hook = hookfn; + if (afi->hook_ops_init) + afi->hook_ops_init(ops, i); } + + chain->flags |= NFT_BASE_CHAIN; + basechain->policy = policy; } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -984,8 +992,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) { - err = nf_register_hook(&nft_base_chain(chain)->ops); + err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); if (err < 0) { + module_put(basechain->type->owner); free_percpu(basechain->stats); kfree(basechain); return err; @@ -1006,6 +1015,7 @@ static void nf_tables_rcu_chain_destroy(struct rcu_head *head) BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { + module_put(nft_base_chain(chain)->type->owner); free_percpu(nft_base_chain(chain)->stats); kfree(nft_base_chain(chain)); } else @@ -1043,7 +1053,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) - nf_unregister_hook(&nft_base_chain(chain)->ops); + nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, family); @@ -1923,12 +1933,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + const struct nft_af_info *afi = NULL; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); + if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + } if (nla[NFTA_SET_TABLE] != NULL) { table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); @@ -1973,11 +1985,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, return -ENOMEM; list_for_each_entry(i, &ctx->table->sets, list) { - if (!sscanf(i->name, name, &n)) + int tmp; + + if (!sscanf(i->name, name, &tmp)) continue; - if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE) continue; - set_bit(n, inuse); + + set_bit(tmp, inuse); } n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); @@ -2094,21 +2109,25 @@ done: return skb->len; } -static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) +static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) { const struct nft_set *set; - unsigned int idx = 0, s_idx = cb->args[0]; + unsigned int idx, s_idx = cb->args[0]; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; if (cb->args[1]) return skb->len; list_for_each_entry(table, &ctx->afi->tables, list) { - if (cur_table && cur_table != table) - continue; + if (cur_table) { + if (cur_table != table) + continue; + cur_table = NULL; + } ctx->table = table; + idx = 0; list_for_each_entry(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; @@ -2127,6 +2146,61 @@ done: return skb->len; } +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx, s_idx = cb->args[0]; + const struct nft_af_info *afi; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + struct net *net = sock_net(skb->sk); + int cur_family = cb->args[3]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (cur_family) { + if (afi->family != cur_family) + continue; + + cur_family = 0; + } + + list_for_each_entry(table, &afi->tables, list) { + if (cur_table) { + if (cur_table != table) + continue; + + cur_table = NULL; + } + + ctx->table = table; + ctx->afi = afi; + idx = 0; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, + NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + cb->args[3] = afi->family; + goto done; + } +cont: + idx++; + } + if (s_idx) + s_idx = 0; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); @@ -2143,9 +2217,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - if (ctx.table == NULL) - ret = nf_tables_dump_sets_all(&ctx, skb, cb); - else + if (ctx.table == NULL) { + if (ctx.afi == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_family(&ctx, skb, cb); + } else ret = nf_tables_dump_sets_table(&ctx, skb, cb); return ret; @@ -2158,6 +2235,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int err; /* Verify existance before starting dump */ @@ -2172,6 +2250,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } + /* Only accept unspec with dump */ + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2341,6 +2423,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2352,6 +2435,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2370,7 +2456,9 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, + set->dtype == NFT_DATA_VERDICT ? + NFT_DATA_VERDICT : NFT_DATA_VALUE); } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, @@ -2521,9 +2609,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) u32 portid, seq; int event, err; - nfmsg = nlmsg_data(cb->nlh); - err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, - nft_set_elem_list_policy); + err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, + NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index cb9e685caae..0d879fcb876 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -109,14 +109,14 @@ static inline void nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } unsigned int -nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv; const struct nft_rule *rule; @@ -164,7 +164,7 @@ next_rule: break; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: @@ -172,6 +172,9 @@ next_rule: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); return data[NFT_REG_VERDICT].verdict; + } + + switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); @@ -213,7 +216,7 @@ next_rule: return nft_base_chain(chain)->policy; } -EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); +EXPORT_SYMBOL_GPL(nft_do_chain); int __init nf_tables_core_module_init(void) { diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c new file mode 100644 index 00000000000..9dd2d216cfc --- /dev/null +++ b/net/netfilter/nf_tables_inet.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2012-2014 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/ip.h> + +static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n) +{ + struct nft_af_info *afi; + + if (n == 1) + afi = &nft_af_ipv4; + else + afi = &nft_af_ipv6; + + ops->pf = afi->family; + if (afi->hooks[ops->hooknum]) + ops->hook = afi->hooks[ops->hooknum]; +} + +static struct nft_af_info nft_af_inet __read_mostly = { + .family = NFPROTO_INET, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 2, + .hook_ops_init = nft_inet_hook_ops_init, +}; + +static int __net_init nf_tables_inet_init_net(struct net *net) +{ + net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.inet == NULL) + return -ENOMEM; + memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet)); + + if (nft_register_afinfo(net, net->nft.inet) < 0) + goto err; + + return 0; + +err: + kfree(net->nft.inet); + return -ENOMEM; +} + +static void __net_exit nf_tables_inet_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.inet); + kfree(net->nft.inet); +} + +static struct pernet_operations nf_tables_inet_net_ops = { + .init = nf_tables_inet_init_net, + .exit = nf_tables_inet_exit_net, +}; + +static const struct nf_chain_type filter_inet = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_INET, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init nf_tables_inet_init(void) +{ + int ret; + + nft_register_chain_type(&filter_inet); + ret = register_pernet_subsys(&nf_tables_inet_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_inet); + + return ret; +} + +static void __exit nf_tables_inet_exit(void) +{ + unregister_pernet_subsys(&nf_tables_inet_net_ops); + nft_unregister_chain_type(&filter_inet); +} + +module_init(nf_tables_inet_init); +module_exit(nf_tables_inet_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(1); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3c4b69e5fe1..a155d19a225 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1053,6 +1053,7 @@ static void __net_exit nfnl_log_net_exit(struct net *net) #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); #endif + nf_log_unset(net, &nfulnl_logger); } static struct pernet_operations nfnl_log_net_ops = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 21258cf7009..f072fe80351 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -29,6 +29,7 @@ #include <linux/netfilter/nfnetlink_queue.h> #include <linux/list.h> #include <net/sock.h> +#include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> #include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> @@ -235,51 +236,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } -static void -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) -{ - int i, j = 0; - int plen = 0; /* length of skb->head fragment */ - struct page *page; - unsigned int offset; - - /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } - - if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); - len -= hlen; - } else { - plen = min_t(int, skb_headlen(from), len); - if (plen) { - page = virt_to_head_page(from->head); - offset = from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); - get_page(page); - j = 1; - len -= plen; - } - } - - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; - - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { - if (!len) - break; - skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; - skb_frag_ref(to, j); - j++; - } - skb_shinfo(to)->nr_frags = j; -} - static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, bool csum_verify) @@ -297,6 +253,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; } +static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) +{ + const struct cred *cred; + + if (sk->sk_state == TCP_TIME_WAIT) + return 0; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + cred = sk->sk_socket->file->f_cred; + if (nla_put_be32(skb, NFQA_UID, + htonl(from_kuid_munged(&init_user_ns, cred->fsuid)))) + goto nla_put_failure; + if (nla_put_be32(skb, NFQA_GID, + htonl(from_kgid_munged(&init_user_ns, cred->fsgid)))) + goto nla_put_failure; + } + read_unlock_bh(&sk->sk_callback_lock); + return 0; + +nla_put_failure: + read_unlock_bh(&sk->sk_callback_lock); + return -1; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -304,7 +285,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, { size_t size; size_t data_len = 0, cap_len = 0; - int hlen = 0; + unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -356,14 +337,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || - skb_headlen(entskb) < L1_CACHE_BYTES || - skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) - hlen = skb_headlen(entskb); - - if (skb_has_frag_list(entskb)) - hlen = entskb->len; - hlen = min_t(int, data_len, hlen); + hlen = skb_zerocopy_headlen(entskb); + hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; @@ -372,6 +347,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); + if (queue->flags & NFQA_CFG_F_UID_GID) { + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + + nla_total_size(sizeof(u_int32_t))); /* gid */ + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) @@ -484,6 +464,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; } + if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk && + nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) + goto nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; @@ -504,7 +488,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - nfqnl_zcopy(skb, entskb, data_len, hlen); + skb_zerocopy(skb, entskb, data_len, hlen); } nlh->nlmsg_len = skb->len; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index da0c1f4ada1..82cb8236f8a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -92,7 +92,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; } @@ -253,7 +253,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; if (hook_mask & target->hooks) @@ -323,7 +323,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; } @@ -449,7 +449,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; if (hook_mask & match->hooks) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 955f4e6e708..c7c12858e11 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -18,17 +18,21 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_ecache.h> struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; - enum nft_registers dreg:8; + union{ + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; uint8_t family; }; -static void nft_ct_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_ct_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); struct nft_data *dest = &data[priv->dreg]; @@ -123,24 +127,77 @@ err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static void nft_ct_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 value = data[priv->sreg].data[0]; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return; + + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + if (ct->mark != value) { + ct->mark = value; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; +#endif + } +} + static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_DREG] = { .type = NLA_U32 }, [NFTA_CT_KEY] = { .type = NLA_U32 }, [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, + [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_ct_l3proto_try_module_get(uint8_t family) { - struct nft_ct *priv = nft_expr_priv(expr); int err; - if (tb[NFTA_CT_DREG] == NULL || - tb[NFTA_CT_KEY] == NULL) - return -EINVAL; + if (family == NFPROTO_INET) { + err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4); + if (err < 0) + goto err1; + err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6); + if (err < 0) + goto err2; + } else { + err = nf_ct_l3proto_try_module_get(family); + if (err < 0) + goto err1; + } + return 0; + +err2: + nf_ct_l3proto_module_put(NFPROTO_IPV4); +err1: + return err; +} + +static void nft_ct_l3proto_module_put(uint8_t family) +{ + if (family == NFPROTO_INET) { + nf_ct_l3proto_module_put(NFPROTO_IPV4); + nf_ct_l3proto_module_put(NFPROTO_IPV6); + } else + nf_ct_l3proto_module_put(family); +} + +static int nft_ct_init_validate_get(const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); - priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); if (tb[NFTA_CT_DIRECTION] != NULL) { priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); switch (priv->dir) { @@ -179,34 +236,72 @@ static int nft_ct_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nf_ct_l3proto_try_module_get(ctx->afi->family); + return 0; +} + +static int nft_ct_init_validate_set(uint32_t key) +{ + switch (key) { + case NFT_CT_MARK: + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + + if (tb[NFTA_CT_DREG]) { + err = nft_ct_init_validate_get(expr, tb); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, + NFT_DATA_VALUE); + if (err < 0) + return err; + } else { + err = nft_ct_init_validate_set(priv->key); + if (err < 0) + return err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + } + + err = nft_ct_l3proto_try_module_get(ctx->afi->family); if (err < 0) return err; - priv->family = ctx->afi->family; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - goto err1; + priv->family = ctx->afi->family; - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - goto err1; return 0; - -err1: - nf_ct_l3proto_module_put(ctx->afi->family); - return err; } static void nft_ct_destroy(const struct nft_expr *expr) { struct nft_ct *priv = nft_expr_priv(expr); - nf_ct_l3proto_module_put(priv->family); + nft_ct_l3proto_module_put(priv->family); } -static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); @@ -222,19 +317,61 @@ nla_put_failure: return -1; } +static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_ct_type; -static const struct nft_expr_ops nft_ct_ops = { +static const struct nft_expr_ops nft_ct_get_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), - .eval = nft_ct_eval, + .eval = nft_ct_get_eval, .init = nft_ct_init, .destroy = nft_ct_destroy, - .dump = nft_ct_dump, + .dump = nft_ct_get_dump, }; +static const struct nft_expr_ops nft_ct_set_ops = { + .type = &nft_ct_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .eval = nft_ct_set_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_set_dump, +}; + +static const struct nft_expr_ops * +nft_ct_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_CT_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_CT_DREG]) + return &nft_ct_get_ops; + + if (tb[NFTA_CT_SREG]) + return &nft_ct_set_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_ct_type __read_mostly = { .name = "ct", - .ops = &nft_ct_ops, + .select_ops = &nft_ct_select_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 8e0bb75e7c5..55c939f5371 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -31,7 +31,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr, { struct nft_exthdr *priv = nft_expr_priv(expr); struct nft_data *dest = &data[priv->dreg]; - unsigned int offset; + unsigned int offset = 0; int err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 57cad072a13..5af790123ad 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -33,7 +33,7 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_log *priv = nft_expr_priv(expr); struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8c28220a90b..e8254ad2e5a 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -21,12 +21,15 @@ struct nft_meta { enum nft_meta_keys key:8; - enum nft_registers dreg:8; + union { + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; }; -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -40,6 +43,12 @@ static void nft_meta_eval(const struct nft_expr *expr, case NFT_META_PROTOCOL: *(__be16 *)dest->data = skb->protocol; break; + case NFT_META_NFPROTO: + dest->data[0] = pkt->ops->pf; + break; + case NFT_META_L4PROTO: + dest->data[0] = pkt->tprot; + break; case NFT_META_PRIORITY: dest->data[0] = skb->priority; break; @@ -132,25 +141,54 @@ err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 value = data[meta->sreg].data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = value; + break; + case NFT_META_PRIORITY: + skb->priority = value; + break; + case NFT_META_NFTRACE: + skb->nf_trace = 1; + break; + default: + WARN_ON(1); + } +} + static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, + [NFTA_META_SREG] = { .type = NLA_U32 }, }; -static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_meta_init_validate_set(uint32_t key) { - struct nft_meta *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_META_DREG] == NULL || - tb[NFTA_META_KEY] == NULL) - return -EINVAL; + switch (key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: + return 0; + default: + return -EOPNOTSUPP; + } +} - priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (priv->key) { +static int nft_meta_init_validate_get(uint32_t key) +{ + switch (key) { case NFT_META_LEN: case NFT_META_PROTOCOL: + case NFT_META_NFPROTO: + case NFT_META_L4PROTO: case NFT_META_PRIORITY: case NFT_META_MARK: case NFT_META_IIF: @@ -167,26 +205,72 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif - break; + return 0; default: return -EOPNOTSUPP; } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); +} + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + + if (tb[NFTA_META_DREG]) { + err = nft_meta_init_validate_get(priv->key); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + return nft_validate_data_load(ctx, priv->dreg, NULL, + NFT_DATA_VALUE); + } + + err = nft_meta_init_validate_set(priv->key); + if (err < 0) + return err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); + err = nft_validate_input_register(priv->sreg); if (err < 0) return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + + return 0; } -static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg))) + goto nla_put_failure; + return 0; nla_put_failure: @@ -194,17 +278,44 @@ nla_put_failure: } static struct nft_expr_type nft_meta_type; -static const struct nft_expr_ops nft_meta_ops = { +static const struct nft_expr_ops nft_meta_get_ops = { + .type = &nft_meta_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_get_eval, + .init = nft_meta_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_eval, + .eval = nft_meta_set_eval, .init = nft_meta_init, - .dump = nft_meta_dump, + .dump = nft_meta_set_dump, }; +static const struct nft_expr_ops * +nft_meta_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_set_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", - .ops = &nft_meta_ops, + .select_ops = &nft_meta_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c deleted file mode 100644 index 71177df75ff..00000000000 --- a/net/netfilter/nft_meta_target.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> - -struct nft_meta { - enum nft_meta_keys key; -}; - -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data *nfres, - struct nft_data *data, - const struct nft_pktinfo *pkt) -{ - const struct nft_meta *meta = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; - u32 val = data->data[0]; - - switch (meta->key) { - case NFT_META_MARK: - skb->mark = val; - break; - case NFT_META_PRIORITY: - skb->priority = val; - break; - case NFT_META_NFTRACE: - skb->nf_trace = val; - break; -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: - skb->secmark = val; - break; -#endif - default: - WARN_ON(1); - } -} - -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { - [NFTA_META_KEY] = { .type = NLA_U32 }, -}; - -static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - if (tb[NFTA_META_KEY] == NULL) - return -EINVAL; - - meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (meta->key) { - case NFT_META_MARK: - case NFT_META_PRIORITY: - case NFT_META_NFTRACE: -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: -#endif - break; - default: - return -EINVAL; - } - - return 0; -} - -static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops meta_target __read_mostly = { - .name = "meta", - .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, - .eval = nft_meta_eval, - .init = nft_meta_init, - .dump = nft_meta_dump, - .policy = nft_meta_policy, - .maxattr = NFTA_META_MAX, -}; - -static int __init nft_meta_target_init(void) -{ - return nft_register_expr(&meta_target); -} - -static void __exit nft_meta_target_exit(void) -{ - nft_unregister_expr(&meta_target); -} - -module_init(nft_meta_target_init); -module_exit(nft_meta_target_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c new file mode 100644 index 00000000000..cbea473d69e --- /dev/null +++ b/net/netfilter/nft_queue.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code partly funded by OISF + * (http://www.openinfosecfoundation.org/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_queue.h> + +static u32 jhash_initval __read_mostly; + +struct nft_queue { + u16 queuenum; + u16 queues_total; + u16 flags; + u8 family; +}; + +static void nft_queue_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue = priv->queuenum; + u32 ret; + + if (priv->queues_total > 1) { + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = priv->queuenum + cpu % priv->queues_total; + } else { + queue = nfqueue_hash(pkt->skb, queue, + priv->queues_total, priv->family, + jhash_initval); + } + } + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + data[NFT_REG_VERDICT].verdict = ret; +} + +static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { + [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, + [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, + [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, +}; + +static int nft_queue_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + + if (tb[NFTA_QUEUE_NUM] == NULL) + return -EINVAL; + + init_hashrandom(&jhash_initval); + priv->family = ctx->afi->family; + priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); + + if (tb[NFTA_QUEUE_TOTAL] != NULL) + priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); + if (tb[NFTA_QUEUE_FLAGS] != NULL) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + } + return 0; +} + +static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) || + nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_queue_type; +static const struct nft_expr_ops nft_queue_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_eval, + .init = nft_queue_init, + .dump = nft_queue_dump, +}; + +static struct nft_expr_type nft_queue_type __read_mostly = { + .name = "queue", + .ops = &nft_queue_ops, + .policy = nft_queue_policy, + .maxattr = NFTA_QUEUE_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_queue_module_init(void) +{ + return nft_register_expr(&nft_queue_type); +} + +static void __exit nft_queue_module_exit(void) +{ + nft_unregister_expr(&nft_queue_type); +} + +module_init(nft_queue_module_init); +module_exit(nft_queue_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Leblond <eric@regit.org>"); +MODULE_ALIAS_NFT_EXPR("queue"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/netfilter/nft_reject.c index 4a5e94ac314..5e204711d70 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/netfilter/nft_reject.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,10 +17,16 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> + +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) +#include <net/netfilter/ipv6/nf_reject.h> +#endif struct nft_reject { enum nft_reject_types type:8; u8 icmp_code; + u8 family; }; static void nft_reject_eval(const struct nft_expr *expr, @@ -27,12 +34,26 @@ static void nft_reject_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); +#endif switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + if (priv->family == NFPROTO_IPV4) + nf_send_unreach(pkt->skb, priv->icmp_code); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); +#endif break; case NFT_REJECT_TCP_RST: + if (priv->family == NFPROTO_IPV4) + nf_send_reset(pkt->skb, pkt->ops->hooknum); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); +#endif break; } @@ -53,6 +74,7 @@ static int nft_reject_init(const struct nft_ctx *ctx, if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; + priv->family = ctx->afi->family; priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index da35ac06a97..5929be622c5 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, ret = 0; if ((info->ct_events || info->exp_events) && !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) + GFP_KERNEL)) { + ret = -EINVAL; goto err3; + } if (info->helper[0]) { ret = xt_ct_set_helper(ct, info->helper, par); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index ed00fef5899..8f1779ff7e3 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,15 +11,13 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/jhash.h> - #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFQUEUE.h> +#include <net/netfilter/nf_queue.h> + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet forwarding to netlink"); MODULE_LICENSE("GPL"); @@ -28,7 +26,6 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -38,69 +35,16 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_QUEUE_NR(tinfo->queuenum); } -static u32 hash_v4(const struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - /* packets in either direction go into same queue */ - if ((__force u32)iph->saddr < (__force u32)iph->daddr) - return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); - - return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); -} - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static u32 hash_v6(const struct sk_buff *skb) -{ - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 a, b, c; - - if ((__force u32)ip6h->saddr.s6_addr32[3] < - (__force u32)ip6h->daddr.s6_addr32[3]) { - a = (__force u32) ip6h->saddr.s6_addr32[3]; - b = (__force u32) ip6h->daddr.s6_addr32[3]; - } else { - b = (__force u32) ip6h->saddr.s6_addr32[3]; - a = (__force u32) ip6h->daddr.s6_addr32[3]; - } - - if ((__force u32)ip6h->saddr.s6_addr32[1] < - (__force u32)ip6h->daddr.s6_addr32[1]) - c = (__force u32) ip6h->saddr.s6_addr32[1]; - else - c = (__force u32) ip6h->daddr.s6_addr32[1]; - - return jhash_3words(a, b, c, jhash_initval); -} -#endif - -static u32 -nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct xt_NFQ_info_v1 *info = par->targinfo; - u32 queue = info->queuenum; - - if (par->family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; -#endif - - return queue; -} - static unsigned int nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) - queue = nfqueue_hash(skb, par); - + if (info->queues_total > 1) { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } return NF_QUEUE_NR(queue); } @@ -120,10 +64,8 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_initval, sizeof(jhash_initval)); - rnd_inited = true; - } + init_hashrandom(&jhash_initval); + if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); return -EINVAL; @@ -154,8 +96,10 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) int cpu = smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; - } else - queue = nfqueue_hash(skb, par); + } else { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } } ret = NF_QUEUE_NR(queue); diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c new file mode 100644 index 00000000000..9a8e77e7f8d --- /dev/null +++ b/net/netfilter/xt_cgroup.c @@ -0,0 +1,71 @@ +/* + * Xtables module to match the process control group. + * + * Might be used to implement individual "per-application" firewall + * policies in contrast to global policies based on control groups. + * Matching is based upon processes tagged to net_cls' classid marker. + * + * (C) 2013 Daniel Borkmann <dborkman@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_cgroup.h> +#include <net/sock.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); +MODULE_DESCRIPTION("Xtables: process control group matching"); +MODULE_ALIAS("ipt_cgroup"); +MODULE_ALIAS("ip6t_cgroup"); + +static int cgroup_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + + return info->id ? 0 : -EINVAL; +} + +static bool +cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info *info = par->matchinfo; + + if (skb->sk == NULL) + return false; + + return (info->id == skb->sk->sk_classid) ^ info->invert; +} + +static struct xt_match cgroup_mt_reg __read_mostly = { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check, + .match = cgroup_mt, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init cgroup_mt_init(void) +{ + return xt_register_match(&cgroup_mt_reg); +} + +static void __exit cgroup_mt_exit(void) +{ + xt_unregister_match(&cgroup_mt_reg); +} + +module_init(cgroup_mt_init); +module_exit(cgroup_mt_exit); diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c new file mode 100644 index 00000000000..a4c7561698c --- /dev/null +++ b/net/netfilter/xt_ipcomp.c @@ -0,0 +1,111 @@ +/* Kernel module to match IPComp parameters for IPv4 and IPv6 + * + * Copyright (C) 2013 WindRiver + * + * Author: + * Fan Du <fan.du@windriver.com> + * + * Based on: + * net/netfilter/xt_esp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> + +#include <linux/netfilter/xt_ipcomp.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fan Du <fan.du@windriver.com>"); +MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); + +/* Returns 1 if the spi is matched by the range, 0 otherwise */ +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) +{ + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); + r = (spi >= min && spi <= max) ^ invert; + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); + return r; +} + +static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ip_comp_hdr _comphdr; + const struct ip_comp_hdr *chdr; + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr); + if (chdr == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + pr_debug("Dropping evil IPComp tinygram.\n"); + par->hotdrop = true; + return 0; + } + + return spi_match(compinfo->spis[0], compinfo->spis[1], + ntohl(chdr->cpi << 16), + !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); +} + +static int comp_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { + pr_err("unknown flags %X\n", compinfo->invflags); + return -EINVAL; + } + return 0; +} + +static struct xt_match comp_mt_reg[] __read_mostly = { + { + .name = "ipcomp", + .family = NFPROTO_IPV4, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, + { + .name = "ipcomp", + .family = NFPROTO_IPV6, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, +}; + +static int __init comp_mt_init(void) +{ + return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +static void __exit comp_mt_exit(void) +{ + xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +module_init(comp_mt_init); +module_exit(comp_mt_exit); diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c new file mode 100644 index 00000000000..8aee572771f --- /dev/null +++ b/net/netfilter/xt_l2tp.c @@ -0,0 +1,354 @@ +/* Kernel module to match L2TP header parameters. */ + +/* (C) 2013 James Chapman <jchapman@katalix.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/udp.h> +#include <linux/l2tp.h> + +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_tcpudp.h> +#include <linux/netfilter/xt_l2tp.h> + +/* L2TP header masks */ +#define L2TP_HDR_T_BIT 0x8000 +#define L2TP_HDR_L_BIT 0x4000 +#define L2TP_HDR_VER 0x000f + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); +MODULE_DESCRIPTION("Xtables: L2TP header match"); +MODULE_ALIAS("ipt_l2tp"); +MODULE_ALIAS("ip6t_l2tp"); + +/* The L2TP fields that can be matched */ +struct l2tp_data { + u32 tid; + u32 sid; + u8 type; + u8 version; +}; + +union l2tp_val { + __be16 val16[2]; + __be32 val32; +}; + +static bool l2tp_match(const struct xt_l2tp_info *info, struct l2tp_data *data) +{ + if ((info->flags & XT_L2TP_TYPE) && (info->type != data->type)) + return false; + + if ((info->flags & XT_L2TP_VERSION) && (info->version != data->version)) + return false; + + /* Check tid only for L2TPv3 control or any L2TPv2 packets */ + if ((info->flags & XT_L2TP_TID) && + ((data->type == XT_L2TP_TYPE_CONTROL) || (data->version == 2)) && + (info->tid != data->tid)) + return false; + + /* Check sid only for L2TP data packets */ + if ((info->flags & XT_L2TP_SID) && (data->type == XT_L2TP_TYPE_DATA) && + (info->sid != data->sid)) + return false; + + return true; +} + +/* Parse L2TP header fields when UDP encapsulation is used. Handles + * L2TPv2 and L2TPv3. Note the L2TPv3 control and data packets have a + * different format. See + * RFC2661, Section 3.1, L2TPv2 Header Format + * RFC3931, Section 3.2.1, L2TPv3 Control Message Header + * RFC3931, Section 3.2.2, L2TPv3 Data Message Header + * RFC3931, Section 4.1.2.1, L2TPv3 Session Header over UDP + */ +static bool l2tp_udp_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff) +{ + const struct xt_l2tp_info *info = par->matchinfo; + int uhlen = sizeof(struct udphdr); + int offs = thoff + uhlen; + union l2tp_val *lh; + union l2tp_val lhbuf; + u16 flags; + struct l2tp_data data = { 0, }; + + if (par->fragoff != 0) + return false; + + /* Extract L2TP header fields. The flags in the first 16 bits + * tell us where the other fields are. + */ + lh = skb_header_pointer(skb, offs, 2, &lhbuf); + if (lh == NULL) + return false; + + flags = ntohs(lh->val16[0]); + if (flags & L2TP_HDR_T_BIT) + data.type = XT_L2TP_TYPE_CONTROL; + else + data.type = XT_L2TP_TYPE_DATA; + data.version = (u8) flags & L2TP_HDR_VER; + + /* Now extract the L2TP tid/sid. These are in different places + * for L2TPv2 (rfc2661) and L2TPv3 (rfc3931). For L2TPv2, we + * must also check to see if the length field is present, + * since this affects the offsets into the packet of the + * tid/sid fields. + */ + if (data.version == 3) { + lh = skb_header_pointer(skb, offs + 4, 4, &lhbuf); + if (lh == NULL) + return false; + if (data.type == XT_L2TP_TYPE_CONTROL) + data.tid = ntohl(lh->val32); + else + data.sid = ntohl(lh->val32); + } else if (data.version == 2) { + if (flags & L2TP_HDR_L_BIT) + offs += 2; + lh = skb_header_pointer(skb, offs + 2, 4, &lhbuf); + if (lh == NULL) + return false; + data.tid = (u32) ntohs(lh->val16[0]); + data.sid = (u32) ntohs(lh->val16[1]); + } else + return false; + + return l2tp_match(info, &data); +} + +/* Parse L2TP header fields for IP encapsulation (no UDP header). + * L2TPv3 data packets have a different form with IP encap. See + * RC3931, Section 4.1.1.1, L2TPv3 Session Header over IP. + * RC3931, Section 4.1.1.2, L2TPv3 Control and Data Traffic over IP. + */ +static bool l2tp_ip_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff) +{ + const struct xt_l2tp_info *info = par->matchinfo; + union l2tp_val *lh; + union l2tp_val lhbuf; + struct l2tp_data data = { 0, }; + + /* For IP encap, the L2TP sid is the first 32-bits. */ + lh = skb_header_pointer(skb, thoff, sizeof(lhbuf), &lhbuf); + if (lh == NULL) + return false; + if (lh->val32 == 0) { + /* Must be a control packet. The L2TP tid is further + * into the packet. + */ + data.type = XT_L2TP_TYPE_CONTROL; + lh = skb_header_pointer(skb, thoff + 8, sizeof(lhbuf), + &lhbuf); + if (lh == NULL) + return false; + data.tid = ntohl(lh->val32); + } else { + data.sid = ntohl(lh->val32); + data.type = XT_L2TP_TYPE_DATA; + } + + data.version = 3; + + return l2tp_match(info, &data); +} + +static bool l2tp_mt4(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct iphdr *iph = ip_hdr(skb); + u8 ipproto = iph->protocol; + + /* l2tp_mt_check4 already restricts the transport protocol */ + switch (ipproto) { + case IPPROTO_UDP: + return l2tp_udp_mt(skb, par, par->thoff); + case IPPROTO_L2TP: + return l2tp_ip_mt(skb, par, par->thoff); + } + + return false; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static bool l2tp_mt6(const struct sk_buff *skb, struct xt_action_param *par) +{ + unsigned int thoff = 0; + unsigned short fragoff = 0; + int ipproto; + + ipproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (fragoff != 0) + return false; + + /* l2tp_mt_check6 already restricts the transport protocol */ + switch (ipproto) { + case IPPROTO_UDP: + return l2tp_udp_mt(skb, par, thoff); + case IPPROTO_L2TP: + return l2tp_ip_mt(skb, par, thoff); + } + + return false; +} +#endif + +static int l2tp_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_l2tp_info *info = par->matchinfo; + + /* Check for invalid flags */ + if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION | + XT_L2TP_TYPE)) { + pr_info("unknown flags: %x\n", info->flags); + return -EINVAL; + } + + /* At least one of tid, sid or type=control must be specified */ + if ((!(info->flags & XT_L2TP_TID)) && + (!(info->flags & XT_L2TP_SID)) && + ((!(info->flags & XT_L2TP_TYPE)) || + (info->type != XT_L2TP_TYPE_CONTROL))) { + pr_info("invalid flags combination: %x\n", info->flags); + return -EINVAL; + } + + /* If version 2 is specified, check that incompatible params + * are not supplied + */ + if (info->flags & XT_L2TP_VERSION) { + if ((info->version < 2) || (info->version > 3)) { + pr_info("wrong L2TP version: %u\n", info->version); + return -EINVAL; + } + + if (info->version == 2) { + if ((info->flags & XT_L2TP_TID) && + (info->tid > 0xffff)) { + pr_info("v2 tid > 0xffff: %u\n", info->tid); + return -EINVAL; + } + if ((info->flags & XT_L2TP_SID) && + (info->sid > 0xffff)) { + pr_info("v2 sid > 0xffff: %u\n", info->sid); + return -EINVAL; + } + } + } + + return 0; +} + +static int l2tp_mt_check4(const struct xt_mtchk_param *par) +{ + const struct xt_l2tp_info *info = par->matchinfo; + const struct ipt_entry *e = par->entryinfo; + const struct ipt_ip *ip = &e->ip; + int ret; + + ret = l2tp_mt_check(par); + if (ret != 0) + return ret; + + if ((ip->proto != IPPROTO_UDP) && + (ip->proto != IPPROTO_L2TP)) { + pr_info("missing protocol rule (udp|l2tpip)\n"); + return -EINVAL; + } + + if ((ip->proto == IPPROTO_L2TP) && + (info->version == 2)) { + pr_info("v2 doesn't support IP mode\n"); + return -EINVAL; + } + + return 0; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int l2tp_mt_check6(const struct xt_mtchk_param *par) +{ + const struct xt_l2tp_info *info = par->matchinfo; + const struct ip6t_entry *e = par->entryinfo; + const struct ip6t_ip6 *ip = &e->ipv6; + int ret; + + ret = l2tp_mt_check(par); + if (ret != 0) + return ret; + + if ((ip->proto != IPPROTO_UDP) && + (ip->proto != IPPROTO_L2TP)) { + pr_info("missing protocol rule (udp|l2tpip)\n"); + return -EINVAL; + } + + if ((ip->proto == IPPROTO_L2TP) && + (info->version == 2)) { + pr_info("v2 doesn't support IP mode\n"); + return -EINVAL; + } + + return 0; +} +#endif + +static struct xt_match l2tp_mt_reg[] __read_mostly = { + { + .name = "l2tp", + .revision = 0, + .family = NFPROTO_IPV4, + .match = l2tp_mt4, + .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)), + .checkentry = l2tp_mt_check4, + .hooks = ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD)), + .me = THIS_MODULE, + }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + { + .name = "l2tp", + .revision = 0, + .family = NFPROTO_IPV6, + .match = l2tp_mt6, + .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)), + .checkentry = l2tp_mt_check6, + .hooks = ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD)), + .me = THIS_MODULE, + }, +#endif +}; + +static int __init l2tp_mt_init(void) +{ + return xt_register_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg)); +} + +static void __exit l2tp_mt_exit(void) +{ + xt_unregister_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg)); +} + +module_init(l2tp_mt_init); +module_exit(l2tp_mt_exit); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bca50b95c18..34a656d9017 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -131,7 +131,7 @@ int netlink_add_tap(struct netlink_tap *nt) } EXPORT_SYMBOL_GPL(netlink_add_tap); -int __netlink_remove_tap(struct netlink_tap *nt) +static int __netlink_remove_tap(struct netlink_tap *nt) { bool found = false; struct netlink_tap *tmp; @@ -155,7 +155,6 @@ out: return found ? 0 : -ENODEV; } -EXPORT_SYMBOL_GPL(__netlink_remove_tap); int netlink_remove_tap(struct netlink_tap *nt) { @@ -204,6 +203,8 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); + nskb->pkt_type = netlink_is_kernel(sk) ? + PACKET_KERNEL : PACKET_USER; ret = dev_queue_xmit(nskb); if (unlikely(ret > 0)) @@ -239,6 +240,13 @@ static void netlink_deliver_tap(struct sk_buff *skb) rcu_read_unlock(); } +static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, + struct sk_buff *skb) +{ + if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) + netlink_deliver_tap(skb); +} + static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1697,14 +1705,10 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { - /* We could do a netlink_deliver_tap(skb) here as well - * but since this is intended for the kernel only, we - * should rather let it stay under the hood. - */ - ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; + netlink_deliver_tap_kernel(sk, ssk, skb); nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -1769,6 +1773,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_put; + if (ring->frame_size - NL_MMAP_HDRLEN < size) + goto out_put; + skb = alloc_skb_head(gfp_mask); if (skb == NULL) goto err1; @@ -1778,6 +1785,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_free; + /* check again under lock */ maxlen = ring->frame_size - NL_MMAP_HDRLEN; if (maxlen < size) goto out_free; @@ -2535,21 +2543,6 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) netlink_update_socket_mc(nlk_sk(sk), group, 0); } -/** - * netlink_clear_multicast_users - kick off multicast listeners - * - * This function removes all listeners from the given group. - * @ksk: The kernel netlink socket, as returned by - * netlink_kernel_create(). - * @group: The multicast group to clear. - */ -void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) -{ - netlink_table_grab(); - __netlink_clear_multicast_users(ksk, group); - netlink_table_ungrab(); -} - struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 713671ae45a..b1dcdb932a8 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -461,6 +461,26 @@ int genl_unregister_family(struct genl_family *family) EXPORT_SYMBOL(genl_unregister_family); /** + * genlmsg_new_unicast - Allocate generic netlink message for unicast + * @payload: size of the message payload + * @info: information on destination + * @flags: the type of memory to allocate + * + * Allocates a new sk_buff large enough to cover the specified payload + * plus required Netlink headers. Will check receiving socket for + * memory mapped i/o capability and use it if enabled. Will fall back + * to non-mapped skb if message size exceeds the frame size of the ring. + */ +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags) +{ + size_t len = nlmsg_total_size(genlmsg_total_size(payload)); + + return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); +} +EXPORT_SYMBOL_GPL(genlmsg_new_unicast); + +/** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to @@ -600,6 +620,7 @@ static int genl_family_rcv_msg(struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; + info.dst_sk = skb->sk; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6f5e1dd3be2..df4692826ea 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -108,10 +108,9 @@ int lockdep_ovsl_is_held(void) #endif static struct vport *new_vport(const struct vport_parms *); -static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); -static int queue_userspace_packet(struct net *, int dp_ifindex, - struct sk_buff *, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock or ovs_mutex. */ @@ -133,7 +132,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) } /* Must be called with rcu_read_lock or ovs_mutex. */ -const char *ovs_dp_name(const struct datapath *dp) +static const char *ovs_dp_name(const struct datapath *dp) { struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); @@ -234,7 +233,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -251,9 +250,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; - stats_counter = &stats->n_hit; - ovs_flow_used(OVS_CB(skb)->flow, skb); + ovs_flow_stats_update(OVS_CB(skb)->flow, skb); ovs_execute_actions(dp, skb); + stats_counter = &stats->n_hit; out: /* Update datapath statistics. */ @@ -277,7 +276,6 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; - int dp_ifindex; int err; if (upcall_info->portid == 0) { @@ -285,16 +283,10 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, goto err; } - dp_ifindex = get_dpifindex(dp); - if (!dp_ifindex) { - err = -ENODEV; - goto err; - } - if (!skb_is_gso(skb)) - err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(dp, skb, upcall_info); else - err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); + err = queue_gso_packets(dp, skb, upcall_info); if (err) goto err; @@ -310,8 +302,7 @@ err: return err; } -static int queue_gso_packets(struct net *net, int dp_ifindex, - struct sk_buff *skb, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; @@ -320,14 +311,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); + segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); /* Queue all of the segments. */ skb = segs; do { - err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(dp, skb, upcall_info); if (err) break; @@ -380,11 +371,11 @@ static size_t key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } -static size_t upcall_msg_size(const struct sk_buff *skb, - const struct nlattr *userdata) +static size_t upcall_msg_size(const struct nlattr *userdata, + unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ + + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ @@ -394,15 +385,24 @@ static size_t upcall_msg_size(const struct sk_buff *skb, return size; } -static int queue_userspace_packet(struct net *net, int dp_ifindex, - struct sk_buff *skb, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; struct sk_buff *nskb = NULL; struct sk_buff *user_skb; /* to be queued to userspace */ struct nlattr *nla; - int err; + struct genl_info info = { + .dst_sk = ovs_dp_get_net(dp)->genl_sock, + .snd_portid = upcall_info->portid, + }; + size_t len; + unsigned int hlen; + int err, dp_ifindex; + + dp_ifindex = get_dpifindex(dp); + if (!dp_ifindex) + return -ENODEV; if (vlan_tx_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); @@ -422,7 +422,22 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, goto out; } - user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); + /* Complete checksum if needed */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto out; + + /* Older versions of OVS user space enforce alignment of the last + * Netlink attribute to NLA_ALIGNTO which would require extensive + * padding logic. Only perform zerocopy if padding is not required. + */ + if (dp->user_features & OVS_DP_F_UNALIGNED) + hlen = skb_zerocopy_headlen(skb); + else + hlen = skb->len; + + len = upcall_msg_size(upcall_info->userdata, hlen); + user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -441,26 +456,24 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); - nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); + /* Only reserve room for attribute header, packet data is added + * in skb_zerocopy() */ + if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { + err = -ENOBUFS; + goto out; + } + nla->nla_len = nla_attr_size(skb->len); - skb_copy_and_csum_dev(skb, nla_data(nla)); + skb_zerocopy(user_skb, skb, skb->len, hlen); - genlmsg_end(user_skb, upcall); - err = genlmsg_unicast(net, user_skb, upcall_info->portid); + ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; + err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: kfree_skb(nskb); return err; } -static void clear_stats(struct sw_flow *flow) -{ - flow->used = 0; - flow->tcp_flags = 0; - flow->packet_count = 0; - flow->byte_count = 0; -} - static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = info->userhdr; @@ -499,7 +512,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ - flow = ovs_flow_alloc(); + flow = ovs_flow_alloc(false); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -635,10 +648,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const int skb_orig_len = skb->len; struct nlattr *start; struct ovs_flow_stats stats; + __be16 tcp_flags; + unsigned long used; struct ovs_header *ovs_header; struct nlattr *nla; - unsigned long used; - u8 tcp_flags; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); @@ -667,24 +680,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); - spin_lock_bh(&flow->lock); - used = flow->used; - stats.n_packets = flow->packet_count; - stats.n_bytes = flow->byte_count; - tcp_flags = (u8)ntohs(flow->tcp_flags); - spin_unlock_bh(&flow->lock); - + ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; if (stats.n_packets && - nla_put(skb, OVS_FLOW_ATTR_STATS, - sizeof(struct ovs_flow_stats), &stats)) + nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) goto nla_put_failure; - if (tcp_flags && - nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) + if ((u8)ntohs(tcp_flags) && + nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) goto nla_put_failure; /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if @@ -701,8 +707,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (start) { const struct sw_flow_actions *sf_acts; - sf_acts = rcu_dereference_check(flow->sf_acts, - lockdep_ovsl_is_held()); + sf_acts = rcu_dereference_ovsl(flow->sf_acts); err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); @@ -726,39 +731,34 @@ error: return err; } -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) +static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, + struct genl_info *info) { - const struct sw_flow_actions *sf_acts; + size_t len; - sf_acts = ovsl_dereference(flow->sf_acts); + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); + return genlmsg_new_unicast(len, info, GFP_KERNEL); } static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 portid, u32 seq, u8 cmd) + struct genl_info *info, + u8 cmd) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow); + skb = ovs_flow_cmd_alloc_info(flow, info); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, + info->snd_seq, 0, cmd); BUG_ON(retval < 0); return skb; } -static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) -{ - u32 __always_unused n_mask_hit; - - return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); -} - static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -770,6 +770,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; + bool exact_5tuple; int error; /* Extract key. */ @@ -778,7 +779,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, + error = ovs_nla_get_match(&match, &exact_5tuple, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; @@ -809,7 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Check if this is a duplicate flow */ - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; @@ -817,12 +818,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Allocate flow. */ - flow = ovs_flow_alloc(); + flow = ovs_flow_alloc(!exact_5tuple); if (IS_ERR(flow)) { error = PTR_ERR(flow); goto err_unlock_ovs; } - clear_stats(flow); flow->key = masked_key; flow->unmasked_key = key; @@ -835,8 +835,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; @@ -864,15 +863,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) { - spin_lock_bh(&flow->lock); - clear_stats(flow); - spin_unlock_bh(&flow->lock); - } + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); } ovs_unlock(); @@ -910,7 +905,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -921,14 +916,13 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -965,17 +959,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow); + reply = ovs_flow_cmd_alloc_info(flow, info); if (!reply) { err = -ENOMEM; goto unlock; @@ -1061,6 +1055,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, }; static struct genl_family dp_datapath_genl_family = { @@ -1119,6 +1114,9 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, &dp_megaflow_stats)) goto nla_put_failure; + if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) + goto nla_put_failure; + return genlmsg_end(skb, ovs_header); nla_put_failure: @@ -1127,17 +1125,17 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, - u32 seq, u8 cmd) +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, + struct genl_info *info, u8 cmd) { struct sk_buff *skb; int retval; - skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1165,6 +1163,24 @@ static struct datapath *lookup_datapath(struct net *net, return dp ? dp : ERR_PTR(-ENODEV); } +static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath *dp; + + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + if (!dp) + return; + + WARN(dp->user_features, "Dropping previously announced user features\n"); + dp->user_features = 0; +} + +static void ovs_dp_change(struct datapath *dp, struct nlattr **a) +{ + if (a[OVS_DP_ATTR_USER_FEATURES]) + dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1223,17 +1239,27 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + ovs_dp_change(dp, a); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); if (err == -EBUSY) err = -EEXIST; + if (err == -EEXIST) { + /* An outdated user space instance that does not understand + * the concept of user_features has attempted to create a new + * datapath and is likely to reuse it. Drop all user features. + */ + if (info->genlhdr->version < OVS_DP_VER_FEATURES) + ovs_dp_reset_user_features(skb, info); + } + goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto err_destroy_local_port; @@ -1299,8 +1325,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_DEL); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) goto unlock; @@ -1328,8 +1353,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + ovs_dp_change(dp, info->attrs); + + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, @@ -1360,8 +1386,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1441,7 +1466,7 @@ struct genl_family dp_vport_genl_family = { .parallel_ops = true, }; -struct genl_multicast_group ovs_dp_vport_multicast_group = { +static struct genl_multicast_group ovs_dp_vport_multicast_group = { .name = OVS_VPORT_MCGROUP }; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4067ea41be2..6be9fbb5e9c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,6 +88,8 @@ struct datapath { /* Network namespace ref. */ struct net *net; #endif + + u32 user_features; }; /** @@ -145,6 +147,8 @@ int lockdep_ovsl_is_held(void); #define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) #define ovsl_dereference(p) \ rcu_dereference_protected(p, lockdep_ovsl_is_held()) +#define rcu_dereference_ovsl(p) \ + rcu_dereference_check(p, lockdep_ovsl_is_held()) static inline struct net *ovs_dp_get_net(struct datapath *dp) { @@ -178,14 +182,12 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; -extern struct genl_multicast_group ovs_dp_vport_multicast_group; void ovs_dp_process_received_packet(struct vport *, struct sk_buff *); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, const struct dp_upcall_info *); -const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b409f527960..16f4b46161d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -35,6 +35,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/sctp.h> +#include <linux/smp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> @@ -60,10 +61,16 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { + struct flow_stats *stats; __be16 tcp_flags = 0; + if (!flow->stats.is_percpu) + stats = flow->stats.stat; + else + stats = this_cpu_ptr(flow->stats.cpu_stats); + if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && flow->key.ip.proto == IPPROTO_TCP && @@ -71,12 +78,87 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); + spin_lock(&stats->lock); + stats->used = jiffies; + stats->packet_count++; + stats->byte_count += skb->len; + stats->tcp_flags |= tcp_flags; + spin_unlock(&stats->lock); +} + +static void stats_read(struct flow_stats *stats, + struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + spin_lock(&stats->lock); + if (time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock(&stats->lock); +} + +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + int cpu, cur_cpu; + + *used = 0; + *tcp_flags = 0; + memset(ovs_stats, 0, sizeof(*ovs_stats)); + + if (!flow->stats.is_percpu) { + stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); + } else { + cur_cpu = get_cpu(); + for_each_possible_cpu(cpu) { + struct flow_stats *stats; + + if (cpu == cur_cpu) + local_bh_disable(); + + stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + stats_read(stats, ovs_stats, used, tcp_flags); + + if (cpu == cur_cpu) + local_bh_enable(); + } + put_cpu(); + } +} + +static void stats_reset(struct flow_stats *stats) +{ + spin_lock(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock(&stats->lock); +} + +void ovs_flow_stats_clear(struct sw_flow *flow) +{ + int cpu, cur_cpu; + + if (!flow->stats.is_percpu) { + stats_reset(flow->stats.stat); + } else { + cur_cpu = get_cpu(); + + for_each_possible_cpu(cpu) { + + if (cpu == cur_cpu) + local_bh_disable(); + + stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); + + if (cpu == cur_cpu) + local_bh_enable(); + } + put_cpu(); + } } static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1510f51dbf7..2d770e28a3a 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -19,6 +19,7 @@ #ifndef FLOW_H #define FLOW_H 1 +#include <linux/cache.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/openvswitch.h> @@ -122,8 +123,8 @@ struct sw_flow_key { } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow_key_range { - size_t start; - size_t end; + unsigned short int start; + unsigned short int end; }; struct sw_flow_mask { @@ -146,6 +147,22 @@ struct sw_flow_actions { struct nlattr actions[]; }; +struct flow_stats { + u64 packet_count; /* Number of packets matched. */ + u64 byte_count; /* Number of bytes matched. */ + unsigned long used; /* Last used time (in jiffies). */ + spinlock_t lock; /* Lock for atomic stats update. */ + __be16 tcp_flags; /* Union of seen TCP flags. */ +}; + +struct sw_flow_stats { + bool is_percpu; + union { + struct flow_stats *stat; + struct flow_stats __percpu *cpu_stats; + }; +}; + struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -155,12 +172,7 @@ struct sw_flow { struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - - spinlock_t lock; /* Lock for values below. */ - unsigned long used; /* Last used time (in jiffies). */ - u64 packet_count; /* Number of packets matched. */ - u64 byte_count; /* Number of bytes matched. */ - __be16 tcp_flags; /* Union of seen TCP flags. */ + struct sw_flow_stats stats; }; struct arp_eth_header { @@ -177,7 +189,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_used(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, + unsigned long *used, __be16 *tcp_flags); +void ovs_flow_stats_clear(struct sw_flow *flow); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 2bc1bc1aca3..4d000acaed0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -266,6 +266,20 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } +static bool is_all_set(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (fp[i] != 0xff) + return false; + + return true; +} + static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -487,8 +501,9 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, + u64 attrs, const struct nlattr **a, + bool is_mask) { int err; u64 orig_attrs = attrs; @@ -545,6 +560,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } + if (is_mask && exact_5tuple) { + if (match->mask->key.eth.type != htons(0xffff)) + *exact_5tuple = false; + } + if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -567,6 +587,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + + if (is_mask && exact_5tuple && *exact_5tuple) { + if (ipv4_key->ipv4_proto != 0xff || + ipv4_key->ipv4_src != htonl(0xffffffff) || + ipv4_key->ipv4_dst != htonl(0xffffffff)) + *exact_5tuple = false; + } } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -598,6 +625,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + + if (is_mask && exact_5tuple && *exact_5tuple) { + if (ipv6_key->ipv6_proto != 0xff || + !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || + !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) + *exact_5tuple = false; + } } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -640,6 +674,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, tcp_key->tcp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_TCP); + + if (is_mask && exact_5tuple && *exact_5tuple && + (tcp_key->tcp_src != htons(0xffff) || + tcp_key->tcp_dst != htons(0xffff))) + *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { @@ -671,6 +710,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, udp_key->udp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_UDP); + + if (is_mask && exact_5tuple && *exact_5tuple && + (udp_key->udp_src != htons(0xffff) || + udp_key->udp_dst != htons(0xffff))) + *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { @@ -756,6 +800,7 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, * attribute specifies the mask field of the wildcarded flow. */ int ovs_nla_get_match(struct sw_flow_match *match, + bool *exact_5tuple, const struct nlattr *key, const struct nlattr *mask) { @@ -803,10 +848,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); if (err) return err; + if (exact_5tuple) + *exact_5tuple = true; + if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) @@ -844,7 +892,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); if (err) return err; } else { @@ -1128,19 +1176,11 @@ struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) return sfa; } -/* RCU callback used by ovs_nla_free_flow_actions. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ - struct sw_flow_actions *sf_acts = container_of(rcu, - struct sw_flow_actions, rcu); - kfree(sf_acts); -} - /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. */ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) { - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); + kfree_rcu(sf_acts, rcu); } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 440151045d3..b31fbe28bc7 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,6 +45,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); int ovs_nla_get_match(struct sw_flow_match *match, + bool *exact_5tuple, const struct nlattr *, const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 0e720c31607..c58a0fe3c88 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -44,8 +44,6 @@ #include <net/ipv6.h> #include <net/ndisc.h> -#include "datapath.h" - #define TBL_MIN_BUCKETS 1024 #define REHASH_INTERVAL (10 * 60 * HZ) @@ -72,19 +70,42 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } -struct sw_flow *ovs_flow_alloc(void) +struct sw_flow *ovs_flow_alloc(bool percpu_stats) { struct sw_flow *flow; + int cpu; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - spin_lock_init(&flow->lock); flow->sf_acts = NULL; flow->mask = NULL; + flow->stats.is_percpu = percpu_stats; + + if (!percpu_stats) { + flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); + if (!flow->stats.stat) + goto err; + + spin_lock_init(&flow->stats.stat->lock); + } else { + flow->stats.cpu_stats = alloc_percpu(struct flow_stats); + if (!flow->stats.cpu_stats) + goto err; + + for_each_possible_cpu(cpu) { + struct flow_stats *cpu_stats; + + cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + spin_lock_init(&cpu_stats->lock); + } + } return flow; +err: + kmem_cache_free(flow_cache, flow); + return ERR_PTR(-ENOMEM); } int ovs_flow_tbl_count(struct flow_table *table) @@ -118,6 +139,10 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { kfree((struct sf_flow_acts __force *)flow->sf_acts); + if (flow->stats.is_percpu) + free_percpu(flow->stats.cpu_stats); + else + kfree(flow->stats.stat); kmem_cache_free(flow_cache, flow); } @@ -128,13 +153,6 @@ static void rcu_free_flow_callback(struct rcu_head *rcu) flow_free(flow); } -static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) -{ - struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); - - kfree(mask); -} - static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) { if (!mask) @@ -146,7 +164,7 @@ static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) if (!mask->ref_count) { list_del_rcu(&mask->list); if (deferred) - call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + kfree_rcu(mask, rcu); else kfree(mask); } @@ -429,11 +447,11 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, const struct sw_flow_key *key, u32 *n_mask_hit) { - struct table_instance *ti = rcu_dereference(tbl->ti); + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; @@ -447,6 +465,14 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return NULL; } +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + u32 __always_unused n_mask_hit; + + return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct sw_flow_mask *mask; @@ -514,11 +540,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ +/* Add 'mask' into the mask list, if it is not already there. */ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, struct sw_flow_mask *new) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index fbe45d5ad07..1996e34c0fd 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -55,7 +55,7 @@ struct flow_table { int ovs_flow_init(void); void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(void); +struct sw_flow *ovs_flow_alloc(bool percpu_stats); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); @@ -69,9 +69,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); int ovs_flow_tbl_num_masks(const struct flow_table *table); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, const struct sw_flow_key *, u32 *n_mask_hit); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, + const struct sw_flow_key *); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index d830a95f03a..208dd9a26dd 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -33,6 +33,9 @@ #include "vport.h" #include "vport-internal_dev.h" +static void ovs_vport_record_error(struct vport *, + enum vport_err_type err_type); + /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the bottom of vport.h. */ static const struct vport_ops *vport_ops_list[] = { @@ -136,14 +139,14 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct pcpu_tstats); + vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); } for_each_possible_cpu(i) { - struct pcpu_tstats *vport_stats; + struct pcpu_sw_netstats *vport_stats; vport_stats = per_cpu_ptr(vport->percpu_stats, i); u64_stats_init(&vport_stats->syncp); } @@ -275,8 +278,8 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) spin_unlock_bh(&vport->stats_lock); for_each_possible_cpu(i) { - const struct pcpu_tstats *percpu_stats; - struct pcpu_tstats local_stats; + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; unsigned int start; percpu_stats = per_cpu_ptr(vport->percpu_stats, i); @@ -344,7 +347,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, struct ovs_key_ipv4_tunnel *tun_key) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); @@ -370,7 +373,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) int sent = vport->ops->send(vport, skb); if (likely(sent > 0)) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); @@ -396,7 +399,8 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * If using the vport generic stats layer indicate that an error of the given * type has occurred. */ -void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) +static void ovs_vport_record_error(struct vport *vport, + enum vport_err_type err_type) { spin_lock(&vport->stats_lock); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 1a9fbcec6e1..d7e50a17396 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -87,7 +87,7 @@ struct vport { struct hlist_node dp_hash_node; const struct vport_ops *ops; - struct pcpu_tstats __percpu *percpu_stats; + struct pcpu_sw_netstats __percpu *percpu_stats; spinlock_t stats_lock; struct vport_err_stats err_stats; @@ -192,7 +192,6 @@ static inline struct vport *vport_from_priv(const void *priv) void ovs_vport_receive(struct vport *, struct sk_buff *, struct ovs_key_ipv4_tunnel *); -void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the top of vport.c. */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index dd3840846ce..279467b74eb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1356,9 +1356,9 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } -static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { - if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout) return true; return false; diff --git a/net/rds/ib.c b/net/rds/ib.c index b4c8b0022fe..ba2dffeff60 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support iWARP devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 33af77246bf..d080eb4b0d2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1012,7 +1012,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros make_rose->source_call = facilities.source_call; make_rose->source_ndigis = facilities.source_ndigis; for (n = 0 ; n < facilities.source_ndigis ; n++) - make_rose->source_digis[n]= facilities.source_digis[n]; + make_rose->source_digis[n] = facilities.source_digis[n]; make_rose->neighbour = neigh; make_rose->device = dev; make_rose->facilities = facilities; @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) { struct sockaddr_rose *srose; + struct full_sockaddr_rose *full_srose = msg->msg_name; memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); srose = msg->msg_name; @@ -1260,18 +1261,9 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; - if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } else { - if (rose->dest_ndigis >= 1) { - srose->srose_ndigis = 1; - srose->srose_digi = rose->dest_digis[0]; - } - msg->msg_namelen = sizeof(struct sockaddr_rose); - } + for (n = 0 ; n < rose->dest_ndigis ; n++) + full_srose->srose_digis[n] = rose->dest_digis[n]; + msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 28dbdb911b8..50005888be5 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -146,7 +146,7 @@ static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops rose_header_ops = { .create = rose_header, - .rebuild= rose_rebuild_header, + .rebuild = rose_rebuild_header, }; static const struct net_device_ops rose_netdev_ops = { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 919847beec3..a1a8e29e5fc 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -295,6 +295,19 @@ config NET_SCH_HHF To compile this driver as a module, choose M here: the module will be called sch_hhf. +config NET_SCH_PIE + tristate "Proportional Integral controller Enhanced (PIE) scheduler" + help + Say Y here if you want to use the Proportional Integral controller + Enhanced scheduler packet scheduling algorithm. + For more information, please see + http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + + To compile this driver as a module, choose M here: the module + will be called sch_pie. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -444,6 +457,7 @@ config NET_CLS_FLOW config NET_CLS_CGROUP tristate "Control Group Classifier" select NET_CLS + select CGROUP_NET_CLASSID depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control diff --git a/net/sched/Makefile b/net/sched/Makefile index 3442e5fbc4d..0a869a11f3e 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o +obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8114fef308d..35f89e9ce49 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -126,9 +126,10 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, for (i = 0; i < (hinfo->hmask + 1); i++) { head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { - if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) { module_put(a->ops->owner); - n_i++; + n_i++; + } } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -141,8 +142,8 @@ nla_put_failure: return -EINVAL; } -int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, - int type, struct tc_action *a) +static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) { struct tcf_hashinfo *hinfo = a->ops->hinfo; @@ -155,7 +156,6 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, return -EINVAL; } } -EXPORT_SYMBOL(tcf_generic_walker); struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { @@ -173,21 +173,21 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) } EXPORT_SYMBOL(tcf_hash_lookup); -u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) +u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo) { - u32 val = *idx_gen; + u32 val = hinfo->index; do { if (++val == 0) val = 1; } while (tcf_hash_lookup(val, hinfo)); - *idx_gen = val; + hinfo->index = val; return val; } EXPORT_SYMBOL(tcf_hash_new_index); -int tcf_hash_search(struct tc_action *a, u32 index) +static int tcf_hash_search(struct tc_action *a, u32 index) { struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = tcf_hash_lookup(index, hinfo); @@ -198,7 +198,6 @@ int tcf_hash_search(struct tc_action *a, u32 index) } return 0; } -EXPORT_SYMBOL(tcf_hash_search); struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, struct tcf_hashinfo *hinfo) @@ -216,7 +215,7 @@ EXPORT_SYMBOL(tcf_hash_check); struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, - u32 *idx_gen, struct tcf_hashinfo *hinfo) + struct tcf_hashinfo *hinfo) { struct tcf_common *p = kzalloc(size, GFP_KERNEL); @@ -228,7 +227,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, spin_lock_init(&p->tcfc_lock); INIT_HLIST_NODE(&p->tcfc_head); - p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); + p->tcfc_index = index ? index : tcf_hash_new_index(hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; if (est) { @@ -291,12 +290,12 @@ int tcf_unregister_action(struct tc_action_ops *act) int err = -ENOENT; write_lock(&act_mod_lock); - list_for_each_entry(a, &act_base, head) - if (a == act) + list_for_each_entry(a, &act_base, head) { + if (a == act) { + list_del(&act->head); + err = 0; break; - if (a) { - list_del(&act->head); - err = 0; + } } write_unlock(&act_mod_lock); return err; @@ -306,68 +305,41 @@ EXPORT_SYMBOL(tcf_unregister_action); /* lookup by name */ static struct tc_action_ops *tc_lookup_action_n(char *kind) { - struct tc_action_ops *a = NULL; + struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (strcmp(kind, a->kind) == 0) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } + if (try_module_get(a->owner)) + res = a; break; } } read_unlock(&act_mod_lock); } - return a; + return res; } /* lookup by nlattr */ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) { - struct tc_action_ops *a = NULL; + struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (nla_strcmp(kind, a->kind) == 0) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } + if (try_module_get(a->owner)) + res = a; break; } } read_unlock(&act_mod_lock); } - return a; + return res; } -#if 0 -/* lookup by id */ -static struct tc_action_ops *tc_lookup_action_id(u32 type) -{ - struct tc_action_ops *a = NULL; - - if (type) { - read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { - if (a->type == type) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } - break; - } - } - read_unlock(&act_mod_lock); - } - return a; -} -#endif - int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res) { @@ -381,18 +353,16 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, } list_for_each_entry(a, actions, list) { repeat: - if (a->ops) { - ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ - if (ret != TC_ACT_PIPE) - goto exec_done; + ret = a->ops->act(skb, a, res); + if (TC_MUNGED & skb->tc_verd) { + /* copied already, allow trampling */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); } + if (ret == TC_ACT_REPEAT) + goto repeat; /* we need a ttl - JHS */ + if (ret != TC_ACT_PIPE) + goto exec_done; } exec_done: return ret; @@ -404,27 +374,16 @@ void tcf_action_destroy(struct list_head *actions, int bind) struct tc_action *a, *tmp; list_for_each_entry_safe(a, tmp, actions, list) { - if (a->ops) { - if (a->ops->cleanup(a, bind) == ACT_P_DELETED) - module_put(a->ops->owner); - list_del(&a->list); - kfree(a); - } else { - /*FIXME: Remove later - catch insertion bugs*/ - WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n"); - list_del(&a->list); - kfree(a); - } + if (a->ops->cleanup(a, bind) == ACT_P_DELETED) + module_put(a->ops->owner); + list_del(&a->list); + kfree(a); } } int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - int err = -EINVAL; - - if (a->ops == NULL) - return err; return a->ops->dump(skb, a, bind, ref); } @@ -435,9 +394,6 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL) - return err; - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) @@ -600,9 +556,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, { int err = 0; struct gnet_dump d; - struct tcf_act_hdr *h = a->priv; + struct tcf_common *p = a->priv; - if (h == NULL) + if (p == NULL) goto errout; /* compat_mode being true specifies a call that is supposed @@ -611,20 +567,20 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d); + TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - &h->tcf_lock, &d); + &p->tcfc_lock, &d); if (err < 0) goto errout; - if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &h->tcf_bstats, - &h->tcf_rate_est) < 0 || - gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) + if (gnet_stats_copy_basic(&d, &p->tcfc_bstats) < 0 || + gnet_stats_copy_rate_est(&d, &p->tcfc_bstats, + &p->tcfc_rate_est) < 0 || + gnet_stats_copy_queue(&d, &p->tcfc_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) @@ -713,7 +669,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) INIT_LIST_HEAD(&a->list); err = -EINVAL; a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); - if (a->ops == NULL) + if (a->ops == NULL) /* could happen in batch of actions */ goto err_free; err = -ENOENT; if (a->ops->lookup(a, index) == 0) @@ -789,7 +745,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, err = -EINVAL; kind = tb[TCA_ACT_KIND]; a->ops = tc_lookup_action(kind); - if (a->ops == NULL) + if (a->ops == NULL) /*some idjot trying to flush unknown action */ goto err_out; nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); @@ -833,6 +789,33 @@ noflush_out: } static int +tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, + u32 portid) +{ + int ret; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, + 0, 1) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + /* now do the delete */ + tcf_action_destroy(actions, 0); + + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (ret > 0) + return 0; + return ret; +} + +static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int event) { @@ -865,27 +848,9 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_GETACTION) ret = act_get_notify(net, portid, n, &actions, event); else { /* delete */ - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) { - ret = -ENOBUFS; + ret = tcf_del_notify(net, n, &actions, portid); + if (ret) goto err; - } - - if (tca_get_fill(skb, &actions, portid, n->nlmsg_seq, 0, event, - 0, 1) <= 0) { - kfree_skb(skb); - ret = -EINVAL; - goto err; - } - - /* now do the delete */ - tcf_action_destroy(&actions, 0); - ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - if (ret > 0) - return 0; return ret; } err: @@ -893,60 +858,36 @@ err: return ret; } -static int tcf_add_notify(struct net *net, struct list_head *actions, - u32 portid, u32 seq, int event, u16 flags) +static int +tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, + u32 portid) { - struct tcamsg *t; - struct nlmsghdr *nlh; struct sk_buff *skb; - struct nlattr *nest; - unsigned char *b; int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - b = skb_tail_pointer(skb); - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); - if (!nlh) - goto out_kfree_skb; - t = nlmsg_data(nlh); - t->tca_family = AF_UNSPEC; - t->tca__pad1 = 0; - t->tca__pad2 = 0; - - nest = nla_nest_start(skb, TCA_ACT_TAB); - if (nest == NULL) - goto out_kfree_skb; - - if (tcf_action_dump(skb, actions, 0, 0) < 0) - goto out_kfree_skb; - - nla_nest_end(skb, nest); - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - NETLINK_CB(skb).dst_group = RTNLGRP_TC; + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, + RTM_NEWACTION, 0, 0) <= 0) { + kfree_skb(skb); + return -EINVAL; + } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err > 0) err = 0; return err; - -out_kfree_skb: - kfree_skb(skb); - return -1; } - static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; LIST_HEAD(actions); - u32 seq = n->nlmsg_seq; ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); if (ret) @@ -955,7 +896,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, &actions, portid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, n, &actions, portid); cleanup_a(&actions); done: return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 5d350c57af3..ee28e1ccbbd 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,7 +37,6 @@ #include <net/tc_act/tc_csum.h> #define CSUM_TAB_MASK 15 -static u32 csum_idx_gen; static struct tcf_hashinfo csum_hash_info; static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { @@ -67,19 +66,19 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &csum_idx_gen, &csum_hash_info); + &csum_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_csum(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_csum(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &csum_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &csum_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_csum(pc); spin_lock_bh(&p->tcf_lock); p->tcf_action = parm->action; p->update_flags = parm->update_flags; @@ -586,7 +585,7 @@ MODULE_LICENSE("GPL"); static int __init csum_init_module(void) { - int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK+1); + int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK); if (err) return err; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 1e6e0e76524..f26e6b890cc 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -24,7 +24,6 @@ #include <net/tc_act/tc_gact.h> #define GACT_TAB_MASK 15 -static u32 gact_idx_gen; static struct tcf_hashinfo gact_hash_info; #ifdef CONFIG_GACT_PROB @@ -90,15 +89,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), - bind, &gact_idx_gen, &gact_hash_info); + bind, &gact_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_hash_release(pc, bind, &gact_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &gact_hash_info); + if (!ovr) return -EEXIST; - } } gact = to_gact(pc); @@ -208,7 +208,7 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { - int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK+1); + int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK); if (err) return err; #ifdef CONFIG_GACT_PROB diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8344380ebaf..484bd19601e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -29,7 +29,6 @@ #define IPT_TAB_MASK 15 -static u32 ipt_idx_gen; static struct tcf_hashinfo ipt_hash_info; static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) @@ -129,15 +128,17 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, pc = tcf_hash_check(index, a, bind, &ipt_hash_info); if (!pc) { pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, - &ipt_idx_gen, &ipt_hash_info); + &ipt_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_ipt_release(to_ipt(pc), bind); + if (bind)/* dont override defaults */ + return 0; + tcf_ipt_release(to_ipt(pc), bind); + + if (!ovr) return -EEXIST; - } } ipt = to_ipt(pc); @@ -314,7 +315,7 @@ MODULE_ALIAS("act_xt"); static int __init ipt_init_module(void) { int ret1, ret2, err; - err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK+1); + err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK); if (err) return err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 199fc9838af..5d05b573a62 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -30,7 +30,6 @@ #include <linux/if_arp.h> #define MIRRED_TAB_MASK 7 -static u32 mirred_idx_gen; static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info; @@ -107,7 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (dev == NULL) return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, - &mirred_idx_gen, &mirred_hash_info); + &mirred_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; @@ -276,7 +275,7 @@ static int __init mirred_init_module(void) if (err) return err; - err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK+1); + err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK); if (err) { unregister_netdevice_notifier(&mirred_device_notifier); return err; @@ -287,9 +286,9 @@ static int __init mirred_init_module(void) static void __exit mirred_cleanup_module(void) { - unregister_netdevice_notifier(&mirred_device_notifier); - tcf_hashinfo_destroy(&mirred_hash_info); tcf_unregister_action(&act_mirred_ops); + tcf_hashinfo_destroy(&mirred_hash_info); + unregister_netdevice_notifier(&mirred_device_notifier); } module_init(mirred_init_module); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 409fe7181c5..a49fa23b49d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -30,7 +30,6 @@ #define NAT_TAB_MASK 15 -static u32 nat_idx_gen; static struct tcf_hashinfo nat_hash_info; @@ -61,18 +60,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &nat_idx_gen, &nat_hash_info); + &nat_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_nat(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &nat_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &nat_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_nat(pc); spin_lock_bh(&p->tcf_lock); p->old_addr = parm->old_addr; @@ -310,7 +309,7 @@ MODULE_LICENSE("GPL"); static int __init nat_init_module(void) { - int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK+1); + int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); if (err) return err; return tcf_register_action(&act_nat_ops); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index aa5347c1b9f..f361e4e3c31 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,7 +24,6 @@ #include <net/tc_act/tc_pedit.h> #define PEDIT_TAB_MASK 15 -static u32 pedit_idx_gen; static struct tcf_hashinfo pedit_hash_info; @@ -63,7 +62,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!parm->nkeys) return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &pedit_idx_gen, &pedit_hash_info); + &pedit_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); p = to_pedit(pc); @@ -78,10 +77,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { p = to_pedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &pedit_hash_info); + tcf_hash_release(pc, bind, &pedit_hash_info); + if (bind) + return 0; + if (!ovr) return -EEXIST; - } + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) @@ -246,7 +247,7 @@ MODULE_LICENSE("GPL"); static int __init pedit_init_module(void) { - int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK+1); + int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK); if (err) return err; return tcf_register_action(&act_pedit_ops); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 7b23ab07c6c..a719fdff575 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -41,7 +41,6 @@ struct tcf_police { container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 -static u32 police_idx_gen; static struct tcf_hashinfo police_hash_info; /* old policer structure from before tc actions */ @@ -162,10 +161,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; + return 0; } if (ovr) goto override; - return ret; + /* not replacing */ + return -EEXIST; } } @@ -249,7 +250,7 @@ override: police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(&police_idx_gen, &police_hash_info); + tcf_hash_new_index(&police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); spin_lock_bh(&police_hash_info.lock); hlist_add_head(&police->tcf_head, &police_hash_info.htab[h]); @@ -396,7 +397,7 @@ static struct tc_action_ops act_police_ops = { static int __init police_init_module(void) { - int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK+1); + int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK); if (err) return err; err = tcf_register_action(&act_police_ops); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 2d7a0eb11c6..f7d5406c1fe 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -25,7 +25,6 @@ #include <net/tc_act/tc_defact.h> #define SIMP_TAB_MASK 7 -static u32 simp_idx_gen; static struct tcf_hashinfo simp_hash_info; #define SIMP_MAX_DATA 32 @@ -118,7 +117,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, - &simp_idx_gen, &simp_hash_info); + &simp_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); @@ -135,10 +134,13 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_defact(pc); - if (!ovr) { - tcf_simp_release(d, bind); + + if (bind) + return 0; + tcf_simp_release(d, bind); + if (!ovr) return -EEXIST; - } + reset_policy(d, defdata, parm); } @@ -203,7 +205,7 @@ MODULE_LICENSE("GPL"); static int __init simp_init_module(void) { int err, ret; - err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK+1); + err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK); if (err) return err; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 90ed04a83cf..74af4612706 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -28,7 +28,6 @@ #include <net/tc_act/tc_skbedit.h> #define SKBEDIT_TAB_MASK 15 -static u32 skbedit_idx_gen; static struct tcf_hashinfo skbedit_hash_info; static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, @@ -104,7 +103,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info); if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, - &skbedit_idx_gen, &skbedit_hash_info); + &skbedit_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); @@ -112,10 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &skbedit_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &skbedit_hash_info); + if (!ovr) return -EEXIST; - } } spin_lock_bh(&d->tcf_lock); @@ -203,7 +203,7 @@ MODULE_LICENSE("GPL"); static int __init skbedit_init_module(void) { - int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK+1); + int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK); if (err) return err; return tcf_register_action(&act_skbedit_ops); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6b085cf27a6..29a30a14c31 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -40,20 +40,20 @@ static DEFINE_RWLOCK(cls_mod_lock); static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) { - const struct tcf_proto_ops *t = NULL; + const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { if (nla_strcmp(kind, t->kind) == 0) { - if (!try_module_get(t->owner)) - t = NULL; + if (try_module_get(t->owner)) + res = t; break; } } read_unlock(&cls_mod_lock); } - return t; + return res; } /* Register(unregister) new classifier type */ @@ -82,15 +82,13 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) int rc = -ENOENT; write_lock(&cls_mod_lock); - list_for_each_entry(t, &tcf_proto_base, head) - if (t == ops) + list_for_each_entry(t, &tcf_proto_base, head) { + if (t == ops) { + list_del(&t->head); + rc = 0; break; - - if (!t) - goto out; - list_del(&t->head); - rc = 0; -out: + } + } write_unlock(&cls_mod_lock); return rc; } @@ -342,7 +340,7 @@ errout: return err; } -static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, +static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; @@ -364,7 +362,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm->tcm_handle = fh; if (RTM_DELTFILTER != event) { tcm->tcm_handle = 0; - if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) + if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) goto nla_put_failure; } nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -387,7 +385,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -406,8 +404,9 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; + struct net *net = sock_net(a->skb->sk); - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, + return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -465,7 +464,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, + if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; @@ -579,7 +578,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); nest = nla_nest_start(skb, exts->police); - if (nest == NULL) + if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) goto nla_put_failure; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index b6552035d1f..e98ca99c202 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -38,7 +38,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int r; - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { @@ -56,7 +56,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long basic_get(struct tcf_proto *tp, u32 handle) { unsigned long l = 0UL; - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *f; if (head == NULL) @@ -107,7 +107,7 @@ static void basic_destroy(struct tcf_proto *tp) static int basic_delete(struct tcf_proto *tp, unsigned long arg) { - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *t, *f = (struct basic_filter *) arg; list_for_each_entry(t, &head->flist, link) @@ -164,7 +164,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg) { int err; - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *f = (struct basic_filter *) *arg; @@ -225,7 +225,7 @@ errout: static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { @@ -241,7 +241,7 @@ skip: } } -static int basic_dump(struct tcf_proto *tp, unsigned long fh, +static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct basic_filter *f = (struct basic_filter *) fh; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 00a5a585e5f..8e3cf49118e 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -295,7 +295,7 @@ errout: return ret; } -static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh, +static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *tm) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index f9d212583ea..8e2158ab551 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -11,109 +11,13 @@ #include <linux/module.h> #include <linux/slab.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/cgroup.h> #include <linux/rcupdate.h> -#include <linux/fdtable.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> -static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct cgroup_cls_state, css) : NULL; -} - -static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) -{ - return css_cls_state(task_css(p, net_cls_subsys_id)); -} - -static struct cgroup_subsys_state * -cgrp_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct cgroup_cls_state *cs; - - cs = kzalloc(sizeof(*cs), GFP_KERNEL); - if (!cs) - return ERR_PTR(-ENOMEM); - return &cs->css; -} - -static int cgrp_css_online(struct cgroup_subsys_state *css) -{ - struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); - - if (parent) - cs->classid = parent->classid; - return 0; -} - -static void cgrp_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_cls_state(css)); -} - -static int update_classid(const void *v, struct file *file, unsigned n) -{ - int err; - struct socket *sock = sock_from_file(file, &err); - if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - return 0; -} - -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) -{ - struct task_struct *p; - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; - - cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid, v); - task_unlock(p); - } -} - -static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) -{ - return css_cls_state(css)->classid; -} - -static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, - u64 value) -{ - css_cls_state(css)->classid = (u32) value; - return 0; -} - -static struct cftype ss_files[] = { - { - .name = "classid", - .read_u64 = read_classid, - .write_u64 = write_classid, - }, - { } /* terminate */ -}; - -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", - .css_alloc = cgrp_css_alloc, - .css_online = cgrp_css_online, - .css_free = cgrp_css_free, - .attach = cgrp_attach, - .subsys_id = net_cls_subsys_id, - .base_cftypes = ss_files, - .module = THIS_MODULE, -}; - struct cls_cgroup_head { u32 handle; struct tcf_exts exts; @@ -260,7 +164,7 @@ skip: arg->count++; } -static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, +static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_cgroup_head *head = tp->root; @@ -305,25 +209,12 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - int ret; - - ret = cgroup_load_subsys(&net_cls_subsys); - if (ret) - goto out; - - ret = register_tcf_proto_ops(&cls_cgroup_ops); - if (ret) - cgroup_unload_subsys(&net_cls_subsys); - -out: - return ret; + return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); - - cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index dfd18a5c3e8..257029c5433 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -563,7 +563,7 @@ static void flow_put(struct tcf_proto *tp, unsigned long f) { } -static int flow_dump(struct tcf_proto *tp, unsigned long fh, +static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct flow_filter *f = (struct flow_filter *)fh; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 3f9cece1380..ed00e8c21ce 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -41,7 +41,7 @@ struct fw_filter { u32 id; struct tcf_result res; #ifdef CONFIG_NET_CLS_IND - char indev[IFNAMSIZ]; + int ifindex; #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; }; @@ -75,7 +75,7 @@ static inline int fw_hash(u32 handle) static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f; int r; u32 id = skb->mark; @@ -86,7 +86,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (f->id == id) { *res = f->res; #ifdef CONFIG_NET_CLS_IND - if (!tcf_match_indev(skb, f->indev)) + if (!tcf_match_indev(skb, f->ifindex)) continue; #endif /* CONFIG_NET_CLS_IND */ r = tcf_exts_exec(skb, &f->exts, res); @@ -111,7 +111,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long fw_get(struct tcf_proto *tp, u32 handle) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f; if (head == NULL) @@ -160,7 +160,7 @@ static void fw_destroy(struct tcf_proto *tp) static int fw_delete(struct tcf_proto *tp, unsigned long arg) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *)arg; struct fw_filter **fp; @@ -190,7 +190,7 @@ static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct tcf_exts e; u32 mask; int err; @@ -207,9 +207,11 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FW_INDEV]) { - err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]); - if (err < 0) + int ret; + ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); + if (ret < 0) goto errout; + f->ifindex = ret; } #endif /* CONFIG_NET_CLS_IND */ @@ -235,7 +237,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *) *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; @@ -298,7 +300,7 @@ errout: static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; int h; if (head == NULL) @@ -324,10 +326,10 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int fw_dump(struct tcf_proto *tp, unsigned long fh, +static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *)fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -348,9 +350,12 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid)) goto nla_put_failure; #ifdef CONFIG_NET_CLS_IND - if (strlen(f->indev) && - nla_put_string(skb, TCA_FW_INDEV, f->indev)) - goto nla_put_failure; + if (f->ifindex) { + struct net_device *dev; + dev = __dev_get_by_index(net, f->ifindex); + if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name)) + goto nla_put_failure; + } #endif /* CONFIG_NET_CLS_IND */ if (head->mask != 0xFFFFFFFF && nla_put_u32(skb, TCA_FW_MASK, head->mask)) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 2473953a594..1ad3068f2ce 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -123,7 +123,7 @@ static inline int route4_hash_wild(void) static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct route4_head *head = (struct route4_head *)tp->root; + struct route4_head *head = tp->root; struct dst_entry *dst; struct route4_bucket *b; struct route4_filter *f; @@ -213,7 +213,7 @@ static inline u32 from_hash(u32 id) static unsigned long route4_get(struct tcf_proto *tp, u32 handle) { - struct route4_head *head = (struct route4_head *)tp->root; + struct route4_head *head = tp->root; struct route4_bucket *b; struct route4_filter *f; unsigned int h1, h2; @@ -284,7 +284,7 @@ static void route4_destroy(struct tcf_proto *tp) static int route4_delete(struct tcf_proto *tp, unsigned long arg) { - struct route4_head *head = (struct route4_head *)tp->root; + struct route4_head *head = tp->root; struct route4_filter **fp, *f = (struct route4_filter *)arg; unsigned int h = 0; struct route4_bucket *b; @@ -551,7 +551,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int route4_dump(struct tcf_proto *tp, unsigned long fh, +static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct route4_filter *f = (struct route4_filter *)fh; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 4f25c2ac825..19f8e5dfa8b 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -594,7 +594,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, +static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct rsvp_filter *f = (struct rsvp_filter *)fh; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ffad18791c9..eed8404443d 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -24,9 +24,6 @@ #define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ -#define PRIV(tp) ((struct tcindex_data *) (tp)->root) - - struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; @@ -77,7 +74,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *f; int key = (skb->tc_index & p->mask) >> p->shift; @@ -102,7 +99,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); @@ -140,7 +137,7 @@ static int tcindex_init(struct tcf_proto *tp) static int __tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; struct tcindex_filter *f = NULL; @@ -338,7 +335,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; int err; @@ -360,7 +357,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter *f, *next; int i; @@ -407,7 +404,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, static void tcindex_destroy(struct tcf_proto *tp) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcf_walker walker; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); @@ -422,10 +419,10 @@ static void tcindex_destroy(struct tcf_proto *tp) } -static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, +static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 20f2fb79c74..84c28daff84 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -48,7 +48,7 @@ struct tc_u_knode { struct tc_u_hnode *ht_up; struct tcf_exts exts; #ifdef CONFIG_NET_CLS_IND - char indev[IFNAMSIZ]; + int ifindex; #endif u8 fshift; struct tcf_result res; @@ -95,7 +95,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct unsigned int off; } stack[TC_U32_MAXDEPTH]; - struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root; + struct tc_u_hnode *ht = tp->root; unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; @@ -152,7 +152,7 @@ check_terminal: *res = n->res; #ifdef CONFIG_NET_CLS_IND - if (!tcf_match_indev(skb, n->indev)) { + if (!tcf_match_indev(skb, n->ifindex)) { n = n->next; goto next_knode; } @@ -527,9 +527,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_U32_INDEV]) { - err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]); - if (err < 0) + int ret; + ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); + if (ret < 0) goto errout; + n->ifindex = ret; } #endif tcf_exts_change(tp, &n->exts, &e); @@ -712,7 +714,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int u32_dump(struct tcf_proto *tp, unsigned long fh, +static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; @@ -760,9 +762,12 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; #ifdef CONFIG_NET_CLS_IND - if (strlen(n->indev) && - nla_put_string(skb, TCA_U32_INDEV, n->indev)) - goto nla_put_failure; + if (n->ifindex) { + struct net_device *dev; + dev = __dev_get_by_index(net, n->ifindex); + if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name)) + goto nla_put_failure; + } #endif #ifdef CONFIG_CLS_U32_PERF if (nla_put(skb, TCA_U32_PCNT, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c31190e29b9..1313145e3b8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -135,7 +135,7 @@ static DEFINE_RWLOCK(qdisc_mod_lock); static struct Qdisc_ops *qdisc_base; -/* Register/uregister queueing discipline */ +/* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index e2518333380..2f80d01d42a 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1060,8 +1060,8 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) } if (cl->quantum <= 0 || cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { - pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); + pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", + cl->common.classid, cl->quantum); cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 0952fd2684e..49d6ef338b5 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -57,8 +57,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n", - sch, p, new, old); + pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", + __func__, sch, p, new, old); if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, @@ -85,8 +85,8 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) { - pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n", - sch, qdisc_priv(sch), classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", + __func__, sch, qdisc_priv(sch), classid); return TC_H_MIN(classid) + 1; } @@ -118,8 +118,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, int err = -EINVAL; u8 mask = 0; - pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," - "arg 0x%lx\n", sch, p, classid, parent, *arg); + pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", + __func__, sch, p, classid, parent, *arg); if (!dsmark_valid_index(p, *arg)) { err = -ENOENT; @@ -166,7 +166,8 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) struct dsmark_qdisc_data *p = qdisc_priv(sch); int i; - pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", + __func__, sch, p, walker); if (walker->stop) return; @@ -199,7 +200,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; - pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { switch (skb->protocol) { @@ -275,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) struct sk_buff *skb; u32 index; - pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); skb = p->q->ops->dequeue(p->q); if (skb == NULL) @@ -303,8 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) * and don't need yet another qdisc as a bypass. */ if (p->mask[index] != 0xff || p->value[index]) - pr_warning("dsmark_dequeue: unsupported protocol %d\n", - ntohs(skb->protocol)); + pr_warn("%s: unsupported protocol %d\n", + __func__, ntohs(skb->protocol)); break; } @@ -315,7 +316,7 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); return p->q->ops->peek(p->q); } @@ -325,7 +326,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); unsigned int len; - pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); if (p->q->ops->drop == NULL) return 0; @@ -346,7 +347,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) u16 indices; u8 *mask; - pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); + pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); if (!opt) goto errout; @@ -384,7 +385,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (p->q == NULL) p->q = &noop_qdisc; - pr_debug("dsmark_init: qdisc %p\n", p->q); + pr_debug("%s: qdisc %p\n", __func__, p->q); err = 0; errout: @@ -395,7 +396,7 @@ static void dsmark_reset(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); sch->q.qlen = 0; } @@ -404,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); @@ -417,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opts = NULL; - pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl); + pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); if (!dsmark_valid_index(p, cl)) return -EINVAL; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index d42234c0f13..12cbc09157f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -370,8 +370,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { - pr_warning("GRED: Warning: Destroying " - "shadowed VQ 0x%x\n", i); + pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", + i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 97aa33dbb90..1cf84a9e13a 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -574,18 +574,18 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]); if (tb[TCA_HHF_RESET_TIMEOUT]) { - u32 ms = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); + u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); - q->hhf_reset_timeout = msecs_to_jiffies(ms); + q->hhf_reset_timeout = usecs_to_jiffies(us); } if (tb[TCA_HHF_ADMIT_BYTES]) q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]); if (tb[TCA_HHF_EVICT_TIMEOUT]) { - u32 ms = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); + u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); - q->hhf_evict_timeout = msecs_to_jiffies(ms); + q->hhf_evict_timeout = usecs_to_jiffies(us); } qlen = sch->q.qlen; @@ -684,10 +684,10 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) || nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT, - jiffies_to_msecs(q->hhf_reset_timeout)) || + jiffies_to_usecs(q->hhf_reset_timeout)) || nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) || nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT, - jiffies_to_msecs(q->hhf_evict_timeout)) || + jiffies_to_usecs(q->hhf_evict_timeout)) || nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight)) goto nla_put_failure; @@ -711,7 +711,7 @@ static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d) return gnet_stats_copy_app(d, &st, sizeof(st)); } -struct Qdisc_ops hhf_qdisc_ops __read_mostly = { +static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .id = "hhf", .priv_size = sizeof(struct hhf_sched_data), @@ -727,7 +727,6 @@ struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .dump_stats = hhf_dump_stats, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(hhf_qdisc_ops); static int __init hhf_module_init(void) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6b0e854b011..0db5a6eae87 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -712,7 +712,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level, /* too much load - let's continue after a break for scheduling */ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { - pr_warning("htb: too many events!\n"); + pr_warn("htb: too many events!\n"); q->warned |= HTB_WARN_TOOMANYEVENTS; } @@ -1276,9 +1276,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) struct Qdisc *new_q = NULL; int last_child = 0; - // TODO: why don't allow to delete subtree ? references ? does - // tc subsys quarantee us that in htb_destroy it holds no class - // refs so that we can remove children safely there ? + /* TODO: why don't allow to delete subtree ? references ? does + * tc subsys guarantee us that in htb_destroy it holds no class + * refs so that we can remove children safely there ? + */ if (cl->children || cl->filter_cnt) return -EBUSY; @@ -1488,15 +1489,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->quantum = min_t(u64, quantum, INT_MAX); if (!hopt->quantum && cl->quantum < 1000) { - pr_warning( - "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->common.classid); + pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n", + cl->common.classid); cl->quantum = 1000; } if (!hopt->quantum && cl->quantum > 200000) { - pr_warning( - "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->common.classid); + pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n", + cl->common.classid); cl->quantum = 200000; } if (hopt->quantum) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f3befd6b478..090a4e3ecd0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -88,7 +88,7 @@ struct netem_sched_data { u32 duplicate; u32 reorder; u32 corrupt; - u32 rate; + u64 rate; s32 packet_overhead; u32 cell_size; u32 cell_size_reciprocal; @@ -495,7 +495,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = netem_skb_cb(last)->time_to_send; } - delay += packet_len_2_sched_time(skb->len, q); + delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; @@ -782,6 +782,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, + [TCA_NETEM_RATE64] = { .type = NLA_U64 }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -852,6 +853,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_RATE]) get_rate(sch, tb[TCA_NETEM_RATE]); + if (tb[TCA_NETEM_RATE64]) + q->rate = max_t(u64, q->rate, + nla_get_u64(tb[TCA_NETEM_RATE64])); + if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); @@ -974,7 +979,13 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) goto nla_put_failure; - rate.rate = q->rate; + if (q->rate >= (1ULL << 32)) { + if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate)) + goto nla_put_failure; + rate.rate = ~0U; + } else { + rate.rate = q->rate; + } rate.packet_overhead = q->packet_overhead; rate.cell_size = q->cell_size; rate.cell_overhead = q->cell_overhead; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c new file mode 100644 index 00000000000..fe65340c8eb --- /dev/null +++ b/net/sched/sch_pie.c @@ -0,0 +1,555 @@ +/* Copyright (C) 2013 Cisco Systems, Inc, 2013. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Vijay Subramanian <vijaynsu@cisco.com> + * Author: Mythili Prabhu <mysuryan@cisco.com> + * + * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no> + * University of Oslo, Norway. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> + +#define QUEUE_THRESHOLD 10000 +#define DQCOUNT_INVALID -1 +#define MAX_PROB 0xffffffff +#define PIE_SCALE 8 + +/* parameters used */ +struct pie_params { + psched_time_t target; /* user specified target delay in pschedtime */ + u32 tupdate; /* timer frequency (in jiffies) */ + u32 limit; /* number of packets that can be enqueued */ + u32 alpha; /* alpha and beta are between -4 and 4 */ + u32 beta; /* and are used for shift relative to 1 */ + bool ecn; /* true if ecn is enabled */ + bool bytemode; /* to scale drop early prob based on pkt size */ +}; + +/* variables used */ +struct pie_vars { + u32 prob; /* probability but scaled by u32 limit. */ + psched_time_t burst_time; + psched_time_t qdelay; + psched_time_t qdelay_old; + u64 dq_count; /* measured in bytes */ + psched_time_t dq_tstamp; /* drain rate */ + u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */ + u32 qlen_old; /* in bytes */ +}; + +/* statistics gathering */ +struct pie_stats { + u32 packets_in; /* total number of packets enqueued */ + u32 dropped; /* packets dropped due to pie_action */ + u32 overlimit; /* dropped due to lack of space in queue */ + u32 maxq; /* maximum queue size */ + u32 ecn_mark; /* packets marked with ECN */ +}; + +/* private data for the Qdisc */ +struct pie_sched_data { + struct pie_params params; + struct pie_vars vars; + struct pie_stats stats; + struct timer_list adapt_timer; +}; + +static void pie_params_init(struct pie_params *params) +{ + params->alpha = 2; + params->beta = 20; + params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */ + params->limit = 1000; /* default of 1000 packets */ + params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */ + params->ecn = false; + params->bytemode = false; +} + +static void pie_vars_init(struct pie_vars *vars) +{ + vars->dq_count = DQCOUNT_INVALID; + vars->avg_dq_rate = 0; + /* default of 100 ms in pschedtime */ + vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC); +} + +static bool drop_early(struct Qdisc *sch, u32 packet_size) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 rnd; + u32 local_prob = q->vars.prob; + u32 mtu = psched_mtu(qdisc_dev(sch)); + + /* If there is still burst allowance left skip random early drop */ + if (q->vars.burst_time > 0) + return false; + + /* If current delay is less than half of target, and + * if drop prob is low already, disable early_drop + */ + if ((q->vars.qdelay < q->params.target / 2) + && (q->vars.prob < MAX_PROB / 5)) + return false; + + /* If we have fewer than 2 mtu-sized packets, disable drop_early, + * similar to min_th in RED + */ + if (sch->qstats.backlog < 2 * mtu) + return false; + + /* If bytemode is turned on, use packet size to compute new + * probablity. Smaller packets will have lower drop prob in this case + */ + if (q->params.bytemode && packet_size <= mtu) + local_prob = (local_prob / mtu) * packet_size; + else + local_prob = q->vars.prob; + + rnd = net_random(); + if (rnd < local_prob) + return true; + + return false; +} + +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + bool enqueue = false; + + if (unlikely(qdisc_qlen(sch) >= sch->limit)) { + q->stats.overlimit++; + goto out; + } + + if (!drop_early(sch, skb->len)) { + enqueue = true; + } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) && + INET_ECN_set_ce(skb)) { + /* If packet is ecn capable, mark it if drop probability + * is lower than 10%, else drop it. + */ + q->stats.ecn_mark++; + enqueue = true; + } + + /* we can enqueue the packet */ + if (enqueue) { + q->stats.packets_in++; + if (qdisc_qlen(sch) > q->stats.maxq) + q->stats.maxq = qdisc_qlen(sch); + + return qdisc_enqueue_tail(skb, sch); + } + +out: + q->stats.dropped++; + return qdisc_drop(skb, sch); +} + +static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { + [TCA_PIE_TARGET] = {.type = NLA_U32}, + [TCA_PIE_LIMIT] = {.type = NLA_U32}, + [TCA_PIE_TUPDATE] = {.type = NLA_U32}, + [TCA_PIE_ALPHA] = {.type = NLA_U32}, + [TCA_PIE_BETA] = {.type = NLA_U32}, + [TCA_PIE_ECN] = {.type = NLA_U32}, + [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, +}; + +static int pie_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_PIE_MAX + 1]; + unsigned int qlen; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + /* convert from microseconds to pschedtime */ + if (tb[TCA_PIE_TARGET]) { + /* target is in us */ + u32 target = nla_get_u32(tb[TCA_PIE_TARGET]); + + /* convert to pschedtime */ + q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); + } + + /* tupdate is in jiffies */ + if (tb[TCA_PIE_TUPDATE]) + q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + + if (tb[TCA_PIE_LIMIT]) { + u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); + + q->params.limit = limit; + sch->limit = limit; + } + + if (tb[TCA_PIE_ALPHA]) + q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]); + + if (tb[TCA_PIE_BETA]) + q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]); + + if (tb[TCA_PIE_ECN]) + q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]); + + if (tb[TCA_PIE_BYTEMODE]) + q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]); + + /* Drop excess packets if new limit is lower */ + qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = __skb_dequeue(&sch->q); + + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + + sch_tree_unlock(sch); + return 0; +} + +static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) +{ + + struct pie_sched_data *q = qdisc_priv(sch); + int qlen = sch->qstats.backlog; /* current queue size in bytes */ + + /* If current queue is about 10 packets or more and dq_count is unset + * we have enough packets to calculate the drain rate. Save + * current time as dq_tstamp and start measurement cycle. + */ + if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) { + q->vars.dq_tstamp = psched_get_time(); + q->vars.dq_count = 0; + } + + /* Calculate the average drain rate from this value. If queue length + * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset + * the dq_count to -1 as we don't have enough packets to calculate the + * drain rate anymore The following if block is entered only when we + * have a substantial queue built up (QUEUE_THRESHOLD bytes or more) + * and we calculate the drain rate for the threshold here. dq_count is + * in bytes, time difference in psched_time, hence rate is in + * bytes/psched_time. + */ + if (q->vars.dq_count != DQCOUNT_INVALID) { + q->vars.dq_count += skb->len; + + if (q->vars.dq_count >= QUEUE_THRESHOLD) { + psched_time_t now = psched_get_time(); + u32 dtime = now - q->vars.dq_tstamp; + u32 count = q->vars.dq_count << PIE_SCALE; + + if (dtime == 0) + return; + + count = count / dtime; + + if (q->vars.avg_dq_rate == 0) + q->vars.avg_dq_rate = count; + else + q->vars.avg_dq_rate = + (q->vars.avg_dq_rate - + (q->vars.avg_dq_rate >> 3)) + (count >> 3); + + /* If the queue has receded below the threshold, we hold + * on to the last drain rate calculated, else we reset + * dq_count to 0 to re-enter the if block when the next + * packet is dequeued + */ + if (qlen < QUEUE_THRESHOLD) + q->vars.dq_count = DQCOUNT_INVALID; + else { + q->vars.dq_count = 0; + q->vars.dq_tstamp = psched_get_time(); + } + + if (q->vars.burst_time > 0) { + if (q->vars.burst_time > dtime) + q->vars.burst_time -= dtime; + else + q->vars.burst_time = 0; + } + } + } +} + +static void calculate_probability(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 qlen = sch->qstats.backlog; /* queue size in bytes */ + psched_time_t qdelay = 0; /* in pschedtime */ + psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */ + s32 delta = 0; /* determines the change in probability */ + u32 oldprob; + u32 alpha, beta; + bool update_prob = true; + + q->vars.qdelay_old = q->vars.qdelay; + + if (q->vars.avg_dq_rate > 0) + qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate; + else + qdelay = 0; + + /* If qdelay is zero and qlen is not, it means qlen is very small, less + * than dequeue_rate, so we do not update probabilty in this round + */ + if (qdelay == 0 && qlen != 0) + update_prob = false; + + /* Add ranges for alpha and beta, more aggressive for high dropping + * mode and gentle steps for light dropping mode + * In light dropping mode, take gentle steps; in medium dropping mode, + * take medium steps; in high dropping mode, take big steps. + */ + if (q->vars.prob < MAX_PROB / 100) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + } else if (q->vars.prob < MAX_PROB / 10) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + } else { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + } + + /* alpha and beta should be between 0 and 32, in multiples of 1/16 */ + delta += alpha * ((qdelay - q->params.target)); + delta += beta * ((qdelay - qdelay_old)); + + oldprob = q->vars.prob; + + /* to ensure we increase probability in steps of no more than 2% */ + if (delta > (s32) (MAX_PROB / (100 / 2)) && + q->vars.prob >= MAX_PROB / 10) + delta = (MAX_PROB / 100) * 2; + + /* Non-linear drop: + * Tune drop probability to increase quickly for high delays(>= 250ms) + * 250ms is derived through experiments and provides error protection + */ + + if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) + delta += MAX_PROB / (100 / 2); + + q->vars.prob += delta; + + if (delta > 0) { + /* prevent overflow */ + if (q->vars.prob < oldprob) { + q->vars.prob = MAX_PROB; + /* Prevent normalization error. If probability is at + * maximum value already, we normalize it here, and + * skip the check to do a non-linear drop in the next + * section. + */ + update_prob = false; + } + } else { + /* prevent underflow */ + if (q->vars.prob > oldprob) + q->vars.prob = 0; + } + + /* Non-linear drop in probability: Reduce drop probability quickly if + * delay is 0 for 2 consecutive Tupdate periods. + */ + + if ((qdelay == 0) && (qdelay_old == 0) && update_prob) + q->vars.prob = (q->vars.prob * 98) / 100; + + q->vars.qdelay = qdelay; + q->vars.qlen_old = qlen; + + /* We restart the measurement cycle if the following conditions are met + * 1. If the delay has been low for 2 consecutive Tupdate periods + * 2. Calculated drop probability is zero + * 3. We have atleast one estimate for the avg_dq_rate ie., + * is a non-zero value + */ + if ((q->vars.qdelay < q->params.target / 2) && + (q->vars.qdelay_old < q->params.target / 2) && + (q->vars.prob == 0) && + (q->vars.avg_dq_rate > 0)) + pie_vars_init(&q->vars); +} + +static void pie_timer(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + struct pie_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + + spin_lock(root_lock); + calculate_probability(sch); + + /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */ + if (q->params.tupdate) + mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); + spin_unlock(root_lock); + +} + +static int pie_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + + pie_params_init(&q->params); + pie_vars_init(&q->vars); + sch->limit = q->params.limit; + + setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + mod_timer(&q->adapt_timer, jiffies + HZ / 2); + + if (opt) { + int err = pie_change(sch, opt); + + if (err) + return err; + } + + return 0; +} + +static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + /* convert target from pschedtime to us */ + if (nla_put_u32(skb, TCA_PIE_TARGET, + ((u32) PSCHED_TICKS2NS(q->params.target)) / + NSEC_PER_USEC) || + nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) || + nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || + nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || + nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || + nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode)) + goto nla_put_failure; + + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -1; + +} + +static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct tc_pie_xstats st = { + .prob = q->vars.prob, + .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) / + NSEC_PER_USEC, + /* unscale and return dq_rate in bytes per sec */ + .avg_dq_rate = q->vars.avg_dq_rate * + (PSCHED_TICKS_PER_SEC) >> PIE_SCALE, + .packets_in = q->stats.packets_in, + .overlimit = q->stats.overlimit, + .maxq = q->stats.maxq, + .dropped = q->stats.dropped, + .ecn_mark = q->stats.ecn_mark, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) +{ + struct sk_buff *skb; + skb = __qdisc_dequeue_head(sch, &sch->q); + + if (!skb) + return NULL; + + pie_process_dequeue(sch, skb); + return skb; +} + +static void pie_reset(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + qdisc_reset_queue(sch); + pie_vars_init(&q->vars); +} + +static void pie_destroy(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + q->params.tupdate = 0; + del_timer_sync(&q->adapt_timer); +} + +static struct Qdisc_ops pie_qdisc_ops __read_mostly = { + .id = "pie", + .priv_size = sizeof(struct pie_sched_data), + .enqueue = pie_qdisc_enqueue, + .dequeue = pie_qdisc_dequeue, + .peek = qdisc_peek_dequeued, + .init = pie_init, + .destroy = pie_destroy, + .reset = pie_reset, + .change = pie_change, + .dump = pie_dump, + .dump_stats = pie_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init pie_module_init(void) +{ + return register_qdisc(&pie_qdisc_ops); +} + +static void __exit pie_module_exit(void) +{ + unregister_qdisc(&pie_qdisc_ops); +} + +module_init(pie_module_init); +module_exit(pie_module_exit); + +MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler"); +MODULE_AUTHOR("Vijay Subramanian"); +MODULE_AUTHOR("Mythili Prabhu"); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 887e672f9d7..fbba5b0ec12 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -307,6 +307,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_RATE64] = { .type = NLA_U64 }, [TCA_TBF_PRATE64] = { .type = NLA_U64 }, + [TCA_TBF_BURST] = { .type = NLA_U32 }, + [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; static int tbf_change(struct Qdisc *sch, struct nlattr *opt) @@ -358,7 +360,12 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); psched_ratecfg_precompute(&rate, &qopt->rate, rate64); - max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + if (tb[TCA_TBF_BURST]) { + max_size = nla_get_u32(tb[TCA_TBF_BURST]); + buffer = psched_l2t_ns(&rate, max_size); + } else { + max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + } if (qopt->peakrate.rate) { if (tb[TCA_TBF_PRATE64]) @@ -366,12 +373,18 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", - peak.rate_bytes_ps, rate.rate_bytes_ps); + peak.rate_bytes_ps, rate.rate_bytes_ps); err = -EINVAL; goto done; } - max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + if (tb[TCA_TBF_PBURST]) { + u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); + max_size = min_t(u32, max_size, pburst); + mtu = psched_l2t_ns(&peak, pburst); + } else { + max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + } } if (max_size < psched_mtu(qdisc_dev(sch))) @@ -391,9 +404,15 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = PSCHED_TICKS2NS(qopt->mtu); + if (tb[TCA_TBF_PBURST]) + q->mtu = mtu; + else + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = PSCHED_TICKS2NS(qopt->buffer); + if (tb[TCA_TBF_BURST]) + q->buffer = buffer; + else + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 5c9f64c1c90..683c7d1b130 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -41,7 +41,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { }, { .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, - .hmac_name="hmac(sha1)", + .hmac_name = "hmac(sha1)", .hmac_len = SCTP_SHA1_SIG_SIZE, }, { @@ -51,7 +51,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { #if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, - .hmac_name="hmac(sha256)", + .hmac_name = "hmac(sha256)", .hmac_len = SCTP_SHA256_SIG_SIZE, } #endif @@ -163,7 +163,7 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * lead-zero padded. If it is not, it * is automatically larger numerically. */ - for (i = 0; i < abs(diff); i++ ) { + for (i = 0; i < abs(diff); i++) { if (longer[i] != 0) return diff; } @@ -226,9 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t*)asoc->c.auth_random, - (sctp_chunks_param_t*)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t*)asoc->c.auth_hmacs, + (sctp_random_param_t *)asoc->c.auth_random, + (sctp_chunks_param_t *)asoc->c.auth_chunks, + (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, gfp); } @@ -499,8 +499,7 @@ void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) if (!auth_hmacs) return; - for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) - { + for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) { if (auth_hmacs[i]) crypto_free_hash(auth_hmacs[i]); } @@ -647,15 +646,15 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) */ for (i = 0; !found && i < len; i++) { switch (param->chunks[i]) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - case SCTP_CID_SHUTDOWN_COMPLETE: - case SCTP_CID_AUTH: + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: break; - default: + default: if (param->chunks[i] == chunk) - found = 1; + found = 1; break; } } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 5573e425b0c..158701da2d3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -254,7 +254,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ - for (i=0, len=first_len; i < whole; i++) { + for (i = 0, len = first_len; i < whole; i++) { frag = SCTP_DATA_MIDDLE_FRAG; if (0 == i) @@ -317,7 +317,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, goto errout; } - err = sctp_user_addto_chunk(chunk, offset, over,msgh->msg_iov); + err = sctp_user_addto_chunk(chunk, offset, over, msgh->msg_iov); /* Put the chunk->skb back into the form expected by send. */ __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr diff --git a/net/sctp/input.c b/net/sctp/input.c index 042ec6c9ae2..1f4eeb43fbd 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -119,7 +119,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_af *af; struct net *net = dev_net(skb->dev); - if (skb->pkt_type!=PACKET_HOST) + if (skb->pkt_type != PACKET_HOST) goto discard_it; SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); @@ -180,8 +180,7 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) - { + if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { if (asoc) { sctp_association_put(asoc); asoc = NULL; @@ -389,9 +388,6 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (!t || (t->pathmtu <= pmtu)) return; - if (!ip6_sk_accept_pmtu(sk)) - return; - if (sock_owned_by_user(sk)) { asoc->pmtu_pending = 1; t->pmtu_pending = 1; @@ -613,8 +609,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, info); goto out_unlock; - } - else { + } else { if (ICMP_PROT_UNREACH == code) { sctp_icmp_proto_unreachable(sk, asoc, transport); @@ -1058,31 +1053,31 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ch_end > skb_tail_pointer(skb)) break; - switch(ch->type) { - case SCTP_CID_AUTH: - have_auth = chunk_num; - break; - - case SCTP_CID_COOKIE_ECHO: - /* If a packet arrives containing an AUTH chunk as - * a first chunk, a COOKIE-ECHO chunk as the second - * chunk, and possibly more chunks after them, and - * the receiver does not have an STCB for that - * packet, then authentication is based on - * the contents of the COOKIE- ECHO chunk. - */ - if (have_auth == 1 && chunk_num == 2) - return NULL; - break; - - case SCTP_CID_ASCONF: - if (have_auth || net->sctp.addip_noauth) - asoc = __sctp_rcv_asconf_lookup( - net, ch, laddr, - sctp_hdr(skb)->source, - transportp); - default: - break; + switch (ch->type) { + case SCTP_CID_AUTH: + have_auth = chunk_num; + break; + + case SCTP_CID_COOKIE_ECHO: + /* If a packet arrives containing an AUTH chunk as + * a first chunk, a COOKIE-ECHO chunk as the second + * chunk, and possibly more chunks after them, and + * the receiver does not have an STCB for that + * packet, then authentication is based on + * the contents of the COOKIE- ECHO chunk. + */ + if (have_auth == 1 && chunk_num == 2) + return NULL; + break; + + case SCTP_CID_ASCONF: + if (have_auth || net->sctp.addip_noauth) + asoc = __sctp_rcv_asconf_lookup( + net, ch, laddr, + sctp_hdr(skb)->source, + transportp); + default: + break; } if (asoc) @@ -1119,19 +1114,10 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ - switch (ch->type) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: + if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK) return __sctp_rcv_init_lookup(net, skb, laddr, transportp); - break; - - default: - return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); - break; - } - - return NULL; + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); } /* Lookup an association for an inbound skb. */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 317e13eb2c5..0f6259a6a93 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -172,7 +172,8 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, switch (type) { case ICMPV6_PKT_TOOBIG: - sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); + if (ip6_sk_accept_pmtu(sk)) + sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); goto out_unlock; case ICMPV6_PARAMPROB: if (ICMPV6_UNK_NEXTHDR == code) { @@ -401,7 +402,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } /* Initialize a sockaddr_storage from in incoming skb. */ -static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, +static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { __be16 *port; diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 0c28e8a5532..40e7fac96c4 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -97,7 +97,7 @@ static void sctp_objcnt_seq_stop(struct seq_file *seq, void *v) { } -static void * sctp_objcnt_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_objcnt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (*pos >= ARRAY_SIZE(sctp_dbg_objcnt)) ? NULL : (void *)pos; diff --git a/net/sctp/output.c b/net/sctp/output.c index 3f55823279d..0f4d15fc262 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -280,7 +280,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { - case SCTP_CID_DATA: + case SCTP_CID_DATA: /* Account for the data being in the packet */ sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ @@ -292,17 +292,17 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, /* timestamp the chunk for rtx purposes */ chunk->sent_at = jiffies; break; - case SCTP_CID_COOKIE_ECHO: + case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; break; - case SCTP_CID_SACK: + case SCTP_CID_SACK: packet->has_sack = 1; if (chunk->asoc) chunk->asoc->stats.osacks++; break; - case SCTP_CID_AUTH: + case SCTP_CID_AUTH: packet->has_auth = 1; packet->auth = chunk; break; @@ -387,7 +387,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) int err = 0; int padding; /* How much padding do we need? */ __u8 has_data = 0; - struct dst_entry *dst = tp->dst; + struct dst_entry *dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ pr_debug("%s: packet:%p\n", __func__, packet); @@ -420,9 +420,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } dst = dst_clone(tp->dst); - skb_dst_set(nskb, dst); if (!dst) goto no_route; + skb_dst_set(nskb, dst); /* Build the SCTP header. */ sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); @@ -540,8 +540,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } else { /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (skb_transport_header(nskb) - - nskb->head); + nskb->csum_start = skb_transport_header(nskb) - nskb->head; nskb->csum_offset = offsetof(struct sctphdr, checksum); } } @@ -558,7 +557,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - (*tp->af_specific->ecn_capable)(nskb->sk); + tp->af_specific->ecn_capable(nskb->sk); /* Set up the IP options. */ /* BUG: not implemented @@ -593,7 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); nskb->local_df = packet->ipfragok; - (*tp->af_specific->sctp_xmit)(nskb, tp); + tp->af_specific->sctp_xmit(nskb, tp); out: sctp_packet_reset(packet); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index b6b09f3f1a8..9c77947c059 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -110,7 +110,7 @@ static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary, struct sctp_transport *transport, int count_of_newacks) { - if (count_of_newacks >=2 && transport != primary) + if (count_of_newacks >= 2 && transport != primary) return 1; return 0; } @@ -207,8 +207,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - - q->empty = 1; } /* Free the outqueue structure and any related pending chunks. @@ -331,7 +329,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - q->empty = 0; break; } } else { @@ -470,7 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, struct net *net = sock_net(q->asoc->base.sk); int error = 0; - switch(reason) { + switch (reason) { case SCTP_RTXR_T3_RTX: SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); @@ -653,7 +650,6 @@ redo: if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - q->empty = 0; q->asoc->stats.rtxchunks++; break; } @@ -1064,8 +1060,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_reset_timers(transport); - q->empty = 0; - /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. */ @@ -1088,7 +1082,7 @@ sctp_flush_out: * * --xguo */ - while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL ) { + while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL) { struct sctp_transport *t = list_entry(ltransport, struct sctp_transport, send_ready); @@ -1217,7 +1211,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) * destinations for which cacc_saw_newack is set. */ if (transport->cacc.cacc_saw_newack) - count_of_newacks ++; + count_of_newacks++; } /* Move the Cumulative TSN Ack Point if appropriate. */ @@ -1274,29 +1268,17 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, asoc->adv_peer_ack_point); - /* See if all chunks are acked. - * Make sure the empty queue handler will get run later. - */ - q->empty = (list_empty(&q->out_chunk_list) && - list_empty(&q->retransmit)); - if (!q->empty) - goto finish; - - list_for_each_entry(transport, transport_list, transports) { - q->empty = q->empty && list_empty(&transport->transmitted); - if (!q->empty) - goto finish; - } - - pr_debug("%s: sack queue is empty\n", __func__); -finish: - return q->empty; + return sctp_outq_is_empty(q); } -/* Is the outqueue empty? */ +/* Is the outqueue empty? + * The queue is empty when we have not pending data, no in-flight data + * and nothing pending retransmissions. + */ int sctp_outq_is_empty(const struct sctp_outq *q) { - return q->empty; + return q->out_qlen == 0 && q->outstanding_bytes == 0 && + list_empty(&q->retransmit); } /******************************************************************** diff --git a/net/sctp/proc.c b/net/sctp/proc.c index de32e14f739..63ba0bdc867 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -177,7 +177,7 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa rcu_read_unlock(); } -static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) +static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_ep_hashsize) return NULL; @@ -196,7 +196,7 @@ static void sctp_eps_seq_stop(struct seq_file *seq, void *v) } -static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_ep_hashsize) return NULL; @@ -282,7 +282,7 @@ void sctp_eps_proc_exit(struct net *net) } -static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_assoc_hashsize) return NULL; @@ -305,7 +305,7 @@ static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) } -static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_assoc_hashsize) return NULL; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 19bd4c5bdae..7c161084f24 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1030,6 +1030,7 @@ static const struct net_protocol sctp_protocol = { .err_handler = sctp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; /* IPv4 address related functions. */ @@ -1065,8 +1066,8 @@ static struct sctp_af sctp_af_inet = { #endif }; -struct sctp_pf *sctp_get_pf_specific(sa_family_t family) { - +struct sctp_pf *sctp_get_pf_specific(sa_family_t family) +{ switch (family) { case PF_INET: return sctp_pf_inet_specific; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d9aaf9641aa..632090b961c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -78,6 +78,8 @@ static int sctp_process_param(struct sctp_association *asoc, gfp_t gfp); static void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data); +static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, + const void *data); /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) @@ -1475,8 +1477,8 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) /* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient * space in the chunk */ -void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, - int len, const void *data) +static void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, + int len, const void *data) { if (skb_tailroom(chunk->skb) >= len) return sctp_addto_chunk(chunk, len, data); @@ -1967,13 +1969,13 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { - case SCTP_CID_AUTH: - have_auth = 1; - break; - case SCTP_CID_ASCONF: - case SCTP_CID_ASCONF_ACK: - have_asconf = 1; - break; + case SCTP_CID_AUTH: + have_auth = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + have_asconf = 1; + break; } } @@ -2000,25 +2002,24 @@ static void sctp_process_ext_param(struct sctp_association *asoc, for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { - case SCTP_CID_FWD_TSN: - if (net->sctp.prsctp_enable && - !asoc->peer.prsctp_capable) + case SCTP_CID_FWD_TSN: + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; - break; - case SCTP_CID_AUTH: - /* if the peer reports AUTH, assume that he - * supports AUTH. - */ - if (net->sctp.auth_enable) - asoc->peer.auth_capable = 1; - break; - case SCTP_CID_ASCONF: - case SCTP_CID_ASCONF_ACK: - if (net->sctp.addip_enable) - asoc->peer.asconf_capable = 1; - break; - default: - break; + break; + case SCTP_CID_AUTH: + /* if the peer reports AUTH, assume that he + * supports AUTH. + */ + if (net->sctp.auth_enable) + asoc->peer.auth_capable = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + if (net->sctp.addip_enable) + asoc->peer.asconf_capable = 1; + break; + default: + break; } } } @@ -2251,7 +2252,7 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, * VIOLATION error. We build the ERROR chunk here and let the normal * error handling code build and send the packet. */ - if (param.v != (void*)chunk->chunk_end) + if (param.v != (void *)chunk->chunk_end) return sctp_process_inv_paramlength(asoc, param.p, chunk, errp); /* The only missing mandatory param possible today is @@ -2266,14 +2267,14 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, result = sctp_verify_param(net, asoc, param, cid, chunk, errp); switch (result) { - case SCTP_IERROR_ABORT: - case SCTP_IERROR_NOMEM: - return 0; - case SCTP_IERROR_ERROR: - return 1; - case SCTP_IERROR_NO_ERROR: - default: - break; + case SCTP_IERROR_ABORT: + case SCTP_IERROR_NOMEM: + return 0; + case SCTP_IERROR_ERROR: + return 1; + case SCTP_IERROR_NO_ERROR: + default: + break; } } /* for (loop through all parameters) */ @@ -2308,7 +2309,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * added as the primary transport. The source address seems to * be a a better choice than any of the embedded addresses. */ - if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) + if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; if (sctp_cmp_addr_exact(sctp_source(chunk), peer_addr)) @@ -3334,7 +3335,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, while (asconf_ack_len > 0) { if (asconf_ack_param->crr_id == asconf_param->crr_id) { - switch(asconf_ack_param->param_hdr.type) { + switch (asconf_ack_param->param_hdr.type) { case SCTP_PARAM_SUCCESS_REPORT: return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 02b7ad1ff46..ded6db66fb2 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -404,7 +404,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; struct net *net = sock_net(asoc->base.sk); - + sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { pr_debug("%s: sock is busy\n", __func__); @@ -543,7 +543,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands, { struct sctp_ulpevent *event; - event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC, + event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_CANT_STR_ASSOC, (__u16)error, 0, 0, NULL, GFP_ATOMIC); @@ -1115,7 +1115,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); debug_pre_sfn(); - status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); + status = state_fn->fn(net, ep, asoc, subtype, event_arg, &commands); debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ee02771d8b9..483dcd71b3c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2945,7 +2945,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - error = sctp_eat_data(asoc, chunk, commands ); + error = sctp_eat_data(asoc, chunk, commands); switch (error) { case SCTP_IERROR_NO_ERROR: break; @@ -3066,7 +3066,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - error = sctp_eat_data(asoc, chunk, commands ); + error = sctp_eat_data(asoc, chunk, commands); switch (error) { case SCTP_IERROR_NO_ERROR: case SCTP_IERROR_HIGH_TSN: @@ -3681,8 +3681,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); if (asoc->new_transport) { - sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, - commands); + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, commands); ((struct sctp_association *)asoc)->new_transport = NULL; } @@ -3765,7 +3764,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, @@ -3799,7 +3798,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, /* We are going to ABORT, so we might as well stop * processing the rest of the chunks in the packet. */ - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, @@ -4451,7 +4450,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The following chunk had invalid length:"; + static const char err_str[] = "The following chunk had invalid length:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); @@ -4514,7 +4513,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; + static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); @@ -4534,7 +4533,7 @@ static sctp_disposition_t sctp_sf_violation_chunk( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The following chunk violates protocol:"; + static const char err_str[] = "The following chunk violates protocol:"; if (!asoc) return sctp_sf_violation(net, ep, asoc, type, arg, commands); @@ -4610,7 +4609,7 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *repl; - struct sctp_association* my_asoc; + struct sctp_association *my_asoc; /* The comment below says that we enter COOKIE-WAIT AFTER * sending the INIT, but that doesn't actually work in our @@ -6000,7 +5999,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, /* Special case the INIT-ACK as there is no peer's vtag * yet. */ - switch(chunk->chunk_hdr->type) { + switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT_ACK: { sctp_initack_chunk_t *initack; @@ -6017,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, /* Special case the INIT and stale COOKIE_ECHO as there is no * vtag yet. */ - switch(chunk->chunk_hdr->type) { + switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT: { sctp_init_chunk_t *init; @@ -6207,7 +6206,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (*sk->sk_prot_creator->memory_pressure) { if (sctp_tsnmap_has_gap(map) && - (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; @@ -6231,7 +6230,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* We are going to ABORT, so we might as well stop * processing the rest of the chunks in the packet. */ - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index a4f17437fb2..a987d54b379 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -69,7 +69,7 @@ static const sctp_sm_table_entry_t bug = { if ((event_subtype._type > (_max))) { \ pr_warn("table %p possible attack: event %d exceeds max %d\n", \ _table, event_subtype._type, _max); \ - rtn = &bug; \ + rtn = &bug; \ } else \ rtn = &_table[event_subtype._type][(int)state]; \ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d39fd0c2c4c..d32dae78a48 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -82,7 +82,7 @@ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, size_t msg_len); -static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); +static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); @@ -952,7 +952,7 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -static int sctp_setsockopt_bindx(struct sock* sk, +static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size, int op) { @@ -1039,7 +1039,7 @@ out: * Common routine for handling connect() and sctp_connectx(). * Connect will come in with just a single address. */ -static int __sctp_connect(struct sock* sk, +static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1299,7 +1299,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -static int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1337,7 +1337,7 @@ static int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -static int sctp_setsockopt_connectx_old(struct sock* sk, +static int sctp_setsockopt_connectx_old(struct sock *sk, struct sockaddr __user *addrs, int addrs_size) { @@ -1350,7 +1350,7 @@ static int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -static int sctp_setsockopt_connectx(struct sock* sk, +static int sctp_setsockopt_connectx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size) { @@ -1373,7 +1373,7 @@ static int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. */ -static int sctp_getsockopt_connectx3(struct sock* sk, int len, +static int sctp_getsockopt_connectx3(struct sock *sk, int len, char __user *optval, int __user *optlen) { @@ -1568,7 +1568,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; - struct sctp_association *new_asoc=NULL, *asoc=NULL; + struct sctp_association *new_asoc = NULL, *asoc = NULL; struct sctp_transport *transport, *chunk_tp; struct sctp_chunk *chunk; union sctp_addr to; @@ -2462,7 +2462,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, int hb_change, pmtud_change, sackdelay_change; if (optlen != sizeof(struct sctp_paddrparams)) - return - EINVAL; + return -EINVAL; if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2483,7 +2483,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, (union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -2578,8 +2578,11 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); - pr_warn("Use struct sctp_sack_info instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2588,7 +2591,7 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, else params.sack_freq = 0; } else - return - EINVAL; + return -EINVAL; /* Validate value parameter. */ if (params.sack_delay > 500) @@ -2994,8 +2997,11 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - pr_warn("Use of int in maxseg socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in maxseg socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3252,8 +3258,11 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - pr_warn("Use of int in max_burst socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in max_burst socket option deprecated.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -3332,7 +3341,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; - hmacs= memdup_user(optval, optlen); + hmacs = memdup_user(optval, optlen); if (IS_ERR(hmacs)) return PTR_ERR(hmacs); @@ -3370,7 +3379,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; - authkey= memdup_user(optval, optlen); + authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey); @@ -3924,7 +3933,7 @@ static int sctp_init_sock(struct sock *sk) */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; - sp->pathmtu = 0; // allow default discovery + sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | @@ -4467,7 +4476,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, (union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { @@ -4573,12 +4582,15 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); - pr_warn("Use struct sctp_sack_info instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else - return - EINVAL; + return -EINVAL; /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then @@ -4668,8 +4680,8 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, if (!asoc) return -EINVAL; - to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - offsetof(struct sctp_getaddrs,addrs); + to = optval + offsetof(struct sctp_getaddrs, addrs); + space_left = len - offsetof(struct sctp_getaddrs, addrs); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { @@ -4729,7 +4741,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, memcpy(to, &temp, addrlen); to += addrlen; - cnt ++; + cnt++; space_left -= addrlen; *bytes_copied += addrlen; } @@ -4778,8 +4790,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, bp = &asoc->base.bind_addr; } - to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - offsetof(struct sctp_getaddrs,addrs); + to = optval + offsetof(struct sctp_getaddrs, addrs); + space_left = len - offsetof(struct sctp_getaddrs, addrs); addrs = kmalloc(space_left, GFP_KERNEL); if (!addrs) @@ -4818,7 +4830,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, memcpy(buf, &temp, addrlen); buf += addrlen; bytes_copied += addrlen; - cnt ++; + cnt++; space_left -= addrlen; } @@ -5090,7 +5102,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; + cnt++; } assocparams.sasoc_number_peer_destinations = cnt; @@ -5218,8 +5230,11 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - pr_warn("Use of int in maxseg socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in maxseg socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5310,8 +5325,11 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - pr_warn("Use of int in max_burst socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in max_burst socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5443,7 +5461,8 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, return -EFAULT; num: len = sizeof(struct sctp_authchunks) + num_chunks; - if (put_user(len, optlen)) return -EFAULT; + if (put_user(len, optlen)) + return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; return 0; @@ -5475,7 +5494,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, return -EINVAL; if (asoc) - ch = (struct sctp_chunks_param*)asoc->c.auth_chunks; + ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; else ch = sctp_sk(sk)->ep->auth_chunk_list; @@ -6417,7 +6436,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * Note: This function is the same function as in core/datagram.c * with a few modifications to make lksctp work. */ -static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) +static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; DEFINE_WAIT(wait); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 9dd5ac08466..7135e617ab0 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -424,7 +424,7 @@ void sctp_sysctl_net_unregister(struct net *net) kfree(table); } -static struct ctl_table_header * sctp_sysctl_header; +static struct ctl_table_header *sctp_sysctl_header; /* Sysctl registration. */ void sctp_sysctl_register(void) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index a67470083be..5dc94117e9d 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -43,9 +43,9 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, +static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *); -static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, +static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *, struct sctp_ulpevent *); static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); @@ -107,7 +107,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, event = sctp_ulpq_reasm(ulpq, event); /* Do ordering if needed. */ - if ((event) && (event->msg_flags & MSG_EOR)){ + if ((event) && (event->msg_flags & MSG_EOR)) { /* Create a temporary list to collect chunks on. */ skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); @@ -336,7 +336,8 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, pos = f_frag->next; /* Get the last skb in the f_frag's frag_list if present. */ - for (last = list; list; last = list, list = list->next); + for (last = list; list; last = list, list = list->next) + ; /* Add the list of remaining fragments to the first fragments * frag_list. @@ -726,7 +727,7 @@ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq) while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) { /* Do ordering if needed. */ - if ((event) && (event->msg_flags & MSG_EOR)){ + if ((event) && (event->msg_flags & MSG_EOR)) { skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4c2a80b3c01..bf860d9e75a 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -794,7 +794,7 @@ void tipc_bclink_init(void) void tipc_bclink_stop(void) { spin_lock_bh(&bc_lock); - tipc_link_stop(bcl); + tipc_link_purge_queues(bcl); spin_unlock_bh(&bc_lock); memset(bclink, 0, sizeof(*bclink)); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3bb5f266b0e..2d456ab0e84 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -185,25 +185,6 @@ struct tipc_bearer *tipc_bearer_find(const char *name) } /** - * tipc_bearer_find_interface - locates bearer object with matching interface name - */ -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) -{ - struct tipc_bearer *b_ptr; - char *b_if_name; - u32 i; - - for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (!b_ptr->active) - continue; - b_if_name = strchr(b_ptr->name, ':') + 1; - if (!strcmp(b_if_name, if_name)) - return b_ptr; - } - return NULL; -} - -/** * tipc_bearer_get_names - record names of bearers in buffer */ struct sk_buff *tipc_bearer_get_names(void) @@ -560,7 +541,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_recv_msg(buf, b_ptr); + tipc_rcv(buf, b_ptr); rcu_read_unlock(); return NET_RX_SUCCESS; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index fa95c34ca92..4f5db9ad5bf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -161,8 +161,7 @@ extern struct tipc_bearer tipc_bearers[]; * TIPC routines available to supported media types */ -void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); - +void tipc_rcv(struct sk_buff *buf, struct tipc_bearer *tb_ptr); int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); @@ -190,7 +189,6 @@ struct sk_buff *tipc_bearer_get_names(void); void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); struct tipc_bearer *tipc_bearer_find(const char *name); -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index bc849f1efa1..412ff41b861 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -50,6 +50,7 @@ * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) + * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent * @timer: timer governing period between requests * @timer_intv: current interval between requests (in ms) @@ -59,6 +60,7 @@ struct tipc_link_req { struct tipc_media_addr dest; u32 domain; int num_nodes; + spinlock_t lock; struct sk_buff *buf; struct timer_list timer; unsigned int timer_intv; @@ -274,7 +276,9 @@ static void disc_update(struct tipc_link_req *req) */ void tipc_disc_add_dest(struct tipc_link_req *req) { + spin_lock_bh(&req->lock); req->num_nodes++; + spin_unlock_bh(&req->lock); } /** @@ -283,8 +287,10 @@ void tipc_disc_add_dest(struct tipc_link_req *req) */ void tipc_disc_remove_dest(struct tipc_link_req *req) { + spin_lock_bh(&req->lock); req->num_nodes--; disc_update(req); + spin_unlock_bh(&req->lock); } /** @@ -297,7 +303,7 @@ static void disc_timeout(struct tipc_link_req *req) { int max_delay; - spin_lock_bh(&req->bearer->lock); + spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ if (tipc_node(req->domain) && req->num_nodes) { @@ -325,7 +331,7 @@ static void disc_timeout(struct tipc_link_req *req) k_start_timer(&req->timer, req->timer_intv); exit: - spin_unlock_bh(&req->bearer->lock); + spin_unlock_bh(&req->lock); } /** @@ -356,6 +362,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, req->domain = dest_domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; + spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; diff --git a/net/tipc/link.c b/net/tipc/link.c index 3d73144a1cc..471973ff134 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012, Ericsson AB + * Copyright (c) 1996-2007, 2012-2014, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -78,8 +78,8 @@ static const char *link_unk_evt = "Unknown link event "; static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf); +static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, + struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, @@ -87,7 +87,6 @@ static int link_send_sections_long(struct tipc_port *sender, static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void link_start(struct tipc_link *l_ptr); static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_send_sync(struct tipc_link *l); static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf); @@ -278,9 +277,11 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, tipc_node_attach_link(n_ptr, l_ptr); - k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); + k_init_timer(&l_ptr->timer, (Handler)link_timeout, + (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); - tipc_k_signal((Handler)link_start, (unsigned long)l_ptr); + + link_state_event(l_ptr, STARTING_EVT); return l_ptr; } @@ -305,19 +306,13 @@ void tipc_link_delete(struct tipc_link *l_ptr) tipc_node_lock(l_ptr->owner); tipc_link_reset(l_ptr); tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_stop(l_ptr); + tipc_link_purge_queues(l_ptr); list_del_init(&l_ptr->link_list); tipc_node_unlock(l_ptr->owner); k_term_timer(&l_ptr->timer); kfree(l_ptr); } -static void link_start(struct tipc_link *l_ptr) -{ - tipc_node_lock(l_ptr->owner); - link_state_event(l_ptr, STARTING_EVT); - tipc_node_unlock(l_ptr->owner); -} /** * link_schedule_port - schedule port for deferred sending @@ -403,10 +398,10 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) } /** - * tipc_link_stop - purge all inbound and outbound messages associated with link + * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link */ -void tipc_link_stop(struct tipc_link *l_ptr) +void tipc_link_purge_queues(struct tipc_link *l_ptr) { kfree_skb_list(l_ptr->oldest_deferred_in); kfree_skb_list(l_ptr->first_out); @@ -437,8 +432,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); - if (was_active_link && tipc_node_active_links(l_ptr->owner) && - l_ptr->owner->permit_changeover) { + if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; } @@ -1169,7 +1163,7 @@ reject: /* * tipc_link_push_packet: Push one unsent packet to the media */ -u32 tipc_link_push_packet(struct tipc_link *l_ptr) +static u32 tipc_link_push_packet(struct tipc_link *l_ptr) { struct sk_buff *buf = l_ptr->first_out; u32 r_q_size = l_ptr->retransm_queue_size; @@ -1422,14 +1416,14 @@ static int link_recv_buf_validate(struct sk_buff *buf) } /** - * tipc_recv_msg - process TIPC messages arriving from off-node + * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain * @tb_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ -void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) +void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { read_lock_bh(&tipc_net_lock); while (head) { @@ -1603,7 +1597,7 @@ deliver: continue; case CHANGEOVER_PROTOCOL: type = msg_type(msg); - if (link_recv_changeover_msg(&l_ptr, &buf)) { + if (tipc_link_tunnel_rcv(&l_ptr, &buf)) { msg = buf_msg(buf); seq_no = msg_seqno(msg); if (type == ORIGINAL_MSG) @@ -1837,8 +1831,6 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) if (tipc_own_addr > msg_prevnode(msg)) l_ptr->b_ptr->net_plane = msg_net_plane(msg); - l_ptr->owner->permit_changeover = msg_redundant_link(msg); - switch (msg_type(msg)) { case RESET_MSG: @@ -1954,13 +1946,13 @@ exit: } -/* - * tipc_link_tunnel(): Send one message via a link belonging to - * another bearer. Owner node is locked. +/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to + * a different bearer. Owner node is locked. */ -static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, - u32 selector) +static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, + struct tipc_msg *tunnel_hdr, + struct tipc_msg *msg, + u32 selector) { struct tipc_link *tunnel; struct sk_buff *buf; @@ -1983,12 +1975,13 @@ static void tipc_link_tunnel(struct tipc_link *l_ptr, } - -/* - * changeover(): Send whole message queue via the remaining link - * Owner node is locked. +/* tipc_link_failover_send_queue(): A link has gone down, but a second + * link is still active. We can do failover. Tunnel the failing link's + * whole send queue via the remaining link. This way, we don't lose + * any packets, and sequence order is preserved for subsequent traffic + * sent over the remaining link. Owner node is locked. */ -void tipc_link_changeover(struct tipc_link *l_ptr) +void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { u32 msgcount = l_ptr->out_queue_size; struct sk_buff *crs = l_ptr->first_out; @@ -1999,11 +1992,6 @@ void tipc_link_changeover(struct tipc_link *l_ptr) if (!tunnel) return; - if (!l_ptr->owner->permit_changeover) { - pr_warn("%speer did not permit changeover\n", link_co_err); - return; - } - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); @@ -2037,20 +2025,30 @@ void tipc_link_changeover(struct tipc_link *l_ptr) msgcount = msg_msgcnt(msg); while (msgcount--) { msg_set_seqno(m, msg_seqno(msg)); - tipc_link_tunnel(l_ptr, &tunnel_hdr, m, - msg_link_selector(m)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, m, + msg_link_selector(m)); pos += align(msg_size(m)); m = (struct tipc_msg *)pos; } } else { - tipc_link_tunnel(l_ptr, &tunnel_hdr, msg, - msg_link_selector(msg)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, msg, + msg_link_selector(msg)); } crs = crs->next; } } -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel) +/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a + * duplicate of the first link's send queue via the new link. This way, we + * are guaranteed that currently queued packets from a socket are delivered + * before future traffic from the same socket, even if this is using the + * new link. The last arriving copy of each duplicate packet is dropped at + * the receiving end by the regular protocol check, so packet cardinality + * and sequence order is preserved per sender/receiver socket pair. + * Owner node is locked. + */ +void tipc_link_dup_send_queue(struct tipc_link *l_ptr, + struct tipc_link *tunnel) { struct sk_buff *iter; struct tipc_msg tunnel_hdr; @@ -2106,12 +2104,14 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } -/* - * link_recv_changeover_msg(): Receive tunneled packet sent - * via other link. Node is locked. Return extracted buffer. +/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent + * via other link as result of a failover (ORIGINAL_MSG) or + * a new active link (DUPLICATE_MSG). Failover packets are + * returned to the active link for delivery upwards. + * Owner node is locked. */ -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf) +static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, + struct sk_buff **buf) { struct sk_buff *tunnel_buf = *buf; struct tipc_link *dest_link; diff --git a/net/tipc/link.h b/net/tipc/link.h index 424b1dfe436..3b6aa65b608 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -216,16 +216,20 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct tipc_link *l_ptr); -void tipc_link_changeover(struct tipc_link *l_ptr); -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_failover_send_queue(struct tipc_link *l_ptr); +void tipc_link_dup_send_queue(struct tipc_link *l_ptr, + struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); -u32 tipc_link_push_packet(struct tipc_link *l_ptr); -void tipc_link_stop(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); +void tipc_link_purge_queues(struct tipc_link *l_ptr); +struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, + int req_tlv_space, + u16 cmd); +struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, + int req_tlv_space); +struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, + int req_tlv_space); void tipc_link_reset(struct tipc_link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_send_names(struct list_head *message_list, u32 dest); diff --git a/net/tipc/node.c b/net/tipc/node.c index bf1ac89b480..efe4d41bf11 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -162,7 +162,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) pr_info("New link <%s> becomes standby\n", l_ptr->name); return; } - tipc_link_send_duplicate(active[0], l_ptr); + tipc_link_dup_send_queue(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; return; @@ -225,7 +225,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (active[0] == l_ptr) node_select_active_links(n_ptr); if (tipc_node_is_up(n_ptr)) - tipc_link_changeover(l_ptr); + tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); } @@ -235,11 +235,6 @@ int tipc_node_active_links(struct tipc_node *n_ptr) return n_ptr->active_links[0] != NULL; } -int tipc_node_redundant_links(struct tipc_node *n_ptr) -{ - return n_ptr->working_links > 1; -} - int tipc_node_is_up(struct tipc_node *n_ptr) { return tipc_node_active_links(n_ptr); diff --git a/net/tipc/node.h b/net/tipc/node.h index e5e96c04e16..63e2e8ead2f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,6 @@ * @working_links: number of working links to node (both active and standby) * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node - * @permit_changeover: non-zero if node has redundant links to this system * @signature: node instance identifier * @bclink: broadcast-related info * @acked: sequence # of last outbound b'cast message acknowledged by node @@ -89,7 +88,6 @@ struct tipc_node { int link_cnt; int working_links; int block_setup; - int permit_changeover; u32 signature; struct { u32 acked; @@ -115,7 +113,6 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); -int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); diff --git a/net/tipc/port.c b/net/tipc/port.c index 5fd4c8cec08..b742b265452 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -251,18 +251,15 @@ struct tipc_port *tipc_createport(struct sock *sk, return p_ptr; } -int tipc_deleteport(u32 ref) +int tipc_deleteport(struct tipc_port *p_ptr) { - struct tipc_port *p_ptr; struct sk_buff *buf = NULL; - tipc_withdraw(ref, 0, NULL); - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; + tipc_withdraw(p_ptr, 0, NULL); - tipc_ref_discard(ref); - tipc_port_unlock(p_ptr); + spin_lock_bh(p_ptr->lock); + tipc_ref_discard(p_ptr->ref); + spin_unlock_bh(p_ptr->lock); k_cancel_timer(&p_ptr->timer); if (p_ptr->connected) { @@ -704,47 +701,36 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) } -int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; u32 key; - int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) + if (p_ptr->connected) return -EINVAL; + key = p_ptr->ref + p_ptr->pub_count + 1; + if (key == p_ptr->ref) + return -EADDRINUSE; - if (p_ptr->connected) - goto exit; - key = ref + p_ptr->pub_count + 1; - if (key == ref) { - res = -EADDRINUSE; - goto exit; - } publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, scope, p_ptr->ref, key); if (publ) { list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; p_ptr->published = 1; - res = 0; + return 0; } -exit: - tipc_port_unlock(p_ptr); - return res; + return -EINVAL; } -int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; struct publication *tpubl; int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; if (!seq) { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -771,7 +757,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } if (list_empty(&p_ptr->publications)) p_ptr->published = 0; - tipc_port_unlock(p_ptr); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 91225359734..34f12bd4074 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -116,7 +116,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err); void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_deleteport(u32 portref); +int tipc_deleteport(struct tipc_port *p_ptr); int tipc_portimportance(u32 portref, unsigned int *importance); int tipc_set_portimportance(u32 portref, unsigned int importance); @@ -127,9 +127,9 @@ int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); -int tipc_publish(u32 portref, unsigned int scope, +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); -int tipc_withdraw(u32 portref, unsigned int scope, +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 83f466e57fe..c8341d1f995 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -351,7 +351,7 @@ static int release(struct socket *sock) * Delete TIPC port; this ensures no more messages are queued * (also disconnects an active connection & sends a 'FIN-' to peer) */ - res = tipc_deleteport(tport->ref); + res = tipc_deleteport(tport); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -383,30 +383,46 @@ static int release(struct socket *sock) */ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) { + struct sock *sk = sock->sk; struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; + struct tipc_port *tport = tipc_sk_port(sock->sk); + int res = -EINVAL; - if (unlikely(!uaddr_len)) - return tipc_withdraw(portref, 0, NULL); + lock_sock(sk); + if (unlikely(!uaddr_len)) { + res = tipc_withdraw(tport, 0, NULL); + goto exit; + } - if (uaddr_len < sizeof(struct sockaddr_tipc)) - return -EINVAL; - if (addr->family != AF_TIPC) - return -EAFNOSUPPORT; + if (uaddr_len < sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + goto exit; + } + if (addr->family != AF_TIPC) { + res = -EAFNOSUPPORT; + goto exit; + } if (addr->addrtype == TIPC_ADDR_NAME) addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) - return -EAFNOSUPPORT; + else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { + res = -EAFNOSUPPORT; + goto exit; + } if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) - return -EACCES; + (addr->addr.nameseq.type != TIPC_CFG_SRV)) { + res = -EACCES; + goto exit; + } - return (addr->scope > 0) ? - tipc_publish(portref, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq); + res = (addr->scope > 0) ? + tipc_publish(tport, addr->scope, &addr->addr.nameseq) : + tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq); +exit: + release_sock(sk); + return res; } /** @@ -751,13 +767,10 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { - res = -ENOTCONN; - if (sock->state == SS_UNCONNECTED) res = send_packet(NULL, sock, m, total_len); - else if (sock->state == SS_DISCONNECTING) - res = -EPIPE; - + else + res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; goto exit; } diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 324e8d851dc..11ee4ed04f7 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -29,6 +29,7 @@ static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, wdev->beacon_interval = 0; wdev->channel = NULL; wdev->ssid_len = 0; + rdev_set_qos_map(rdev, dev, NULL); } return err; diff --git a/net/wireless/core.c b/net/wireless/core.c index 06db6eb5258..d89dee2259b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -203,17 +203,8 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - /* - * If the scan request wasn't notified as done, set it - * to aborted and leak it after a warning. The driver - * should have notified us that it ended at the latest - * during rdev_stop_p2p_device(). - */ - if (WARN_ON(!rdev->scan_req->notified)) - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); - } + WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && + !rdev->scan_req->notified); } static int cfg80211_rfkill_set_block(void *data, bool blocked) @@ -765,13 +756,16 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, { struct net_device *dev = wdev->netdev; + ASSERT_RTNL(); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - __cfg80211_stop_sched_scan(rdev, false); + if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + __cfg80211_stop_sched_scan(rdev, false); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -865,11 +859,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - if (WARN_ON(!rdev->scan_req->notified)) - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && + !rdev->scan_req->notified); if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 0a277c33bb0..37ec16d7bb1 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -67,9 +67,7 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; -#ifdef CONFIG_NL80211_TESTMODE - struct genl_info *testmode_info; -#endif + struct genl_info *cur_cmd_info; struct work_struct conn_work; struct work_struct event_work; @@ -363,7 +361,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct work_struct *wk); -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev); void __cfg80211_sched_scan_results(struct work_struct *wk); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 730147ed8e6..f911c5f9f90 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -183,6 +183,8 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) kfree(wdev->connect_keys); wdev->connect_keys = NULL; + rdev_set_qos_map(rdev, dev, NULL); + /* * Delete all the keys ... pairwise keys can't really * exist any more anyway, but default keys might. diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index b0e1869de7d..885862447b6 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,6 +99,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; + u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -177,8 +178,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) return -EINVAL; - err = cfg80211_can_use_chan(rdev, wdev, setup->chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); + if (err < 0) + return err; + if (err) + radar_detect_width = BIT(setup->chandef.width); + + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + setup->chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); if (err) return err; @@ -268,6 +277,7 @@ static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, if (!err) { wdev->mesh_id_len = 0; wdev->channel = NULL; + rdev_set_qos_map(rdev, dev, NULL); } return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a693f86e597..b4f40fe84a0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -53,6 +53,7 @@ enum nl80211_multicast_groups { NL80211_MCGRP_SCAN, NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, + NL80211_MCGRP_VENDOR, NL80211_MCGRP_TESTMODE /* keep last - ifdef! */ }; @@ -61,6 +62,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_SCAN] = { .name = "scan", }, [NL80211_MCGRP_REGULATORY] = { .name = "regulatory", }, [NL80211_MCGRP_MLME] = { .name = "mlme", }, + [NL80211_MCGRP_VENDOR] = { .name = "vendor", }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = "testmode", } #endif @@ -376,6 +378,12 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, + [NL80211_ATTR_OPMODE_NOTIF] = { .type = NLA_U8 }, + [NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, + [NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, + [NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY, + .len = IEEE80211_QOS_MAP_LEN_MAX }, }; /* policy for the key attributes */ @@ -1450,6 +1458,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); } + CMD(set_qos_map, SET_QOS_MAP); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -1579,6 +1588,41 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) goto nla_put_failure; + state->split_start++; + break; + case 11: + if (dev->wiphy.n_vendor_commands) { + const struct nl80211_vendor_cmd_info *info; + struct nlattr *nested; + + nested = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA); + if (!nested) + goto nla_put_failure; + + for (i = 0; i < dev->wiphy.n_vendor_commands; i++) { + info = &dev->wiphy.vendor_commands[i].info; + if (nla_put(msg, i + 1, sizeof(*info), info)) + goto nla_put_failure; + } + nla_nest_end(msg, nested); + } + + if (dev->wiphy.n_vendor_events) { + const struct nl80211_vendor_cmd_info *info; + struct nlattr *nested; + + nested = nla_nest_start(msg, + NL80211_ATTR_VENDOR_EVENTS); + if (!nested) + goto nla_put_failure; + + for (i = 0; i < dev->wiphy.n_vendor_events; i++) { + info = &dev->wiphy.vendor_events[i]; + if (nla_put(msg, i + 1, sizeof(*info), info)) + goto nla_put_failure; + } + nla_nest_end(msg, nested); + } /* done */ state->split_start = 0; @@ -4151,6 +4195,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { + params.opmode_notif_used = true; + params.opmode_notif = + nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); + } + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); @@ -5667,8 +5717,13 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def chandef; + enum nl80211_dfs_regions dfs_region; int err; + dfs_region = reg_get_dfs_region(wdev->wiphy); + if (dfs_region == NL80211_DFS_UNSET) + return -EINVAL; + err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; @@ -6693,6 +6748,52 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) return err; } +static struct sk_buff * +__cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, + int approxlen, u32 portid, u32 seq, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + const struct nl80211_vendor_cmd_info *info, + gfp_t gfp) +{ + struct sk_buff *skb; + void *hdr; + struct nlattr *data; + + skb = nlmsg_new(approxlen + 100, gfp); + if (!skb) + return NULL; + + hdr = nl80211hdr_put(skb, portid, seq, 0, cmd); + if (!hdr) { + kfree_skb(skb); + return NULL; + } + + if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + if (info) { + if (nla_put_u32(skb, NL80211_ATTR_VENDOR_ID, + info->vendor_id)) + goto nla_put_failure; + if (nla_put_u32(skb, NL80211_ATTR_VENDOR_SUBCMD, + info->subcmd)) + goto nla_put_failure; + } + + data = nla_nest_start(skb, attr); + + ((void **)skb->cb)[0] = rdev; + ((void **)skb->cb)[1] = hdr; + ((void **)skb->cb)[2] = data; + + return skb; + + nla_put_failure: + kfree_skb(skb); + return NULL; +} #ifdef CONFIG_NL80211_TESTMODE static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) @@ -6717,11 +6818,11 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_TESTDATA]) return -EINVAL; - rdev->testmode_info = info; + rdev->cur_cmd_info = info; err = rdev_testmode_cmd(rdev, wdev, nla_data(info->attrs[NL80211_ATTR_TESTDATA]), nla_len(info->attrs[NL80211_ATTR_TESTDATA])); - rdev->testmode_info = NULL; + rdev->cur_cmd_info = NULL; return err; } @@ -6821,92 +6922,54 @@ static int nl80211_testmode_dump(struct sk_buff *skb, return err; } -static struct sk_buff * -__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 portid, u32 seq, gfp_t gfp) -{ - struct sk_buff *skb; - void *hdr; - struct nlattr *data; - - skb = nlmsg_new(approxlen + 100, gfp); - if (!skb) - return NULL; - - hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); - if (!hdr) { - kfree_skb(skb); - return NULL; - } - - if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) - goto nla_put_failure; - data = nla_nest_start(skb, NL80211_ATTR_TESTDATA); - - ((void **)skb->cb)[0] = rdev; - ((void **)skb->cb)[1] = hdr; - ((void **)skb->cb)[2] = data; - - return skb; - - nla_put_failure: - kfree_skb(skb); - return NULL; -} - -struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, - int approxlen) +struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int vendor_event_idx, + int approxlen, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + const struct nl80211_vendor_cmd_info *info; - if (WARN_ON(!rdev->testmode_info)) + switch (cmd) { + case NL80211_CMD_TESTMODE: + if (WARN_ON(vendor_event_idx != -1)) + return NULL; + info = NULL; + break; + case NL80211_CMD_VENDOR: + if (WARN_ON(vendor_event_idx < 0 || + vendor_event_idx >= wiphy->n_vendor_events)) + return NULL; + info = &wiphy->vendor_events[vendor_event_idx]; + break; + default: + WARN_ON(1); return NULL; + } - return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_portid, - rdev->testmode_info->snd_seq, - GFP_KERNEL); + return __cfg80211_alloc_vendor_skb(rdev, approxlen, 0, 0, + cmd, attr, info, gfp); } -EXPORT_SYMBOL(cfg80211_testmode_alloc_reply_skb); +EXPORT_SYMBOL(__cfg80211_alloc_event_skb); -int cfg80211_testmode_reply(struct sk_buff *skb) +void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp) { struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; - - if (WARN_ON(!rdev->testmode_info)) { - kfree_skb(skb); - return -EINVAL; - } + enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - return genlmsg_reply(skb, rdev->testmode_info); -} -EXPORT_SYMBOL(cfg80211_testmode_reply); - -struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, - int approxlen, gfp_t gfp) -{ - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - return __cfg80211_testmode_alloc_skb(rdev, approxlen, 0, 0, gfp); -} -EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); -void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) -{ - struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; - void *hdr = ((void **)skb->cb)[1]; - struct nlattr *data = ((void **)skb->cb)[2]; + if (data->nla_type == NL80211_ATTR_VENDOR_DATA) + mcgrp = NL80211_MCGRP_VENDOR; - nla_nest_end(skb, data); - genlmsg_end(skb, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0, - NL80211_MCGRP_TESTMODE, gfp); + mcgrp, gfp); } -EXPORT_SYMBOL(cfg80211_testmode_event); +EXPORT_SYMBOL(__cfg80211_send_event_skb); #endif static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) @@ -7328,11 +7391,72 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return true; } +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch (vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_MCS] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, }; static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, @@ -7345,9 +7469,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct nlattr *tx_rates; struct ieee80211_supported_band *sband; - - if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) - return -EINVAL; + u16 vht_tx_mcs_map; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; @@ -7356,17 +7478,26 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, /* Default to all rates enabled */ for (i = 0; i < IEEE80211_NUM_BANDS; i++) { sband = rdev->wiphy.bands[i]; - mask.control[i].legacy = - sband ? (1 << sband->n_bitrates) - 1 : 0; - if (sband) - memcpy(mask.control[i].mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].mcs)); - else - memset(mask.control[i].mcs, 0, - sizeof(mask.control[i].mcs)); + + if (!sband) + continue; + + mask.control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask.control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask.control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); } + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + /* * The nested attribute uses enum nl80211_band as the index. This maps * directly to the enum ieee80211_band values used in cfg80211. @@ -7391,31 +7522,44 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, nla_len(tb[NL80211_TXRATE_LEGACY])) return -EINVAL; } - if (tb[NL80211_TXRATE_MCS]) { + if (tb[NL80211_TXRATE_HT]) { if (!ht_rateset_to_mask( sband, - nla_data(tb[NL80211_TXRATE_MCS]), - nla_len(tb[NL80211_TXRATE_MCS]), - mask.control[band].mcs)) + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask.control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask.control[band].vht_mcs)) return -EINVAL; } if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT - * is not even supported. */ - if (!rdev->wiphy.bands[band]->ht_cap.ht_supported) + /* don't allow empty legacy rates if HT or VHT + * are not even supported. + */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) return -EINVAL; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].mcs[i]) - break; + if (mask.control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask.control[band].vht_mcs[i]) + goto out; /* legacy and mcs rates may not be both empty */ - if (i == IEEE80211_HT_MCS_MASK_LEN) - return -EINVAL; + return -EINVAL; } } +out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } @@ -8875,6 +9019,162 @@ static int nl80211_crit_protocol_stop(struct sk_buff *skb, return 0; } +static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = + __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); + int i, err; + u32 vid, subcmd; + + if (!rdev->wiphy.vendor_commands) + return -EOPNOTSUPP; + + if (IS_ERR(wdev)) { + err = PTR_ERR(wdev); + if (err != -EINVAL) + return err; + wdev = NULL; + } else if (wdev->wiphy != &rdev->wiphy) { + return -EINVAL; + } + + if (!info->attrs[NL80211_ATTR_VENDOR_ID] || + !info->attrs[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; + + vid = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_SUBCMD]); + for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { + const struct wiphy_vendor_command *vcmd; + void *data = NULL; + int len = 0; + + vcmd = &rdev->wiphy.vendor_commands[i]; + + if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) + continue; + + if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | + WIPHY_VENDOR_CMD_NEED_NETDEV)) { + if (!wdev) + return -EINVAL; + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && + !wdev->netdev) + return -EINVAL; + + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { + if (wdev->netdev && + !netif_running(wdev->netdev)) + return -ENETDOWN; + if (!wdev->netdev && !wdev->p2p_started) + return -ENETDOWN; + } + } else { + wdev = NULL; + } + + if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); + len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); + } + + rdev->cur_cmd_info = info; + err = rdev->wiphy.vendor_commands[i].doit(&rdev->wiphy, wdev, + data, len); + rdev->cur_cmd_info = NULL; + return err; + } + + return -EOPNOTSUPP; +} + +struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int approxlen) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + if (WARN_ON(!rdev->cur_cmd_info)) + return NULL; + + return __cfg80211_alloc_vendor_skb(rdev, approxlen, + rdev->cur_cmd_info->snd_portid, + rdev->cur_cmd_info->snd_seq, + cmd, attr, NULL, GFP_KERNEL); +} +EXPORT_SYMBOL(__cfg80211_alloc_reply_skb); + +int cfg80211_vendor_cmd_reply(struct sk_buff *skb) +{ + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + if (WARN_ON(!rdev->cur_cmd_info)) { + kfree_skb(skb); + return -EINVAL; + } + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, rdev->cur_cmd_info); +} +EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply); + + +static int nl80211_set_qos_map(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_qos_map *qos_map = NULL; + struct net_device *dev = info->user_ptr[1]; + u8 *pos, len, num_des, des_len, des; + int ret; + + if (!rdev->ops->set_qos_map) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_QOS_MAP]) { + pos = nla_data(info->attrs[NL80211_ATTR_QOS_MAP]); + len = nla_len(info->attrs[NL80211_ATTR_QOS_MAP]); + + if (len % 2 || len < IEEE80211_QOS_MAP_LEN_MIN || + len > IEEE80211_QOS_MAP_LEN_MAX) + return -EINVAL; + + qos_map = kzalloc(sizeof(struct cfg80211_qos_map), GFP_KERNEL); + if (!qos_map) + return -ENOMEM; + + num_des = (len - IEEE80211_QOS_MAP_LEN_MIN) >> 1; + if (num_des) { + des_len = num_des * + sizeof(struct cfg80211_dscp_exception); + memcpy(qos_map->dscp_exception, pos, des_len); + qos_map->num_des = num_des; + for (des = 0; des < num_des; des++) { + if (qos_map->dscp_exception[des].up > 7) { + kfree(qos_map); + return -EINVAL; + } + } + pos += des_len; + } + memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN); + } + + wdev_lock(dev->ieee80211_ptr); + ret = nl80211_key_allowed(dev->ieee80211_ptr); + if (!ret) + ret = rdev_set_qos_map(rdev, dev, qos_map); + wdev_unlock(dev->ieee80211_ptr); + + kfree(qos_map); + return ret; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -9599,6 +9899,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_VENDOR, + .doit = nl80211_vendor_cmd, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_SET_QOS_MAP, + .doit = nl80211_set_qos_map, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index a271c27fac7..722da616438 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -124,6 +124,10 @@ int ieee80211_radiotap_iterator_init( /* find payload start allowing for extended bitmap(s) */ if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) { + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader + sizeof(uint32_t) > + (unsigned long)iterator->_max_length) + return -EINVAL; while (get_unaligned_le32(iterator->_arg) & (1 << IEEE80211_RADIOTAP_EXT)) { iterator->_arg += sizeof(uint32_t); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index a6c03ab14a0..c8e225947ad 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -932,4 +932,19 @@ static inline int rdev_channel_switch(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_qos_map *qos_map) +{ + int ret = -EOPNOTSUPP; + + if (rdev->ops->set_qos_map) { + trace_rdev_set_qos_map(&rdev->wiphy, dev, qos_map); + ret = rdev->ops->set_qos_map(&rdev->wiphy, dev, qos_map); + trace_rdev_return_int(&rdev->wiphy, ret); + } + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ec54e1aac8e..7d20d844ca6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -135,6 +135,33 @@ static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region) return "Unknown"; } +enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *regd = NULL; + const struct ieee80211_regdomain *wiphy_regd = NULL; + + regd = get_cfg80211_regdom(); + if (!wiphy) + goto out; + + wiphy_regd = get_wiphy_regdom(wiphy); + if (!wiphy_regd) + goto out; + + if (wiphy_regd->dfs_region == regd->dfs_region) + goto out; + + REG_DBG_PRINT("%s: device specific dfs_region " + "(%s) disagrees with cfg80211's " + "central dfs_region (%s)\n", + dev_name(&wiphy->dev), + reg_dfs_region_str(wiphy_regd->dfs_region), + reg_dfs_region_str(regd->dfs_region)); + +out: + return regd->dfs_region; +} + static void rcu_free_regdom(const struct ieee80211_regdomain *r) { if (!r) diff --git a/net/wireless/reg.h b/net/wireless/reg.h index cc4c2c0a672..02bd8f4b092 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -21,6 +21,7 @@ extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool reg_is_valid_request(const char *alpha2); bool is_world_regdom(const char *alpha2); bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region); +enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d4397eba540..a32d52a04c2 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -161,7 +161,7 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, dev->bss_generation++; } -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) { struct cfg80211_scan_request *request; struct wireless_dev *wdev; @@ -210,17 +210,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) dev_put(wdev->netdev); rdev->scan_req = NULL; - - /* - * OK. If this is invoked with "leak" then we can't - * free this ... but we've cleaned it up anyway. The - * driver failed to call the scan_done callback, so - * all bets are off, it might still be trying to use - * the scan request or not ... if it accesses the dev - * in there (it shouldn't anyway) then it may crash. - */ - if (!leak) - kfree(request); + kfree(request); } void __cfg80211_scan_done(struct work_struct *wk) @@ -231,7 +221,7 @@ void __cfg80211_scan_done(struct work_struct *wk) scan_done_wk); rtnl_lock(); - ___cfg80211_scan_done(rdev, false); + ___cfg80211_scan_done(rdev); rtnl_unlock(); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 65f800890d7..5d6e7bb2fc8 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -632,6 +632,16 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } #endif + if (!bss && (status == WLAN_STATUS_SUCCESS)) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + if (bss) + cfg80211_hold_bss(bss_from_pub(bss)); + } + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); @@ -649,16 +659,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - if (!bss) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) - return; - cfg80211_hold_bss(bss_from_pub(bss)); - } + if (WARN_ON(!bss)) + return; wdev->current_bss = bss_from_pub(bss); @@ -870,6 +872,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, for (i = 0; i < 6; i++) rdev_del_key(rdev, dev, i, false, NULL); + rdev_set_qos_map(rdev, dev, NULL); + #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f7aa7a72d9b..fbcc23edee5 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -186,6 +186,28 @@ #define BOOL_TO_STR(bo) (bo) ? "true" : "false" +#define QOS_MAP_ENTRY __field(u8, num_des) \ + __array(u8, dscp_exception, \ + 2 * IEEE80211_QOS_MAP_MAX_EX) \ + __array(u8, up, IEEE80211_QOS_MAP_LEN_MIN) +#define QOS_MAP_ASSIGN(qos_map) \ + do { \ + if ((qos_map)) { \ + __entry->num_des = (qos_map)->num_des; \ + memcpy(__entry->dscp_exception, \ + &(qos_map)->dscp_exception, \ + 2 * IEEE80211_QOS_MAP_MAX_EX); \ + memcpy(__entry->up, &(qos_map)->up, \ + IEEE80211_QOS_MAP_LEN_MIN); \ + } else { \ + __entry->num_des = 0; \ + memset(__entry->dscp_exception, 0, \ + 2 * IEEE80211_QOS_MAP_MAX_EX); \ + memset(__entry->up, 0, \ + IEEE80211_QOS_MAP_LEN_MIN); \ + } \ + } while (0) + /************************************************************* * rdev->ops traces * *************************************************************/ @@ -1875,6 +1897,24 @@ TRACE_EVENT(rdev_channel_switch, __entry->counter_offset_presp) ); +TRACE_EVENT(rdev_set_qos_map, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_qos_map *qos_map), + TP_ARGS(wiphy, netdev, qos_map), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + QOS_MAP_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + QOS_MAP_ASSIGN(qos_map); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", num_des: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ diff --git a/net/wireless/util.c b/net/wireless/util.c index 935dea9485d..5618888853b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -689,7 +689,8 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); /* Given a data frame determine the 802.1p/1d tag to use. */ -unsigned int cfg80211_classify8021d(struct sk_buff *skb) +unsigned int cfg80211_classify8021d(struct sk_buff *skb, + struct cfg80211_qos_map *qos_map) { unsigned int dscp; unsigned char vlan_priority; @@ -720,6 +721,21 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb) return 0; } + if (qos_map) { + unsigned int i, tmp_dscp = dscp >> 2; + + for (i = 0; i < qos_map->num_des; i++) { + if (tmp_dscp == qos_map->dscp_exception[i].dscp) + return qos_map->dscp_exception[i].up; + } + + for (i = 0; i < 8; i++) { + if (tmp_dscp >= qos_map->up[i].low && + tmp_dscp <= qos_map->up[i].high) + return i; + } + } + return dscp >> 5; } EXPORT_SYMBOL(cfg80211_classify8021d); @@ -863,6 +879,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; + rdev_set_qos_map(rdev, dev, NULL); switch (otype) { case NL80211_IFTYPE_AP: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fe8942bb8bb..01770826a15 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1641,20 +1641,22 @@ free_dst: goto out; } -static inline int -xfrm_dst_alloc_copy(void **target, const void *src, int size) +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); if (!*target) return -ENOMEM; } + memcpy(*target, src, size); return 0; } +#endif -static inline int -xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) +static int xfrm_dst_update_parent(struct dst_entry *dst, + const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1665,8 +1667,8 @@ xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) #endif } -static inline int -xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl) +static int xfrm_dst_update_origin(struct dst_entry *dst, + const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; |